diff --git a/edurate_gensim.py b/edurate_gensim.py index b44e766..6a74e27 100644 --- a/edurate_gensim.py +++ b/edurate_gensim.py @@ -30,8 +30,8 @@ def create_tokens(list_responses): logging.info("Creating tokens") stoplist = get_stop_words('en') tokens = [] - + # determines whether a word is profane and if not, adds it to a list of words that we'll analyze later for res in list_responses: temp = [] for word in res.split(): if not isinstance(word, int): @@ -45,6 +44,7 @@ def create_tokens(list_responses): def dictionary_create(tokens): + # To clean up the data and show only a single instance of each word in the list while not losing the information of how common the word was, we make a dictionary with words and quantities. """Create the dictionary from the tokens of the answer.""" dictionary = corpora.Dictionary(tokens) diff --git a/graphing.py b/graphing.py index bd5d81d..948841a 100644 --- a/graphing.py +++ b/graphing.py @@ -10,7 +10,7 @@ def graph(data): """ Takes only most recent input data and then displays graphs """ - # get all data because we need to see trend over time + # get all data because we need to see the trend over time; if an arbitrary date-retention method were implemented, we would select a more specific range. data = convert_to_ints(data) # display generated graphs print(graph1(data)) diff --git a/read_responses.py b/read_responses.py index 8bb7278..80c8d9e 100644 --- a/read_responses.py +++ b/read_responses.py @@ -14,6 +14,7 @@ def read_responses(filepath): csvdata = list(csv.reader(csvfile, delimiter=',')) responses = list() + # We're separating the responses by timestamp here, not by student, so that we see the entirety of the responses with the class as our scope.
for record in csvdata[1:]: # date of response row = list() diff --git a/spreadsheet.py b/spreadsheet.py index ac3f80e..f2e1b69 100644 --- a/spreadsheet.py +++ b/spreadsheet.py @@ -16,6 +16,7 @@ def read_from_spreadsheet(): logging.info( "Authenticating to Google Sheets to obtain Google Form data") # use creds to create a client to interact with the Google Drive API + # TODO >> create a configuration file that the user can edit for security measures and ease of access. scope = ['https://spreadsheets.google.com/feeds'] creds = ServiceAccountCredentials.from_json_keyfile_name( 'Edurate_Client.json', scope) @@ -31,6 +32,7 @@ def read_from_spreadsheet(): def get_graph_data(spreadsheet_list): + # This lets the program find question numbers and timestamps so that responses can be more easily separated and grouped according to graph data needs. """Format spreadsheet_list for graph.""" new = list() for key in spreadsheet_list[0].keys(): @@ -50,6 +52,8 @@ def get_graph_data(spreadsheet_list): def flip_responses(data): + # Currently this is not implemented outside of testing + # If it is necessary, it should be included in the operation of the program """Switch rows and columns in a list of lists.""" if data == []: logging.error("Empty list given. No rows and columns to flip. Returning empty list.") @@ -71,6 +75,7 @@ def flip_responses(data): def filter_dates(data): + # Currently, the oldest date to save records for is arbitrary; a better-defined system for how long to keep dates could be very useful.
"""Return a list of responses only from the latest date.""" TIMESTAMP_LOCATION_INDEX = 0 max_date = datetime(2000, 1, 1, 0, 0).date() @@ -94,10 +99,11 @@ def filter_dates(data): def create_csv(spreadsheet_list): """Create CSV file with spreadsheet data.""" - # returns True when funciton is completed + # returns True when the function is completed, to inform us that the method works logging.info("Creating a list of lists of students") formatted_list = list() # grabs questions from spreadsheet + # currently implemented to run for exactly 10 questions; a solution that can handle an arbitrary number of questions would perhaps be more useful. for entry in spreadsheet_list: questions = [None] * 12 for question, response in entry.items():