2020 QuestionTypeResponse ,
2121 QuestionType ,
2222 Template ,
23+ # Service internal models
24+ QuestionInfo ,
25+ GenerationSlot ,
2326)
2427from app .config import settings
28+ import re
2529
2630logger = logging .getLogger (__name__ )
2731
@@ -131,7 +135,7 @@ def _get_grammar_topics(self, request: QuestionBankPartsGenerationRequest) -> st
131135
132136 def _build_generation_slots (
133137 self , request : QuestionBankPartsGenerationRequest
134- ) -> List [Dict [ str , Any ] ]:
138+ ) -> List [GenerationSlot ]:
135139 """Build generation slots from template distributions, grouped by unit with max 20 questions per slot."""
136140 units_dict = self ._get_unit_los_dict (request )
137141 units_index_path_dict = self ._get_unit_index_path_dict (request )
@@ -152,12 +156,12 @@ def _build_generation_slots(
152156 f"Unit Name `{ dist .unit_name } ` is not present in the `chapters` attribute in request"
153157 )
154158
155- question_info = {
156- " type" : template .type ,
157- " objective" : dist .objective ,
158- " marks_per_question" : template .marks_per_question ,
159- " schema_hint" : template .type .schema_dict (),
160- }
159+ question_info = QuestionInfo (
160+ type = template .type ,
161+ objective = dist .objective ,
162+ marks_per_question = template .marks_per_question ,
163+ schema_hint = template .type .schema_dict (),
164+ )
161165
162166 if dist .unit_name not in unit_questions :
163167 unit_questions [dist .unit_name ] = []
@@ -175,12 +179,12 @@ def _build_generation_slots(
175179 for i in range (0 , len (questions ), self .max_questions_per_slot ):
176180 batch = questions [i : i + self .max_questions_per_slot ]
177181 slots .append (
178- {
179- " unit_name" : unit_name ,
180- " learning_outcomes" : unit_los , # Same for all questions in unit
181- " questions" : batch ,
182- " index_path" : index_path ,
183- }
182+ GenerationSlot (
183+ unit_name = unit_name ,
184+ learning_outcomes = unit_los , # Same for all questions in unit
185+ questions = batch ,
186+ index_path = index_path ,
187+ )
184188 )
185189
186190 return slots
@@ -225,16 +229,16 @@ def _format_system_prompt(
225229 self ,
226230 request : QuestionBankPartsGenerationRequest ,
227231 existing_questions : List [str ],
228- slot : Dict [ str , Any ] ,
232+ slot : GenerationSlot ,
229233 ) -> str :
230234 """Format the system prompt using YAML templates for a specific unit slot."""
231235 try :
232236 # Get the main template
233237 template = self .prompts .get ("question_bank_parts_gen" , "" )
234238
235239 # Get unit-specific information from the slot
236- unit_name = slot [ " unit_name" ]
237- learning_outcomes = slot [ " learning_outcomes" ]
240+ unit_name = slot . unit_name
241+ learning_outcomes = slot . learning_outcomes
238242
239243 # Get Bloom's taxonomy guide
240244 blooms_guide = self .prompts .get ("blooms-taxonomy" , {}).get ("general" , "" )
@@ -275,38 +279,43 @@ def _get_format_instruction_for_type(self, qtype: QuestionType) -> str:
275279 return f"- For { qtype } : { qtype .description } . Use format: { qtype .schema_dict ()} "
276280
277281 async def _generate_questions_batch (
278- self , system_prompt : str , slot : Dict [ str , Any ] , rag_adapter : BaseRagAdapter
282+ self , system_prompt : str , slot : GenerationSlot , rag_adapter : BaseRagAdapter
279283 ) -> List [Dict [str , Any ]]:
280284 """Generate questions for a batch of slots using RAG adapter."""
281285 # Build slot directives from the new slot structure
282- slot_questions = slot [ " questions" ]
286+ slot_questions = slot . questions
283287
284288 # Generate dynamic format rules from QuestionType enum
285- unique_types = set ( q [ "type" ] for q in slot_questions )
289+ unique_types = slot . unique_question_types
286290 format_rules = [
287291 self ._get_format_instruction_for_type (qtype ) for qtype in unique_types
288292 ]
289293 format_rules_text = "\n " .join (format_rules )
290294
295+ # Convert QuestionInfo objects to dict for JSON serialization
296+ slot_questions_dict = [
297+ {
298+ "type" : q .type ,
299+ "objective" : q .objective ,
300+ "marks_per_question" : q .marks_per_question ,
301+ "schema_hint" : q .schema_hint ,
302+ }
303+ for q in slot_questions
304+ ]
305+
291306 user_message = (
292307 "Generate questions for the following slots in a SINGLE JSON object with an `items` array. "
293308 "For each slot, return exactly ONE object with the following fields:\n "
294- "`unit_name`, ` type`, `objective`, `marks_per_question` and `item`\n "
309+ "`type`, `objective`, `marks_per_question` and `item`\n "
295310 "`item` field should adhere to the question's `schema_hint`.\n \n "
296311 "Format rules by question type:\n "
297312 f"{ format_rules_text } \n "
298- f"Question slots:\n { json .dumps (slot_questions , ensure_ascii = False )} "
313+ f"Question slots:\n { json .dumps (slot_questions_dict , ensure_ascii = False )} "
299314 )
300315
301316 # Build chat history with system message only (as per requirement)
302317 chat_history = [ChatMessage (role = "system" , content = system_prompt )]
303318
304- print (
305- "********************* SYSTEM PROMPT *********************" ,
306- system_prompt ,
307- )
308- print ("********************* USER MESSAGE *********************" , user_message )
309-
310319 # Use RAG adapter to chat with index
311320 response_content = await rag_adapter .chat_with_index (
312321 curr_message = user_message , chat_history = chat_history
@@ -318,7 +327,7 @@ async def _generate_questions_batch(
318327 # Parse response
319328 response_data = json .loads (content )
320329
321- print (
330+ logger . info (
322331 "********************** RESPONSE DATA **********************" ,
323332 json .dumps (response_data , indent = 2 ),
324333 )
@@ -329,12 +338,16 @@ async def _generate_questions_batch(
329338 logger .warning ("No items found in completion response" )
330339 return []
331340
341+ # Add unit_name to each generated item for proper key generation
342+ for item in items :
343+ item ["unit_name" ] = slot .unit_name
344+
332345 return items
333346
334347 async def _generate_questions_batch_async (
335348 self ,
336349 system_prompt : str ,
337- slot : Dict [ str , Any ] ,
350+ slot : GenerationSlot ,
338351 rag_adapter : BaseRagAdapter ,
339352 delay_seconds : int = 0 ,
340353 ) -> List [Dict [str , Any ]]:
@@ -356,16 +369,29 @@ def _organize_questions_into_response(
356369 # Create a question directory to organize questions by their specification
357370 question_directory = {}
358371
372+ def normalize_key_component (text : str ) -> str :
373+ """Return `text` as a lookup-key component: invisible characters removed, whitespace runs collapsed, lowercased."""
374+ if not text :
375+ return ""
376+ # Falsy input (None, "", 0) has already been mapped to ""; anything else is coerced with str() below.
377+ # Strip control characters \x00-\x1F and \x7F-\x9F, keeping tab (\x09), newline (\x0A) and carriage return (\x0D)
378+ text = re .sub (r"[\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\x9F]" , "" , str (text ))
379+ # Strip the zero-width characters U+200B-U+200D and U+FEFF
380+ text = re .sub (r"[\u200B-\u200D\uFEFF]" , "" , text )
381+ # Collapse each whitespace run (including the tabs/newlines kept above) to one space, then trim both ends
382+ text = re .sub (r"\s+" , " " , text ).strip ()
383+ return text .lower ()
384+
359385 # Organize generated questions by (type, marks, unit_name, objective)
360386 for i , generated in enumerate (all_generated ):
361387 qtype = QuestionType (generated .get ("type" ))
362- unit_name = generated .get ("unit_name" )
363- objective = generated .get ("objective" )
364- marks_per_question = generated .get ("marks_per_question" )
388+ unit_name = normalize_key_component ( generated .get ("unit_name" , "" ) )
389+ objective = normalize_key_component ( generated .get ("objective" , "" ) )
390+ marks_per_question = generated .get ("marks_per_question" , 0 )
365391 item = generated .get ("item" )
366392
367- # Normalize key to lowercase to avoid case sensitivity issues
368- key = f"{ qtype .value } |{ marks_per_question } |{ unit_name } |{ objective } " . lower ()
393+ # Create normalized key
394+ key = f"{ qtype .value . lower () } |{ marks_per_question } |{ unit_name } |{ objective } "
369395 if key not in question_directory :
370396 question_directory [key ] = []
371397
@@ -387,8 +413,10 @@ def _organize_questions_into_response(
387413
388414 # Add questions in the order specified by question_distribution
389415 for q_dist in template .question_distribution or []:
390- # Normalize key to lowercase to match the question_directory keys
391- key = f"{ template .type .value } |{ template .marks_per_question } |{ q_dist .unit_name } |{ q_dist .objective } " .lower ()
416+ # Create normalized key to match the question_directory keys
417+ normalized_unit = normalize_key_component (q_dist .unit_name )
418+ normalized_objective = normalize_key_component (q_dist .objective )
419+ key = f"{ template .type .value .lower ()} |{ template .marks_per_question } |{ normalized_unit } |{ normalized_objective } "
392420
393421 if key in question_directory and len (question_directory [key ]) > 0 :
394422 question = question_directory [key ].pop (0 )
@@ -401,6 +429,10 @@ def _organize_questions_into_response(
401429 logger .warning (
402430 f"--\n No question found for Normalized key: { key } "
403431 )
432+ # Debug: show all available keys for troubleshooting
433+ logger .warning (
434+ f"Available keys: { list (question_directory .keys ())} "
435+ )
404436
405437 response_questions .append (question_type_resp )
406438
@@ -453,9 +485,7 @@ async def generate_question_bank_by_parts(
453485 tasks = []
454486 for j , slot in enumerate (batch_slots ):
455487 # Get or create cached RAG adapter instance
456- rag_adapter = await self ._get_or_create_rag_adapter (
457- slot ["index_path" ]
458- )
488+ rag_adapter = await self ._get_or_create_rag_adapter (slot .index_path )
459489 # Initiate the index (download files for InMem, no-op for Qdrant)
460490 await rag_adapter .initiate_index ()
461491
@@ -475,6 +505,9 @@ async def generate_question_bank_by_parts(
475505
476506 # Add all generated questions from this batch
477507 for raw_items in batch_results :
508+ if isinstance (raw_items , Exception ):
509+ logger .error (f"Error in batch generation: { raw_items } " )
510+ continue
478511 if raw_items :
479512 all_generated .extend (raw_items )
480513
0 commit comments