kerols77 committed on
Commit 629386e · verified · 1 Parent(s): 09b8b7b

Update app.py

Files changed (1)
  1. app.py +448 -431
app.py CHANGED
@@ -1,431 +1,448 @@
import re
import cv2
import spacy
import numpy as np
import os
import string
import csv
import random
import json
import requests
from collections import OrderedDict
from flask import Flask, request, Response
from paddleocr import PaddleOCR
from sentence_transformers import SentenceTransformer, util
from transformers import pipeline

# Ensure the language model is available
try:
    import en_core_web_md
except ImportError:
    print("en_core_web_md not found. Downloading now...")
    import spacy.cli
    spacy.cli.download("en_core_web_md")
    import en_core_web_md

# Load the model using one method.
nlp = en_core_web_md.load()

# Initialize other components
ochr = PaddleOCR(use_angle_cls=True, lang='en')
sbert_model = SentenceTransformer("all-mpnet-base-v2")
entailment_classifier = pipeline(
    "text-classification",
    model="roberta-large-mnli",
    return_all_scores=True
)

app = Flask(__name__)

def classify_subject(question, candidate_labels=None):
    if candidate_labels is None:
        candidate_labels = ["Math", "Science", "History", "Literature", "Geography", "Art"]
    classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
    result = classifier(question, candidate_labels)
    return result["labels"][0]

def load_advice(filename):
    advice_list = []
    try:
        with open(filename, newline='', encoding='utf-8') as csvfile:
            reader = csv.DictReader(csvfile)
            for row in reader:
                advice_list.append({
                    "min_score": float(row["min_score"]),
                    "max_score": float(row["max_score"]),
                    "subject": row["subject"],
                    "advice_parent": row["advice_parent"],
                    "advice_teacher": row["advice_teacher"],
                    "study_plan": row["study_plan"],
                    "recommended_books": row["recommended_books"]
                })
    except Exception as e:
        print("Advice file error:", e)
    return advice_list

def get_advice(score, subject, advice_list):
    filtered = [a for a in advice_list
                if a["subject"].lower() == subject.lower()
                and a["min_score"] <= score <= a["max_score"]]
    if filtered:
        return random.choice(filtered)
    return {
        "advice_parent": "No parent advice available.",
        "advice_teacher": "No teacher advice available.",
        "study_plan": "No study plan available.",
        "recommended_books": "No books available."
    }

def ocr_from_array(image):
    image = np.ascontiguousarray(image)
-     gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-     result = ochr.ocr(gray, cls=True)
-     return "\n".join([line[1][0] for line in result[0]])
+     try:
+         gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+     except Exception as e:
+         print("Error converting image to grayscale:", e)
+         return ""
+     result = ochr.ocr(gray, cls=True)
+
+     # If result is None or empty, log and return an empty string.
+     if not result or not result[0]:
+         print("PaddleOCR returned no results for this image.")
+         return ""
+
+     # Join the detected text parts.
+     try:
+         # This assumes result[0] contains the OCR detections for the image.
+         return "\n".join([line[1][0] for line in result[0]])
+     except Exception as e:
+         print("Error processing OCR result:", e)
+         return ""
+

def preprocess_text(text):
    return " ".join(
        token.lemma_ for token in nlp(text.lower())
        if not token.is_stop and not token.is_punct
    )

def text_to_vector_sbert(text):
    return sbert_model.encode(text, convert_to_tensor=True)

def compute_similarity(text1, text2):
    return util.pytorch_cos_sim(
        text_to_vector_sbert(text1),
        text_to_vector_sbert(text2)
    ).item()

def contains_keyword(reference, student):
    tr = str.maketrans('', '', string.punctuation)
    return bool(
        set(reference.lower().translate(tr).split()) &
        set(student.lower().translate(tr).split())
    )

def check_entailment(student, reference):
    scores = entailment_classifier(f"{student} </s></s> {reference}", truncation=True)
    for item in scores[0]:
        if item["label"] == "ENTAILMENT":
            return item["score"]
    return 0.0

def entity_match(ref_ans, stud_ans):
    return bool({ent.text.lower() for ent in nlp(ref_ans).ents} &
                {ent.text.lower() for ent in nlp(stud_ans).ents})

def extract_numbers(text):
    nums = set(re.findall(r'\d+', text))
    words = {"zero": "0", "one": "1", "two": "2", "three": "3",
             "four": "4", "five": "5", "six": "6", "seven": "7",
             "eight": "8", "nine": "9", "ten": "10"}
    for w in text.lower().split():
        tok = w.strip(string.punctuation)
        if tok in words:
            nums.add(words[tok])
    return nums

def is_year(text):
    clean = text.strip().replace(".", "")
    years = re.findall(r'\d{4}', clean)
    return len(years) == 1 and re.sub(r'\d{4}', '', clean).strip(string.punctuation + " ") == ""

def advanced_grade(ref_ans, stud_ans, similarity, threshold=0.8, max_grade=100):
    min_corr, min_inc = 50, 30
    tr = str.maketrans('', '', string.punctuation)
    r = ref_ans.lower().translate(tr).strip()
    s = stud_ans.lower().translate(tr).strip()
    base = similarity * max_grade
    if is_year(ref_ans):
        ref_years = re.findall(r'\d{4}', ref_ans)
        stud_years = re.findall(r'\d{4}', stud_ans)
        if not stud_years or ref_years[0] != stud_years[0]:
            grade = min_inc if contains_keyword(ref_ans, stud_ans) else 0
            mark = "Incorrect"
        else:
            grade, mark = max_grade, "Correct"
    elif r == s or (len(s.split()) <= 3 and contains_keyword(ref_ans, stud_ans)) or \
            (extract_numbers(stud_ans) & extract_numbers(ref_ans)) or \
            check_entailment(stud_ans, ref_ans) > 0.9:
        grade, mark = max_grade, "Correct"
    elif entity_match(ref_ans, stud_ans) or (contains_keyword(ref_ans, stud_ans) and similarity < threshold):
        grade = max(base, threshold * max_grade)
        mark = "Correct"
    elif contains_keyword(ref_ans, stud_ans) or similarity >= threshold:
        grade = min(base + 10, max_grade)
        mark = "Correct"
    else:
        grade = max(base, min_inc) if contains_keyword(ref_ans, stud_ans) else base
        mark = "Incorrect"
    if mark == "Correct":
        rw, sw = len(ref_ans.split()), len(stud_ans.split())
        if rw > 0 and sw < rw:
            grade = max(min_corr, grade * (sw / rw))
    return grade, mark

def correct_token(token):
    rep = {'o':'0','O':'0','l':'1','I':'1','|':'1','z':'2','Z':'2',
           'e':'3','E':'3','a':'4','A':'4','y':'4','Y':'4','s':'5','S':'5',
           'g':'6','G':'6','t':'7','T':'7','b':'8','B':'8','q':'9','Q':'9'}
    return ''.join(rep.get(c, c) for c in token)

def fix_question_prefix(line):
    if not line:
        return line
    first, rest = line[0], line[1:]
    mapping = {'I': '1', 'l': '1', '|': '1', 'S': '5', 's': '5'}
    if first in mapping and rest and rest[0] in ".- )":
        return mapping[first] + rest
    return line

def parse_reference_answers(text):
    ref_dict = {}
    lines = text.splitlines()
    current_question = None
    question_text = ""
    answer_text = ""
    i = 0
    while i < len(lines):
        raw_line = lines[i]
        line = fix_question_prefix(raw_line).strip()
        if not line:
            i += 1
            continue
        q_match = re.match(r'^(\d+)[\s\.\-]+(.+)', line)
        if q_match:
            if current_question is not None and question_text:
                ref_dict[current_question] = {"question": question_text.strip(), "answer": answer_text.strip()}
            current_question = int(q_match.group(1))
            question_text = q_match.group(2).strip()
            answer_text = ""
            if i + 1 < len(lines) and "answer" in lines[i+1].lower():
                answer_line = fix_question_prefix(lines[i+1]).strip()
                answer_match = re.match(r'^answer:?[ \t]*(.+)', answer_line, re.IGNORECASE)
                if answer_match:
                    answer_text = answer_match.group(1).strip()
                    i += 2
                    continue
            i += 1
            continue
        a_match = re.match(r'^answer:?[ \t]*(.+)', line, re.IGNORECASE)
        if a_match and current_question is not None:
            answer_text = a_match.group(1).strip()
            i += 1
            continue
        if current_question is not None:
            if not answer_text:
                question_text += " " + line
            else:
                answer_text += " " + line
        i += 1
    if current_question is not None and question_text:
        ref_dict[current_question] = {"question": question_text.strip(), "answer": answer_text.strip()}
    for q in ref_dict:
        if not ref_dict[q]["question"].strip().endswith('?'):
            ref_dict[q]["question"] += '?'
    return ref_dict

def parse_student_answers(text):
    stud_dict = {}
    lines = text.splitlines()
    for line in lines:
        line = line.strip()
        if not line:
            continue
        match = re.match(r'^(\d+)[\s\.\-]+(.+)', line)
        if match:
            stud_dict[int(match.group(1))] = match.group(2).strip()
            continue
        match = re.match(r'^(\d+)[\.|\)][\s]*(.+)', line)
        if match:
            stud_dict[int(match.group(1))] = match.group(2).strip()
    return stud_dict

def print_parsed_answers(ref_dict, stud_dict):
    print("\n" + "="*80)
    print("PARSED QUESTIONS AND ANSWERS".center(80))
    print("="*80)
    for q in sorted(ref_dict.keys()):
        print(f"\nQuestion {q}:")
        print(f" Question text: {ref_dict[q]['question']}")
        print(f" Reference answer: {ref_dict[q]['answer']}")
        print(f" Student answer: {stud_dict.get(q, 'No answer provided')}")
    print("\nMissing reference questions:", set(stud_dict.keys()) - set(ref_dict.keys()))
    print("Missing student answers:", set(ref_dict.keys()) - set(stud_dict.keys()))
    print("="*80 + "\n")

def display_results_in_terminal(results, mcq_results=None):
    print("\n" + "="*80)
    print("GRADING DETAILS".center(80))
    print("="*80)
    if results:
        print("\nFREE-TEXT ANSWERS GRADING:\n")
        for r in results:
            print(f"Question {r['Question Number']}:")
            print(f" Subject: {r['Subject']}")
            print(f" Similarity Score: {r['Similarity']:.2f}")
            print(f" Grade: {r['Grade']:.1f}")
            print(f" Mark: {r['Mark']}")
            print("-"*70)
    if mcq_results:
        print("\nMCQ ANSWERS GRADING:\n")
        print(f"Correct Questions: {mcq_results['Correct Questions']}")
        print(f"Incorrect Questions: {mcq_results['Incorrect Questions']}")
        print(f"Total Grade: {mcq_results['Total Grade']:.1f}")
        print(f"Letter Grade: {mcq_results['Letter Grade']}")
    print("="*80 + "\n")

def grade_answers(ref_dict, stud_dict, advice_list, threshold=0.8, max_grade=100):
    results, total, p_adv, t_adv = [], 0, "", ""
    for q in sorted(ref_dict):
        entry = ref_dict[q]
        sim = compute_similarity(
            preprocess_text(entry['answer']),
            preprocess_text(stud_dict.get(q, ''))
        )
        grade, mark = advanced_grade(entry['answer'], stud_dict.get(q, ''), sim, threshold, max_grade)
        total += grade
        adv = get_advice(grade, classify_subject(entry['question']), advice_list)
        if not p_adv and adv['advice_parent']:
            p_adv = adv['advice_parent']
        if not t_adv and adv['advice_teacher']:
            t_adv = adv['advice_teacher']
        results.append(OrderedDict([
            ("Question Number", q),
            ("Question", entry['question']),
            ("Subject", classify_subject(entry['question'])),
            ("Reference", entry['answer']),
            ("Student", stud_dict.get(q, 'No answer provided')),
            ("Similarity", sim),
            ("Grade", grade),
            ("Mark", mark),
            ("Advice for Parents", adv['advice_parent']),
            ("Advice for Teachers", adv['advice_teacher']),
            ("Study Plan", adv['study_plan']),
            ("Recommended Books", adv['recommended_books'])
        ]))
    overall = total / len(ref_dict) if ref_dict else 0
    display_results_in_terminal(results)
    return results, overall, numeric_to_letter_grade(overall), \
        (p_adv or "Encourage your child to review areas where they struggled."), \
        (t_adv or "Consider focusing additional instruction on areas where the student showed weakness.")

def extract_mcq_answers_from_image(image, num_questions=None):
    margin, vgap, header = 50, 60, 60
    if num_questions is None:
        num_questions = (image.shape[0] - 2*margin - header) // vgap
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if len(image.shape) == 3 else image
    _, thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
    answers = {}
    for i in range(1, num_questions + 1):
        y = margin + header + (i - 1) * vgap
        for idx, opt in enumerate(["A", "B", "C", "D"]):
            x = margin + 50 + idx * 100
            r = 15
            reg = thresh[y-r:y+r, x-r:x+r]
            if reg.size and np.mean(reg) < 150:
                answers[i] = opt
                break
    return answers

def numeric_to_letter_grade(grade):
    if grade >= 90: return "A+"
    if grade >= 85: return "A"
    if grade >= 80: return "A-"
    if grade >= 75: return "B+"
    if grade >= 70: return "B"
    if grade >= 65: return "B-"
    if grade >= 60: return "C+"
    if grade >= 50: return "C"
    if grade >= 40: return "D+"
    if grade >= 30: return "D"
    return "F"

def grade_mcq_answers(correct_dict, student_dict, points_per_question=1):
    correct, incorrect = [], []
    score = 0
    for q in sorted(correct_dict):
        if student_dict.get(q) == correct_dict[q]:
            correct.append(q)
            score += points_per_question
        else:
            incorrect.append(q)
    total = (score / (len(correct_dict) * points_per_question)) * 100 if correct_dict else 0
    return {"Correct Questions": correct,
            "Incorrect Questions": incorrect,
            "Total Grade": total,
            "Letter Grade": numeric_to_letter_grade(total)}

def generate_random_id():
    return random.randint(10000, 99999)

@app.route('/grade_exam', methods=['POST'])
def grade_exam():
    if 'ref_image' not in request.files or 'stud_image' not in request.files:
        return Response(json.dumps({"Error": "Missing one or both image files."}), status=400, mimetype='application/json')
    ref_file = request.files['ref_image']
    stud_file = request.files['stud_image']
    ref_bytes = np.frombuffer(ref_file.read(), np.uint8)
    stud_bytes = np.frombuffer(stud_file.read(), np.uint8)
    ref_img = cv2.imdecode(ref_bytes, cv2.IMREAD_COLOR)
    stud_img = cv2.imdecode(stud_bytes, cv2.IMREAD_COLOR)
    if ref_img is None or stud_img is None:
        return Response(json.dumps({"Error": "One or both images could not be processed."}), status=400, mimetype='application/json')
    margin, vgap, header = 50, 60, 60
    computed_questions = (ref_img.shape[0] - 2*margin - header) // vgap
    mcq_ref = extract_mcq_answers_from_image(ref_img, num_questions=computed_questions)
    mcq_stud = extract_mcq_answers_from_image(stud_img, num_questions=computed_questions)
    if len(mcq_ref) >= computed_questions // 2 and len(mcq_stud) >= computed_questions // 2:
        mcq_result = grade_mcq_answers(mcq_ref, mcq_stud)
        total_grade = mcq_result["Total Grade"]
        letter_grade = mcq_result["Letter Grade"]
        parent_advice = "Review incorrect answers with your child and focus on identified knowledge gaps."
        teacher_advice = "Consider revisiting topics with high error rates in upcoming lessons."
        display_results_in_terminal(None, mcq_result)
    else:
        advice_file = 'data/advice.csv'
        ref_text = ocr_from_array(ref_img)
        stud_text = ocr_from_array(stud_img)
        ref_answers = parse_reference_answers(ref_text)
        stud_answers = parse_student_answers(stud_text)
        print_parsed_answers(ref_answers, stud_answers)
        advice_list = load_advice(advice_file)
        results, total_grade, letter_grade, parent_advice, teacher_advice = grade_answers(
            ref_answers, stud_answers, advice_list, threshold=0.8, max_grade=100
        )
    exam_id = request.form.get("examId")
    student_idg = request.form.get("StudentIDg")
    parent_id = request.form.get("parentId")
    teacher_id = request.form.get("teacherId")
    grade_payload = {
        "id": str(generate_random_id()),
        "examId": exam_id,
        "obtainedMarks": str(total_grade),
        "grade": letter_grade,
        "StudentIDg": student_idg
    }
    advice_payload = {
        "id": str(generate_random_id()),
        "parentAdvice": parent_advice,
        "teacherAdvice": teacher_advice,
        "parentId": parent_id,
        "teacherId": teacher_id
    }
    try:
        grade_resp = requests.post("http://54.242.19.19:3000/api/grades/", json=grade_payload)
        advice_resp = requests.post("http://54.242.19.19:3000/api/advices/create/", json=advice_payload)
        print("→ Posted grade payload:", json.dumps(grade_payload, indent=2))
        print("→ Grade API response:", grade_resp.status_code, grade_resp.text)
        print("→ Posted advice payload:", json.dumps(advice_payload, indent=2))
        print("→ Advice API response:", advice_resp.status_code, advice_resp.text)
    except Exception as e:
        print("Error sending to external APIs:", e)
    return Response(
        json.dumps({"status": "ok", "message": "Grade and advice sent to external services."}),
        status=200, mimetype="application/json"
    )

if __name__ == '__main__':
    port = int(os.environ.get("PORT", 7860))
    app.run(host="0.0.0.0", port=port, debug=False)
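
For reference, a minimal sketch of how a client might call the /grade_exam endpoint defined in this file. It assumes the app is running locally on the default port (7860); the image file names and ID values are hypothetical placeholders, while the field names (ref_image, stud_image, examId, StudentIDg, parentId, teacherId) come from the handler itself.

import requests

# Hypothetical exam-sheet scans; the endpoint requires both files.
with open("reference_sheet.jpg", "rb") as ref, open("student_sheet.jpg", "rb") as stud:
    resp = requests.post(
        "http://localhost:7860/grade_exam",
        files={"ref_image": ref, "stud_image": stud},
        data={
            "examId": "1",        # placeholder IDs, forwarded to the external grade/advice APIs
            "StudentIDg": "2",
            "parentId": "3",
            "teacherId": "4",
        },
    )
print(resp.status_code, resp.json())

Note that the HTTP response only confirms that the grade and advice payloads were forwarded; the per-question grading details are printed to the server log by display_results_in_terminal.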