Kartikeyssj2 committed on
Commit
8543ef4
1 Parent(s): 1aad71c

ufdpasfjasf

Dockerfile ADDED
@@ -0,0 +1,24 @@
+ # BASE IMAGE
+ FROM python:3.12.3-slim
+
+ # SET WORKING DIRECTORY
+ WORKDIR /app
+
+ # COPY REQUIREMENTS FILE
+ COPY requirements.txt .
+
+ # INSTALL SYSTEM DEPENDENCIES AND UPGRADE PIP
+ RUN apt-get update \
+     && apt-get install -y git build-essential libffi-dev libssl-dev python3-dev \
+     && pip install --upgrade pip \
+     && pip install -r requirements.txt \
+     && apt-get clean
+
+ # COPY ALL CONTENTS OF THE CURRENT DIRECTORY
+ COPY . .
+
+ # RUN THE PYTHON SCRIPT TO DOWNLOAD NECESSARY MODELS AND FILES
+ RUN python download_models.py
+
+ # USE 4 WORKER PROCESSES AND ENABLE LOGGING TO STDOUT
+ CMD ["gunicorn", "-w", "4", "-k", "uvicorn.workers.UvicornWorker", "fast_api:app", "--log-level", "info", "--access-logfile", "-", "--error-logfile", "-"]
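The CMD above starts four Uvicorn workers under gunicorn; since no --bind flag is passed, gunicorn listens on its default of 127.0.0.1:8000 inside the container. A minimal client sketch against that default, assuming the port is reachable locally and a sample.wav exists (both hypothetical):

    import requests

    # POST one audio file as multipart form data to the /transcribe route
    with open("sample.wav", "rb") as f:
        resp = requests.post("http://127.0.0.1:8000/transcribe", files={"file": f})
    print(resp.json())  # e.g. {"transcription": "..."}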
README.md CHANGED
@@ -1,8 +1,8 @@
---
- title: Sv Work
- emoji:
+ title: Scoring
+ emoji: 👁
colorFrom: indigo
- colorTo: indigo
+ colorTo: red
sdk: docker
pinned: false
---
Whisper_Word2Vec_Deployment ADDED
@@ -0,0 +1 @@
+ Subproject commit 40c6120d1ba5b73520a1c80ad84c09377663b28f
__pycache__/fast_api.cpython-312.pyc ADDED
Binary file (18.7 kB).
download_models.py ADDED
@@ -0,0 +1,17 @@
+ import gensim.downloader as api
+ import os
+ import whisper
+ import torch
+
+ # LOAD THE WORD2VEC MODEL
+ word_2_vec = api.load('word2vec-google-news-300')
+
+ # SAVE THE WORD2VEC MODEL LOCALLY
+ word_2_vec.save("word2vec-google-news-300.model")
+
+ # LOAD THE WHISPER MODEL
+ model = whisper.load_model("tiny")
+
+ # SAVE THE WHISPER MODEL LOCALLY USING TORCH
+ save_path = "whisper_tiny_model.pt"  # CHOOSE YOUR DESIRED FILE NAME
+ torch.save(model.state_dict(), save_path)  # SAVE MODEL STATE DICTIONARY
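Because the Dockerfile runs this script at image build time, both artifacts must load back cleanly when fast_api.py starts. A quick sanity check, assuming the two files sit in the working directory:

    from gensim.models import KeyedVectors
    import torch

    wv = KeyedVectors.load("word2vec-google-news-300.model")
    assert wv.vector_size == 300  # fast_api.py assumes 300-dim vectors

    state_dict = torch.load("whisper_tiny_model.pt", map_location="cpu")
    print(len(state_dict), "tensors in the checkpoint")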
fast_api.py ADDED
@@ -0,0 +1,539 @@
+ import whisper
+ from fastapi import FastAPI, UploadFile, File, Form
+ from pydantic import BaseModel
+ import gensim.downloader as api
+ from gensim.models import KeyedVectors
+ import torch
+ import pickle
+ import numpy as np
+
+ def load_whisper_model(model_path, device='cpu'):
+     # Load model architecture
+     model = whisper.model.Whisper(
+         whisper.model.ModelDimensions(
+             n_mels=80,
+             n_audio_ctx=1500,
+             n_audio_state=384,  # Adjusted to match checkpoint
+             n_audio_head=3,
+             n_audio_layer=2,
+             n_vocab=51865,
+             n_text_ctx=448,
+             n_text_state=384,  # Adjusted to match checkpoint
+             n_text_head=3,
+             n_text_layer=2
+         )
+     )
+
+     # Load state dict (weights_only avoids unpickling arbitrary objects)
+     state_dict = torch.load(model_path, map_location=device, weights_only=True)
+     model.load_state_dict(state_dict, strict=False)
+
+     model.eval()
+     return model
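The hand-filled ModelDimensions above depend on strict=False to tolerate any mismatch with the checkpoint. An alternative sketch: persist the dims alongside the weights in download_models.py and rebuild the exact architecture at load time (the whisper_tiny_full.pt name is hypothetical):

    import torch
    import whisper

    # save side (download_models.py): keep dims next to the weights
    m = whisper.load_model("tiny")
    torch.save({"dims": m.dims.__dict__, "state_dict": m.state_dict()},
               "whisper_tiny_full.pt")

    # load side (fast_api.py): a strict load then succeeds
    ckpt = torch.load("whisper_tiny_full.pt", map_location="cpu")
    restored = whisper.model.Whisper(whisper.model.ModelDimensions(**ckpt["dims"]))
    restored.load_state_dict(ckpt["state_dict"])
    restored.eval()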
+
+ # Load the saved Word2Vec model
+ word2vec_model = KeyedVectors.load("word2vec-google-news-300.model")
+ model = load_whisper_model("whisper_tiny_model.pt")
+
+ def load_model(pickle_file_path: str):
+     """Load a model from a pickle file."""
+     with open(pickle_file_path, 'rb') as file:
+         model = pickle.load(file)
+     return model
+
+ pronunciation_fluency_model = load_model("pronunciation_fluency_v2.pkl")
+
+ app = FastAPI()
+
+ def transcribe(audio_file_path: str, model):
+     # Load audio and run inference
+     result = model.transcribe(audio_file_path)
+     return result["text"]
+
+ @app.post("/transcribe")
+ async def transcribe_audio(file: UploadFile = File(...)):
+     # SAVE THE UPLOADED FILE TEMPORARILY
+     with open(file.filename, "wb") as buffer:
+         buffer.write(await file.read())
+
+     # TRANSCRIBE THE AUDIO
+     transcription = transcribe(file.filename, model)
+
+     return {"transcription": transcription}
+
+
+ def Get_P_F_Score(transcription: str):
+     words = transcription.split()
+
+     # Accumulate word vectors elementwise; start from a zero numpy array
+     # (a plain Python list would be extended by '+=', not summed)
+     cumulative_vector_representation = np.zeros(300)
+     for word in words:
+         if word in word2vec_model:
+             cumulative_vector_representation += word2vec_model[word]
+
+     print(cumulative_vector_representation[0:5])
+     print(len(cumulative_vector_representation))
+
+     if np.any(np.isnan(cumulative_vector_representation)):
+         print("Input contains NaN values, handle missing values before prediction.")
+
+     print("\n\n")
+
+     output = pronunciation_fluency_model.predict([cumulative_vector_representation])
+     print(output)
+
+     return output
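Judging by the indexing in the /pronunciation_fluency_score endpoint further down, the pickled regressor returns two outputs per row (pronunciation, fluency). In isolation, the call looks like this sketch:

    import numpy as np

    vec = np.zeros(300)                 # the summed representation built above
    X = np.asarray([vec])               # predict expects a 2-D array, shape (1, 300)
    pred = pronunciation_fluency_model.predict(X)
    pron, flu = pred[0][0], pred[0][1]  # two scores per row, scaled by 10 downstream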
+
+
+ def get_average_vector(sentence):
+     # TOKENIZE THE SENTENCE INTO WORDS
+     words = sentence.lower().split()
+
+     # FILTER OUT WORDS NOT IN THE WORD2VEC VOCABULARY
+     valid_words = [word for word in words if word in word2vec_model]
+
+     # RETURN ZERO VECTOR IF NO VALID WORDS FOUND
+     if not valid_words:
+         return np.zeros(word2vec_model.vector_size)
+
+     # COMPUTE AVERAGE VECTOR FOR VALID WORDS
+     return np.mean([word2vec_model[word] for word in valid_words], axis=0)
+
+ from sklearn.metrics.pairwise import cosine_similarity
+
+ def get_similarity_score(topic, transcription):
+     # GET AVERAGE VECTORS FOR BOTH STRINGS
+     topic_vector = get_average_vector(topic)
+     transcription_vector = get_average_vector(transcription)
+
+     print("topic vector:", topic_vector)
+     print("transcription vector:", transcription_vector)
+
+     # RESHAPE VECTORS FOR COSINE SIMILARITY
+     topic_vector = topic_vector.reshape(1, -1)
+     transcription_vector = transcription_vector.reshape(1, -1)
+
+     print("reshaping done")
+
+     # COMPUTE COSINE SIMILARITY
+     similarity = cosine_similarity(topic_vector, transcription_vector)
+
+     print("Similarity:", similarity)
+
+     # CLAMP TO [0, 1]: COSINE SIMILARITY LIES IN [-1, 1],
+     # AND EVERY CALLER SCALES THIS VALUE BY 100
+     output = similarity[0][0]
+     output = max(output, 0)
+     output = min(1.0, output)
+
+     # RETURN SIMILARITY SCORE (IT'S A SINGLE VALUE)
+     return output
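A toy call of the content-score path (the strings are hypothetical); the clamped value lies in [0, 1] and every caller scales it by 100:

    score = get_similarity_score("space exploration", "astronauts travel beyond the earth")
    assert 0.0 <= score <= 1.0
    print(round(score * 100, 2), "/ 100")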
+
+
+ @app.post("/pronunciation_fluency_score")
+ async def pronunciation_fluency_scoring(
+     file: UploadFile = File(...),
+     topic: str = Form(...)
+ ):
+     # SAVE THE UPLOADED FILE TEMPORARILY
+     with open(file.filename, "wb") as buffer:
+         buffer.write(await file.read())
+
+     # TRANSCRIBE THE AUDIO
+     transcription = transcribe(file.filename, model)
+
+     pronunciation_fluency_score = Get_P_F_Score(transcription)
+     print(pronunciation_fluency_score)
+     print(type(pronunciation_fluency_score))
+
+     content_score = get_similarity_score(topic, transcription) * 100
+
+     return {
+         "pronunciation score": pronunciation_fluency_score[0][0] * 10,
+         "fluency score": pronunciation_fluency_score[0][1] * 10,
+         "content score": content_score
+     }
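This route mixes a file upload with a plain string field, so clients must send multipart form data (filename and topic below are hypothetical):

    import requests

    with open("sample.wav", "rb") as f:
        resp = requests.post(
            "http://127.0.0.1:8000/pronunciation_fluency_score",
            files={"file": f},
            data={"topic": "my favourite holiday"},
        )
    print(resp.json())  # pronunciation, fluency and content scores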
+
+
+ import string
+ import asyncio
+ import re
+ from textblob import TextBlob
+ import nltk
+
+ def is_valid_summary_format(summary: str) -> bool:
+     # CHECK IF THE SUMMARY CONTAINS BULLET-POINT MARKERS
+     if '-' in summary or '*' in summary:
+         return True
+
+     # CHECK IF THE SUMMARY CONSISTS MOSTLY OF SHORT SENTENCES (<= 70 WORDS EACH)
+     sentences = re.split(r'[.!?]', summary)
+     short_sentences = sum(len(sentence.split()) <= 70 for sentence in sentences if sentence.strip())
+
+     print("Short Sentences:", short_sentences)
+
+     # CONSIDER IT A VALID FORMAT IF AT LEAST HALF OF THE SENTENCES ARE SHORT
+     return short_sentences >= len(sentences) / 2
+
+ def form_score_summary(summary: str) -> float:
+     # CONVERT THE SUMMARY TO UPPERCASE
+     summary_upper = summary.upper()
+
+     # REMOVE PUNCTUATION
+     summary_clean = re.sub(r'[^\w\s]', '', summary_upper)
+
+     # COUNT THE NUMBER OF WORDS
+     word_count = len(summary_clean.split())
+
+     # CHECK IF THE SUMMARY FORMAT IS VALID
+     valid_format = is_valid_summary_format(summary)
+
+     print("\n\n word count:", word_count, " valid_format:", valid_format)
+
+     # CALCULATE SCORE BASED ON WORD COUNT AND FORMAT
+     if valid_format:
+         if 45 <= word_count <= 75:
+             if word_count < 50:
+                 score = 50 + (word_count - 45) * (50 / 5)  # Gradual increase from 50
+             elif word_count <= 70:
+                 score = 100  # Best score range
+             else:
+                 score = 100 - (word_count - 70) * (50 / 5)  # Gradual decrease from 100
+         else:
+             score = 0  # Worst score if word count is out of acceptable range
+     else:
+         score = 0  # Worst score if format is invalid
+
+     # CLAMP SCORE BETWEEN 0 AND 100
+     score = float(score)
+     return max(0.0, min(100.0, score))
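The piecewise curve ramps from 50 toward 100 across 45-49 words, holds 100 through 70 words, then ramps back down to 50 at 75; anything outside 45-75, or an invalid format, scores 0. A quick check with synthetic input:

    print(form_score_summary("word " * 60))   # 100.0: valid format, on the plateau
    print(form_score_summary("word " * 40))   # 0.0: below the 45-word floor
    print(form_score_summary("word " * 36 + ". " + "word " * 37))  # 73 words -> 70.0, on the downward ramp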
+
+
+ def grammar_score(text: str) -> int:
+     # Create a TextBlob object
+     blob = TextBlob(text)
+
+     # Check for grammatical errors: count sentences that differ from their corrected form
+     errors = 0
+     for sentence in blob.sentences:
+         if sentence.correct() != sentence:
+             errors += 1
+
+     print("\n\n Number of grammatical errors:", errors)
+
+     # Deduct 5 points per flagged sentence, floored at 0
+     errors *= 5
+     result = 100 - errors
+     return max(0, result)
+
+ def vocabulary_score(text: str) -> float:
+     print("Performing vocabulary score \n\n")
+
+     # Create a TextBlob object
+     blob = TextBlob(text)
+
+     # Extract words from the text
+     words = blob.words
+
+     # Count the total words and correctly spelled words
+     total_words = len(words)
+     correctly_spelled = sum(1 for word in words if word == TextBlob(word).correct())
+
+     # Calculate the percentage of correctly spelled words
+     if total_words == 0:
+         return 0.0  # Avoid division by zero if there are no words
+
+     percentage_correct = (correctly_spelled / total_words) * 100
+     percentage_correct = min(percentage_correct, 100)
+     percentage_correct = max(0, percentage_correct)
+     percentage_correct = round(percentage_correct, 2)
+
+     print("Percentage Correct:", percentage_correct)
+
+     return percentage_correct
+
+
+ @app.post("/summarization_scoring/")
+ def summarization_score(essay: str = Form(), summarization: str = Form()):
+     content_score_result, form_score_result, grammar_score_result, vocabulary_score_result = (
+         float(get_similarity_score(essay, summarization)) * 100,
+         float(form_score_summary(summarization)),
+         float(grammar_score(summarization)),
+         float(vocabulary_score(summarization))
+     )
+
+     print("Completed \n\n\n")
+
+     response = {
+         "Content Score: ": content_score_result,
+         "Form Score: ": form_score_result,
+         "Grammar Score: ": grammar_score_result,
+         "Vocabulary Score: ": vocabulary_score_result,
+         "Overall Summarization Score: ": round((content_score_result + form_score_result + grammar_score_result + vocabulary_score_result) / 4, 2)
+     }
+
+     print(response)
+     return response
+
+
+ '''
+ Transitional words can significantly contribute to the development, structure, and coherence of a text.
+
+ Development: Transitional words help to show how ideas build upon each other and progress
+ throughout the essay. They can introduce new points, provide examples, or signal a shift in focus.
+
+ Structure: Transitional words help to organize the text by indicating relationships between
+ ideas. They can show cause and effect, compare and contrast, or signal a sequence of events.
+
+ Coherence: Transitional words help to create a smooth flow between sentences and paragraphs,
+ making the text easier to understand and follow. They can clarify connections between
+ ideas and prevent the text from feeling disjointed.
+ '''
+
+
+ addition_transitional_words = [
+     "and", "also", "too", "in addition", "furthermore", "moreover", "besides", "likewise",
+     "similarly", "equally important", "not to mention", "as well as", "what's more",
+     "on top of that", "to boot", "in the same way", "by the same token", "similarly",
+     "likewise", "in a similar vein", "correspondingly", "at the same time", "concurrently",
+     "simultaneously", "not only... but also", "both... and", "as well", "and then",
+     "and so forth", "and so on"
+ ]
+ contrast_transitional_words = [
+     "but", "however", "nevertheless", "nonetheless", "on the other hand", "on the contrary",
+     "in contrast", "conversely", "although", "though", "even though", "despite", "in spite of",
+     "regardless of", "while", "whereas", "yet", "still", "even so", "even if", "at the same time",
+     "by the same token", "equally", "in common", "similarly", "just like", "just as", "as well as",
+     "resemble", "equally", "in common", "by the same token"
+ ]
+ cause_effect_transitional_words = [
+     "because", "since", "as", "due to", "owing to", "thanks to", "on account of",
+     "as a result", "consequently", "therefore", "hence", "thus", "so", "accordingly",
+     "for this reason", "as a consequence", "in consequence", "in that case",
+     "that being the case", "for that reason", "as a result of", "because of",
+     "on account of", "owing to", "due to", "thanks to"
+ ]
+ time_transitional_words = [
+     "first", "second", "third", "next", "then", "after", "before", "later", "earlier",
+     "previously", "subsequently", "following", "meanwhile", "simultaneously",
+     "at the same time", "concurrently", "in the meantime", "in the interim", "afterwards",
+     "thereafter", "finally", "lastly", "ultimately", "in conclusion", "to conclude",
+     "in summary", "to sum up"
+ ]
+ emphasis_transitional_words = [
+     "indeed", "in fact", "certainly", "assuredly", "without a doubt", "undoubtedly",
+     "unquestionably", "undeniably", "absolutely", "positively", "emphatically",
+     "decisively", "strongly", "forcefully", "with conviction", "with certainty",
+     "with assurance", "without hesitation", "without question", "without fail", "without doubt"
+ ]
+ example_transitional_words = [
+     "for example", "for instance", "such as", "like", "as an illustration", "to illustrate",
+     "to demonstrate", "to exemplify", "namely", "specifically", "in particular",
+     "particularly", "especially"
+ ]
+ conclusion_transitional_words = [
+     "in conclusion", "to conclude", "in summary", "to sum up", "finally", "lastly",
+     "ultimately", "therefore", "hence", "thus", "so", "accordingly", "as a result",
+     "consequently"
+ ]
+ transition_between_sections_transitional_words = [
+     "in the following section", "moving on to", "now", "let's explore",
+     "turning our attention to", "to delve deeper", "we will now examine",
+     "next", "at this point", "at this juncture", "furthermore", "moreover",
+     "in addition"
+ ]
+ miscellaneous_transition_words_list = [
+     # Clarification
+     "in other words", "that is to say", "namely", "to put it another way",
+     "in simpler terms", "to clarify", "to explain further", "to elaborate",
+     "to be more specific", "to be more exact",
+
+     # Concession
+     "admittedly", "granted", "of course", "naturally", "it is true that",
+     "it must be admitted that", "it cannot be denied that", "it goes without saying that",
+
+     # Digression
+     "by the way", "incidentally", "aside from that", "apart from that",
+
+     # Repetition
+     "again", "once again", "still", "further", "furthermore", "moreover", "in addition"
+ ]
+ contrast_within_sentence_transitional_words = [
+     "but", "however", "nevertheless", "nonetheless", "on the other hand",
+     "in contrast", "conversely", "although", "though", "even though",
+     "despite", "in spite of", "regardless of", "while", "whereas",
+     "yet", "still", "even so", "even if"
+ ]
+ comparison_transitional_words = [
+     "similarly", "likewise", "in the same way", "equally", "in common",
+     "by the same token", "just like", "just as", "as well as", "resemble"
+ ]
+ cause_and_effect_within_sentence_transitional_words = [
+     "because", "since", "as", "due to", "owing to", "thanks to",
+     "on account of", "as a result", "consequently", "therefore",
+     "hence", "thus", "so", "accordingly", "for this reason",
+     "as a consequence", "in consequence", "in that case",
+     "that being the case", "for that reason", "as a result of",
+     "because of", "on account of", "owing to", "due to", "thanks to"
+ ]
+ emphasis_within_sentence_transitional_words = [
+     "indeed", "in fact", "certainly", "assuredly", "without a doubt",
+     "undoubtedly", "unquestionably", "undeniably", "absolutely",
+     "positively", "emphatically", "decisively", "strongly", "forcefully",
+     "with conviction", "with certainty", "with assurance",
+     "without hesitation", "without question", "without fail", "without doubt"
+ ]
+ concession_digression_repetition_transitional_words = [
+     # Concession
+     "admittedly", "granted", "of course", "naturally",
+     "it is true that", "it must be admitted that",
+     "it cannot be denied that", "it goes without saying that",
+
+     # Digression
+     "by the way", "incidentally", "aside from that",
+     "apart from that",
+
+     # Repetition
+     "again", "once again", "still", "further",
+     "furthermore", "moreover", "in addition"
+ ]
+
+ def dsc_score(essay: str):
+     # Normalize the essay
+     essay_lower = essay.lower()
+
+     # Helper function to count occurrences of transitional words
+     def count_transitional_words(word_list):
+         return sum(essay_lower.count(word) for word in word_list)
+
+     # Calculate counts for each type of transitional word list
+     addition_count = count_transitional_words(addition_transitional_words)
+     contrast_count = count_transitional_words(contrast_transitional_words)
+     cause_effect_count = count_transitional_words(cause_effect_transitional_words)
+     time_count = count_transitional_words(time_transitional_words)
+     emphasis_count = count_transitional_words(emphasis_transitional_words)
+     example_count = count_transitional_words(example_transitional_words)
+     conclusion_count = count_transitional_words(conclusion_transitional_words)
+     transition_between_sections_count = count_transitional_words(transition_between_sections_transitional_words)
+     misc_count = count_transitional_words(miscellaneous_transition_words_list)
+     contrast_within_sentence_count = count_transitional_words(contrast_within_sentence_transitional_words)
+     comparison_count = count_transitional_words(comparison_transitional_words)
+     cause_and_effect_within_sentence_count = count_transitional_words(cause_and_effect_within_sentence_transitional_words)
+     emphasis_within_sentence_count = count_transitional_words(emphasis_within_sentence_transitional_words)
+     concession_digression_repetition_count = count_transitional_words(concession_digression_repetition_transitional_words)
+
+     # Calculate total transitional word count
+     total_transitional_count = (
+         addition_count + contrast_count + cause_effect_count + time_count +
+         emphasis_count + example_count + conclusion_count +
+         transition_between_sections_count + misc_count +
+         contrast_within_sentence_count + comparison_count +
+         cause_and_effect_within_sentence_count + emphasis_within_sentence_count +
+         concession_digression_repetition_count
+     )
+
+     print("\n\n\n Total Transitional Words Count:", total_transitional_count)
+
+     words = essay.split()
+     word_count = len(words)
+
+     transitional_words_percentage = round((total_transitional_count / (word_count * 1.00)) * 100, 2)
+
+     print("\n\n\n transitional_words_percentage:", transitional_words_percentage)
+
+     '''
+     Since a transitional_words_percentage of 10% is considered the ideal percentage of
+     transitional words in an essay, we deduct points according to how far the measured
+     percentage deviates from that ideal value.
+
+     This has proven to be a useful signal for Development, Structure and Coherence in essays.
+     '''
+     return 100 - abs(transitional_words_percentage - 10)
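A worked instance of the deviation penalty, independent of the word lists (note that str.count matches substrings, so phrases and repeated entries can overlap in the real counts):

    # hypothetical essay: 200 words, 18 transitional-word hits
    total_transitional_count = 18
    word_count = 200
    pct = round((total_transitional_count / word_count) * 100, 2)  # 9.0
    score = 100 - abs(pct - 10)                                    # 99.0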
+
+
+ def is_capitalized(text: str) -> bool:
+     """Check if the entire text is in capital letters."""
+     return text.isupper()
+
+ def contains_punctuation(text: str) -> bool:
+     """Check if the text contains any punctuation."""
+     return bool(re.search(r'[.,!?;:]', text))
+
+ def is_bullet_points(text: str) -> bool:
+     """Check if the text consists only of bullet points or very short sentences."""
+     sentences = text.split('\n')
+     bullet_points = any(line.strip().startswith('-') for line in sentences)
+     short_sentences = sum(len(sentence.split()) <= 2 for sentence in sentences if sentence.strip())
+     return bullet_points or short_sentences > len(sentences) / 2
+
+ def form_score_essay(essay: str) -> float:
+     # REMOVE PUNCTUATION AND COUNT WORDS
+     word_count = len(re.findall(r'\b\w+\b', essay))
+
+     # CHECK ESSAY FORMAT
+     is_capital = is_capitalized(essay)
+     has_punctuation = contains_punctuation(essay)
+     bullet_points_or_short = is_bullet_points(essay)
+
+     # CALCULATE SCORE
+     if 200 <= word_count <= 300 and has_punctuation and not is_capital and not bullet_points_or_short:
+         score = 100.0  # BEST SCORE
+     elif (120 <= word_count <= 199 or 301 <= word_count <= 380) and has_punctuation and not is_capital and not bullet_points_or_short:
+         score = 50.0  # AVERAGE SCORE
+     else:
+         score = 0.0  # WORST SCORE
+
+     return score
+
+
+ @app.post("/essay_scoring/")
+ async def essay_score(prompt: str = Form(), essay: str = Form()):
+     content_score_result, form_score_result, dsc_score_result, grammar_score_result = (
+         float(get_similarity_score(prompt, essay)) * 100,
+         float(form_score_essay(essay)),
+         float(dsc_score(essay)),
+         float(grammar_score(essay))
+     )
+
+     print(essay)
+
+     return {
+         "Content Score: ": content_score_result,
+         "Form Score: ": form_score_result,
+         "DSC Score: ": dsc_score_result,
+         "Grammar Score: ": grammar_score_result,
+         "Overall Essay Score": (content_score_result + form_score_result + dsc_score_result + grammar_score_result) / 4.0
+     }
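Both fields of the essay scorer are plain form data; a client sketch (the prompt and essay strings are hypothetical):

    import requests

    resp = requests.post(
        "http://127.0.0.1:8000/essay_scoring/",
        data={"prompt": "Is technology making us smarter?", "essay": "..."},
    )
    print(resp.json())  # content, form, DSC, grammar and overall scores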
pronunciation_fluency_v2.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:25f2404a15f08d5ff7adc3bfb9721b5d4c2e65a05acbcc808a2d2d9d2bd24d57
+ size 27837151
requirements.txt ADDED
@@ -0,0 +1,12 @@
+ git+https://github.com/openai/whisper.git
+ fastapi
+ pydantic
+ uvicorn
+ python-multipart
+ gunicorn
+ gensim
+ scikit-learn
+ numpy
+ textblob
+ nltk
trasncribe.py ADDED
File without changes
whisper_tiny_model.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3c80a3201cc10ca84a80717069768f68fbab09a35bff458f77a120e4aa210dee
+ size 151102205