Aidan Phillips committed
Commit 0885169 · 1 Parent(s): f5893dd

accuracy scoring pretty good

Files changed (3)
  1. categories/accuracy.py +146 -0
  2. categories/fluency.py +103 -66
  3. scorer.ipynb +44 -22
categories/accuracy.py CHANGED
@@ -0,0 +1,146 @@
+import string
+
+import torch
+from scipy.spatial.distance import cosine
+from simalign import SentenceAligner
+from transformers import AutoModel, AutoTokenizer
+
+# setup global variables on import (bad practice, but whatever)
+# --------------------------------------------------------------
+
+aligner = SentenceAligner(model="distilbert-base-multilingual-cased", layer=6)
+tokenizer = AutoTokenizer.from_pretrained("distilbert-base-multilingual-cased")
+model = AutoModel.from_pretrained("distilbert-base-multilingual-cased")
+
+
+def accuracy(src_sentence: str, trg_sentence: str) -> dict:
+    """
+    Calculate the accuracy of a translation by comparing the source and target
+    sentences.
+
+    Parameters:
+        src_sentence (str): The source sentence.
+        trg_sentence (str): The target sentence.
+
+    Returns:
+        dict: A dictionary containing the accuracy score and errors.
+    """
+    # Preprocess both sentences
+    src_sentence = __preprocess_text(src_sentence)
+    trg_sentence = __preprocess_text(trg_sentence)
+
+    r = __get_alignment_score(src_sentence, trg_sentence)
+    score = __get_bertscore(src_sentence, trg_sentence)
+
+    res = {"score": __bertscore_to_percentage(score), "errors": r}
+    return res
+
+
+def __preprocess_text(text: str) -> str:
+    """
+    Remove punctuation and convert text to lowercase.
+
+    Parameters:
+        text (str): The text to preprocess.
+
+    Returns:
+        str: The preprocessed text.
+    """
+    # Remove punctuation
+    text = text.translate(str.maketrans("", "", string.punctuation))
+    # Convert to lowercase
+    text = text.lower()
+    return text
+
+
+def __get_bertscore(src_sentence: str, trg_sentence: str) -> float:
+    """
+    Get the BERTScore between two sentences.
+
+    Parameters:
+        src_sentence (str): The source sentence.
+        trg_sentence (str): The target sentence.
+
+    Returns:
+        float: The BERTScore.
+    """
+    # Tokenize and generate embeddings
+    inputs_src = tokenizer(
+        src_sentence, return_tensors="pt", padding=True, truncation=True
+    )
+    inputs_trg = tokenizer(
+        trg_sentence, return_tensors="pt", padding=True, truncation=True
+    )
+
+    with torch.no_grad():
+        outputs_src = model(**inputs_src)
+        outputs_trg = model(**inputs_trg)
+
+    # Get sentence embeddings by averaging token embeddings (from last hidden state)
+    src_embedding = torch.mean(outputs_src.last_hidden_state, dim=1).squeeze().numpy()
+    trg_embedding = torch.mean(outputs_trg.last_hidden_state, dim=1).squeeze().numpy()
+
+    # Calculate cosine similarity (1 - cosine distance)
+    similarity = 1 - cosine(src_embedding, trg_embedding)
+
+    return similarity
+
+
+def __bertscore_to_percentage(similarity: float) -> float:
+    """
+    Convert the BERTScore cosine similarity to a percentage score (0-100).
+
+    Parameters:
+        similarity (float): The cosine similarity from BERTScore.
+
+    Returns:
+        float: A score from 0 to 100.
+    """
+    # Scale the similarity from the [-1, 1] range to [0, 100] (it is rarely negative)
+    scaled_score = max(((similarity + 1) / 2) * 100, 0)
+    return round(scaled_score, 2)
+
+
+def __get_alignment_score(src_sentence: str, trg_sentence: str) -> list:
+    """
+    Use word alignments between two sentences to flag possible mistranslations.
+
+    Parameters:
+        src_sentence (str): The source sentence.
+        trg_sentence (str): The target sentence.
+
+    Returns:
+        list: Possible mistranslations.
+    """
+    src_list = src_sentence.split()
+    trg_list = trg_sentence.split()
+
+    # The output is a dictionary with different matching methods.
+    # Each method has a list of pairs indicating the indexes of aligned words (the alignments are zero-indexed).
+    alignments = aligner.get_word_aligns(src_list, trg_list)
+
+    src_aligns = {x[0] for x in alignments["inter"]}
+    trg_aligns = {x[1] for x in alignments["inter"]}
+
+    mistranslations = []
+    for i in range(len(src_list)):
+        if i not in src_aligns:
+            mistranslations.append(
+                {
+                    "start": i,
+                    "end": i,
+                    "message": f"Word {src_list[i]} possibly mistranslated or omitted",
+                }
+            )
+
+    for i in range(len(trg_list)):
+        if i not in trg_aligns:
+            mistranslations.append(
+                {
+                    "start": i,
+                    "end": i,
+                    "message": f"Word {trg_list[i]} possibly mistranslated or added erroneously",
+                }
+            )
+
+    return mistranslations
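
A quick way to smoke-test the new module (a minimal sketch, assuming the categories package is importable and the Hugging Face models can download on first use):

    from categories.accuracy import accuracy

    # A faithful translation should align well and score high
    result = accuracy("Das ist ein Test.", "This is a test.")
    print(result["score"])        # embedding similarity scaled to 0-100
    for err in result["errors"]:  # words simalign could not align
        print(err["start"], err["end"], err["message"])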
categories/fluency.py CHANGED
@@ -1,28 +1,29 @@
 import language_tool_python
-from transformers import AutoTokenizer, AutoModelForMaskedLM
-import torch
 import numpy as np
 import spacy
+import torch
 import wordfreq
+from transformers import AutoModelForMaskedLM, AutoTokenizer
 
 # setup global variables on import (bad practice, but whatever)
-#--------------------------------------------------------------
+# --------------------------------------------------------------
 
 # grammar checker
-tool = language_tool_python.LanguageTool('en-US')
+tool = language_tool_python.LanguageTool("en-US")
 
 # masked language model and tokenizer from huggingface
-model_name="distilbert-base-multilingual-cased"
+model_name = "distilbert-base-multilingual-cased"
 model = AutoModelForMaskedLM.from_pretrained(model_name)
 model.eval()
-tokenizer = AutoTokenizer.from_pretrained(model_name) # tokenizer
+tokenizer = AutoTokenizer.from_pretrained(model_name)  # tokenizer
 
 # spacy model for parsing
 nlp = spacy.load("en_core_web_sm")
 
-def __get_rarity(word, lang="en") -> float:
+
+def __get_rarity(word: str, lang: str = "en") -> float:
     """
-    Returns the rarity of a word in the given language. word_freq retuns a value
+    Returns the rarity of a word in the given language. word_freq returns a value
     between 0 and 1, where 1 is the most common word. Therefore, taking the log results
     in a value between 0 (log 1 = 0) and -27.63 (log 1e-12). We then negate it so super
     rare words have a high score and common words have a low score.
@@ -30,20 +31,21 @@ def __get_rarity(word, lang="en") -> float:
     Parameters:
         word (str): The word to check.
         lang (str): The language to check. Default is "en".
-
+
     Returns:
         float: The rarity of the word.
     """
     return -np.log(wordfreq.word_frequency(word, lang) + 1e-12)
 
-def __produce_groupings(offset_mapping, input_ids):
+
+def __produce_groupings(offset_mapping: list, input_ids: list) -> list:
     """
     Produce groupings of tokens that are part of the same word.
 
     Parameters:
         offset_mapping (list): The offset mapping of the tokens.
         input_ids (list): The input ids of the tokens.
-
+
     Returns:
         list: A list of groupings of tokens.
     """
@@ -64,10 +66,11 @@ def __produce_groupings(offset_mapping, input_ids):
     # Append final group
     if current_group:
         res.append(current_group)
-
+
     return res
 
-def pseudo_perplexity(text, threshold=4, max_len=128):
+
+def pseudo_perplexity(text: str, threshold: int = 4, max_len: int = 128) -> dict:
     """
     Calculate the pseudo-perplexity of a text using a masked language model. Return all
     words that exceed a threshold of "adjusted awkwardness". The threshold is a measure
@@ -77,7 +80,7 @@ def pseudo_perplexity(text, threshold=4, max_len=128):
         text (str): The text to check.
         threshold (float): The threshold for awkwardness. Default is 4.
         max_len (int): The maximum length of the text. Default is 128.
-
+
     Returns:
         dict: A dictionary containing the score and errors.
     """
@@ -94,7 +97,7 @@ def pseudo_perplexity(text, threshold=4, max_len=128):
     for group in word_groups:
         # Skip special tokens (CLS and SEP)
         if group[0] == 0 or group[-1] == len(input_ids) - 1:
-            continue
+            continue
 
         # Mask the word group
         masked = input_ids.clone()
@@ -119,7 +122,9 @@
         word_loss = -np.sum(log_probs) / len(log_probs)
         # Adjust the loss based on the rarity of the word
         word = tokenizer.decode(input_ids[group[0]])
-        word_loss -= 0.6 * __get_rarity(word) # subtract rarity (rare words reduce loss)
+        word_loss -= 0.6 * __get_rarity(
+            word
+        )  # subtract rarity (rare words reduce loss)
         loss_values.append(word_loss)
 
     # Structure the results for output
@@ -129,22 +134,24 @@
     for i, l in enumerate(loss_values):
         if l < threshold:
             continue
-        errors.append({
-            "start": i,
-            "end": i,
-            "message": f"Adjusted liklihood {l} over threshold {threshold}"
-        })
+        errors.append(
+            {
+                "start": i,
+                "end": i,
+                "message": f"Adjusted likelihood {l} over threshold {threshold}",
+            }
+        )
 
-    res = {
-        "score": __fluency_score(average_loss),
-        "errors": errors
-    }
+    res = {"score": __fluency_score(average_loss), "errors": errors}
 
     return res
 
-def __fluency_score(loss, midpoint=5, steepness=0.3):
+
+def __fluency_score(
+    loss: float, midpoint: float = 5.0, steepness: float = 0.3
+) -> float:
     """
-    Transform the loss into a score from 0 to 100. Steepness controls how quickly the
+    Transform the loss into a score from 0 to 100. Steepness controls how quickly the
     score drops as loss increases and midpoint controls the loss at which the score is
     50.
 
@@ -152,20 +159,21 @@ def __fluency_score(loss, midpoint=5, steepness=0.3):
         loss (float): The loss to transform.
         midpoint (float): The loss at which the score is 50. Default is 5.
         steepness (float): The steepness of the curve. Default is 0.3.
-
+
     Returns:
         float: The score from 0 to 100.
     """
     score = 100 / (1 + np.exp(steepness * (loss - midpoint)))
     return round(score, 2)
 
-def grammar_errors(text) -> tuple[int, list[str]]:
+
+def grammar_errors(text: str) -> dict:
     """
     Check the grammar of a text using a grammar checker and a structural grammar check.
 
     Parameters:
         text (str): The text to check.
-
+
     Returns:
         dict: A dictionary containing the score and errors.
     """
@@ -195,83 +203,112 @@ def grammar_errors(text) -> tuple[int, list[str]]:
 
     grammar_score = len(r) / len(text.split())
 
-    res = {
-        "score": __grammar_score_from_prob(grammar_score),
-        "errors": r
-    }
+    res = {"score": __grammar_score_from_prob(grammar_score), "errors": r}
 
     return res
 
-def __grammar_score_from_prob(error_ratio):
+
+def __grammar_score_from_prob(error_ratio: float) -> float:
     """
     Transform the number of errors divided by words into a score from 0 to 100.
-    Steepness controls how quickly the score drops as errors increase.
+
+    Parameters:
+        error_ratio (float): The ratio of errors to words.
+
+    Returns:
+        float: The score from 0 to 100.
     """
-    score = 100*(1-error_ratio)
+    score = 100 * (1 - error_ratio)
    return round(score, 2)
 
 
-def __check_structural_grammar(text):
+def __check_structural_grammar(text: str) -> list:
+    """
+    Check the structural grammar of a text using spaCy.
+
+    Parameters:
+        text (str): The text to check.
+
+    Returns:
+        list: A list of structural grammar errors.
+    """
     doc = nlp(text)
     issues = []
 
     # 1. Missing main verb (ROOT)
-    root_verbs = [tok for tok in doc if tok.dep_ == "ROOT" and tok.pos_ in {"VERB", "AUX"}]
+    root_verbs = [
+        tok for tok in doc if tok.dep_ == "ROOT" and tok.pos_ in {"VERB", "AUX"}
+    ]
     if not root_verbs:
         root_root = [tok for tok in doc if tok.dep_ == "ROOT"]
         token = root_root[0] if root_root else doc[0]
-        issues.append({
-            "start": token.i,
-            "end": token.i + 1,
-            "message": "Sentence is missing a main verb (no ROOT verb)."
-        })
+        issues.append(
+            {
+                "start": token.i,
+                "end": token.i + 1,
+                "message": "Sentence is missing a main verb (no ROOT verb).",
+            }
+        )
 
     # 2. Verb(s) present but no subject
     verbs = [tok for tok in doc if tok.pos_ in {"VERB", "AUX"}]
     subjects = [tok for tok in doc if tok.dep_ in {"nsubj", "nsubjpass"}]
     if verbs and not subjects:
         for verb in verbs:
-            issues.append({
-                "start": verb.i,
-                "end": verb.i + 1,
-                "message": "Sentence has verb(s) but no subject (possible fragment)."
-            })
+            issues.append(
+                {
+                    "start": verb.i,
+                    "end": verb.i + 1,
+                    "message": "Sentence has verb(s) but no subject (possible fragment).",
+                }
+            )
 
     # 3. Dangling prepositions
     for tok in doc:
         if tok.pos_ == "ADP" and len(list(tok.children)) == 0:
-            issues.append({
-                "start": tok.i,
-                "end": tok.i + 1,
-                "message": f"Dangling preposition '{tok.text}' (no object or complement)."
-            })
+            issues.append(
+                {
+                    "start": tok.i,
+                    "end": tok.i + 1,
+                    "message": f"Dangling preposition '{tok.text}' (no object or complement).",
+                }
+            )
 
     # 4. Noun pile-up (no verbs, all tokens are nominal)
-    if not any(tok.pos_ in {"VERB", "AUX"} for tok in doc) and \
-       all(tok.pos_ in {"NOUN", "PROPN", "ADJ", "DET", "NUM"} for tok in doc if tok.is_alpha):
+    if not any(tok.pos_ in {"VERB", "AUX"} for tok in doc) and all(
+        tok.pos_ in {"NOUN", "PROPN", "ADJ", "DET", "NUM"}
+        for tok in doc
+        if tok.is_alpha
+    ):
         token = doc[0]
-        issues.append({
-            "start": token.i,
-            "end": token.i + 1,
-            "message": "Sentence lacks a verb or any verbal structure (nominal phrase pile-up)."
-        })
+        issues.append(
+            {
+                "start": token.i,
+                "end": token.i + 1,
+                "message": "Sentence lacks a verb or any verbal structure (nominal phrase pile-up).",
+            }
+        )
 
     # 5. Multiple ROOTs (possible run-on)
     root_count = sum(1 for tok in doc if tok.dep_ == "ROOT")
     if root_count > 1:
         for tok in doc:
             if tok.dep_ == "ROOT":
-                issues.append({
-                    "start": tok.i,
-                    "end": tok.i + 1,
-                    "message": "Sentence has multiple ROOTs — possible run-on sentence."
-                })
+                issues.append(
+                    {
+                        "start": tok.i,
+                        "end": tok.i + 1,
+                        "message": "Sentence has multiple ROOTs — possible run-on sentence.",
+                    }
+                )
 
     return issues
 
 
+# Unit tests can go here eventually
 def main():
     pass
 
+
 if __name__ == "__main__":
     main()
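
As a reference for the reformatted scoring transform above, a small worked example of the logistic mapping used by __fluency_score (values computed from the formula itself, not from a recorded run):

    import numpy as np

    # loss -> 0..100; the score is exactly 50 at loss == midpoint,
    # and steepness controls how quickly it falls off
    def fluency_score(loss, midpoint=5, steepness=0.3):
        return round(100 / (1 + np.exp(steepness * (loss - midpoint))), 2)

    print(fluency_score(5))   # 50.0  (loss at the midpoint)
    print(fluency_score(0))   # 81.76 (low loss reads as fluent)
    print(fluency_score(15))  # 4.74  (high loss reads as awkward)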
scorer.ipynb CHANGED
@@ -4,78 +4,100 @@
    "cell_type": "code",
    "execution_count": 1,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2025-04-08 22:18:10,848 - simalign.simalign - INFO - Initialized the EmbeddingLoader with model: distilbert-base-multilingual-cased\n",
+      "Initialized the EmbeddingLoader with model: distilbert-base-multilingual-cased\n"
+     ]
+    }
+   ],
    "source": [
-    "from categories.fluency import *"
+    "from categories.fluency import *\n",
+    "from categories.accuracy import *"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Sentence: caveman speak weird few word good\n"
+      "Sentence: The cat sat the quickly up apples banana.\n"
      ]
     }
    ],
    "source": [
-    "s = input(\"Enter a sentence: \") # Prompt the user to enter a sentence\n",
+    "src_sent = \"Das ist ein Test.\" # Example source sentence\n",
+    "trg_sent = input(f\"{src_sent}: \") # Prompt the user to enter a sentence\n",
     "\n",
-    "if s == \"\":\n",
-    "    s = \"The cat sat the quickly up apples banana.\"\n",
+    "if trg_sent == \"\":\n",
+    "    trg_sent = \"The cat sat the quickly up apples banana.\"\n",
     "\n",
-    "print(\"Sentence:\", s) # Print the input sentence\n",
+    "print(\"Sentence:\", trg_sent) # Print the input sentence\n",
     "\n",
-    "err = grammar_errors(s) # Call the function to execute the grammar error checking\n",
-    "flu = pseudo_perplexity(s, threshold=3.25) # Call the function to execute the fluency checking"
+    "err = grammar_errors(trg_sent) # Call the function to execute the grammar error checking\n",
+    "flu = pseudo_perplexity(trg_sent, threshold=3.1) # Call the function to execute the fluency checking\n",
+    "acc = accuracy(src_sent, trg_sent) # Call the function to execute the accuracy checking"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "This sentence does not start with an uppercase letter.: caveman speak\n",
-      "Perplexity 4.2750282429106585 over threshold 3.25: caveman\n",
-      "Perplexity 5.191700905668536 over threshold 3.25: few\n",
-      "Perplexity 3.8370066187600944 over threshold 3.25: good\n"
+      "An apostrophe may be missing.: apples banana.\n",
+      "Adjusted likelihood 4.8056646935577145 over threshold 3.1: sat\n",
+      "Adjusted likelihood 4.473408069089179 over threshold 3.1: the\n",
+      "Adjusted likelihood 4.732453441503642 over threshold 3.1: quickly\n",
+      "Adjusted likelihood 5.1115574262487735 over threshold 3.1: apples\n",
+      "Word ist possibly mistranslated or omitted: cat\n",
+      "Word ein possibly mistranslated or omitted: sat\n",
+      "Word sat possibly mistranslated or added erroneously: sat\n",
+      "Word the possibly mistranslated or added erroneously: the\n",
+      "Word quickly possibly mistranslated or added erroneously: quickly\n",
+      "Word up possibly mistranslated or added erroneously: up\n",
+      "Word apples possibly mistranslated or added erroneously: apples\n",
+      "Word banana possibly mistranslated or added erroneously: banana.\n"
      ]
     }
    ],
    "source": [
-    "combined_err = err[\"errors\"] + flu[\"errors\"] # Combine the error counts from both functions\n",
+    "combined_err = err[\"errors\"] + flu[\"errors\"] + acc[\"errors\"] # Combine the errors from all three checks\n",
     "\n",
     "for e in combined_err:\n",
-    "    substr = \" \".join(s.split(\" \")[e[\"start\"]:e[\"end\"]+1])\n",
+    "    substr = \" \".join(trg_sent.split(\" \")[e[\"start\"]:e[\"end\"]+1])\n",
     "    print(f\"{e['message']}: {substr}\") # Print the error messages\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "100.0 80.14\n",
-      "Fluency Score: 90.07\n"
+      "Fluency Score: 76.61500000000001\n",
+      "Accuracy Score: 24.45\n"
      ]
     }
    ],
    "source": [
     "fluency_score = 0.5 * err[\"score\"] + 0.5 * flu[\"score\"] # Calculate the fluency score\n",
-    "print(err[\"score\"], flu[\"score\"]) # Print the individual scores\n",
-    "print(\"Fluency Score:\", fluency_score) # Print the fluency score"
+    "print(\"Fluency Score:\", round(fluency_score, 2)) # Print the fluency score\n",
+    "\n",
+    "print(\"Accuracy Score:\", acc[\"score\"]) # Print the accuracy score"
    ]
   }
  ],
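
Taken together, the notebook's three checks could also run as a single script. A sketch of that roll-up (the 50/50 fluency blend mirrors the notebook cell; the 70/30 overall weighting is purely illustrative and not part of this commit):

    from categories.accuracy import accuracy
    from categories.fluency import grammar_errors, pseudo_perplexity

    src, trg = "Das ist ein Test.", "This is a test."
    err = grammar_errors(trg)                    # LanguageTool + structural checks
    flu = pseudo_perplexity(trg, threshold=3.1)  # masked-LM awkwardness
    acc = accuracy(src, trg)                     # alignment + embedding similarity

    fluency = 0.5 * err["score"] + 0.5 * flu["score"]  # notebook's 50/50 blend
    overall = 0.7 * acc["score"] + 0.3 * fluency       # hypothetical weights
    print(f"Fluency {fluency:.2f} | Accuracy {acc['score']:.2f} | Overall {overall:.2f}")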