samyak152002 committed on
Commit
241061a
·
verified ·
1 Parent(s): b6f0b25

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -4
app.py CHANGED
@@ -228,14 +228,23 @@ def check_language_issues_and_regex(markdown_text_from_pdf: str) -> Dict[str, An
228
 
229
  tool = None
230
  processed_issues: List[Dict[str, Any]] = []
 
231
  try:
232
- tool = language_tool_python.LanguageTool('en-US')
233
  raw_lt_matches = tool.check(text_for_analysis)
234
-
 
 
 
 
 
 
235
  lt_issues_in_range = 0
236
  for idx, match in enumerate(raw_lt_matches):
237
- if match.ruleId == "EN_SPLIT_WORDS_HYPHEN": continue
238
-
 
 
239
  # Filter by content boundaries
240
  if not (content_start_index <= match.offset < content_end_index):
241
  continue
 
228
 
229
  tool = None
230
  processed_issues: List[Dict[str, Any]] = []
231
+
232
  try:
233
+ tool = language_tool_python.LanguageTool('en-US')
234
  raw_lt_matches = tool.check(text_for_analysis)
235
+
236
+ # Define a set of rule IDs to ignore
237
+ rules_to_ignore = {
238
+ "EN_SPLIT_WORDS_HYPHEN", # Existing rule to ignore
239
+ "MORFOLOGIK_RULE_EN_US" # New rule to ignore for spelling mistakes
240
+ }
241
+
242
  lt_issues_in_range = 0
243
  for idx, match in enumerate(raw_lt_matches):
244
+ # Skip if the ruleId is in our set of ignored rules
245
+ if match.ruleId in rules_to_ignore:
246
+ continue
247
+
248
  # Filter by content boundaries
249
  if not (content_start_index <= match.offset < content_end_index):
250
  continue