Spaces:

samyak152002
/

texmetrics-regex-checks-gradio-1

Running

samyak152002 committed on May 17

Commit

241061a

verified ·

1 Parent(s): b6f0b25

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -228,14 +228,23 @@ def check_language_issues_and_regex(markdown_text_from_pdf: str) -> Dict[str, An
     tool = None
     processed_issues: List[Dict[str, Any]] = []
     try:
-        tool = language_tool_python.LanguageTool('en-US')
         raw_lt_matches = tool.check(text_for_analysis)
         lt_issues_in_range = 0
         for idx, match in enumerate(raw_lt_matches):
-            if match.ruleId == "EN_SPLIT_WORDS_HYPHEN": continue
             # Filter by content boundaries
             if not (content_start_index <= match.offset < content_end_index):
                 continue

     tool = None
     processed_issues: List[Dict[str, Any]] = []
     try:
+        tool = language_tool_python.LanguageTool('en-US')
         raw_lt_matches = tool.check(text_for_analysis)
+        # Define a set of rule IDs to ignore
+        rules_to_ignore = {
+            "EN_SPLIT_WORDS_HYPHEN",      # Existing rule to ignore
+            "MORFOLOGIK_RULE_EN_US"       # New rule to ignore for spelling mistakes
+        }
         lt_issues_in_range = 0
         for idx, match in enumerate(raw_lt_matches):
+            # Skip if the ruleId is in our set of ignored rules
+            if match.ruleId in rules_to_ignore:
+                continue
             # Filter by content boundaries
             if not (content_start_index <= match.offset < content_end_index):
                 continue