Update app.py
Browse files
app.py
CHANGED
@@ -228,14 +228,23 @@ def check_language_issues_and_regex(markdown_text_from_pdf: str) -> Dict[str, An
|
|
228 |
|
229 |
tool = None
|
230 |
processed_issues: List[Dict[str, Any]] = []
|
|
|
231 |
try:
|
232 |
-
tool = language_tool_python.LanguageTool('en-US')
|
233 |
raw_lt_matches = tool.check(text_for_analysis)
|
234 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
235 |
lt_issues_in_range = 0
|
236 |
for idx, match in enumerate(raw_lt_matches):
|
237 |
-
if
|
238 |
-
|
|
|
|
|
239 |
# Filter by content boundaries
|
240 |
if not (content_start_index <= match.offset < content_end_index):
|
241 |
continue
|
|
|
228 |
|
229 |
tool = None
|
230 |
processed_issues: List[Dict[str, Any]] = []
|
231 |
+
|
232 |
try:
|
233 |
+
tool = language_tool_python.LanguageTool('en-US')
|
234 |
raw_lt_matches = tool.check(text_for_analysis)
|
235 |
+
|
236 |
+
# LanguageTool rule IDs whose matches are skipped rather than reported
|
237 |
+
rules_to_ignore = {
|
238 |
+
"EN_SPLIT_WORDS_HYPHEN", # Existing rule to ignore
|
239 |
+
"MORFOLOGIK_RULE_EN_US" # Spell-checker rule; ignored so spelling mistakes are not reported
|
240 |
+
}
|
241 |
+
|
242 |
lt_issues_in_range = 0
|
243 |
for idx, match in enumerate(raw_lt_matches):
|
244 |
+
# Skip if the ruleId is in our set of ignored rules
|
245 |
+
if match.ruleId in rules_to_ignore:
|
246 |
+
continue
|
247 |
+
|
248 |
# Filter by content boundaries
|
249 |
if not (content_start_index <= match.offset < content_end_index):
|
250 |
continue
|