Spaces:

samyak152002
/

texmetrics-regex-checks-gradio-1

Running

App Files Community

samyak152002 committed on Dec 5, 2024

Commit

a0e200f

verified ·

1 Parent(s): 91e3e31

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -7

app.py CHANGED Viewed

@@ -117,11 +117,13 @@ def check_structure(full_text: str) -> Dict[str, bool]:
     }
 def check_language_issues(full_text: str) -> Dict[str, Any]:
-    """Check for language issues using LanguageTool."""
     try:
         language_tool = language_tool_python.LanguageTool('en-US')
         matches = language_tool.check(full_text)
         issues = []
         for match in matches:
             issues.append({
                 "message": match.message,
@@ -131,10 +133,40 @@ def check_language_issues(full_text: str) -> Dict[str, Any]:
                 "rule_id": match.ruleId,
                 "offset": match.offset,
                 "length": match.errorLength,
-                "coordinates":[],
-                "page":0
             })
         print(f"Total language issues found: {len(issues)}")
         return {
             "total_issues": len(issues),
             "issues": issues
@@ -252,7 +284,7 @@ def highlight_issues_in_pdf(file, language_matches: List[Dict[str, Any]]) -> byt
             if not target_words:
                 print("No matching words found for this issue.")
                 continue
             initial_x = target_words[0][2]
             initial_y = target_words[0][3]
             final_x = target_words[len(target_words)-1][4]
@@ -270,7 +302,7 @@ def highlight_issues_in_pdf(file, language_matches: List[Dict[str, Any]]) -> byt
                 highlight.set_colors(stroke=(1, 1, 0))  # Yellow color
                 highlight.update()
                 print(f"Highlighted '{word_text}' on page {page_num + 1} at position ({x0}, {y0}, {x1}, {y1})")
         # Save annotated PDF to bytes
         byte_stream = io.BytesIO()
@@ -287,6 +319,7 @@ def highlight_issues_in_pdf(file, language_matches: List[Dict[str, Any]]) -> byt
     except Exception as e:
         print(f"Error in highlighting PDF: {e}")
         return b""
 # ------------------------------
 # Main Analysis Function
 # ------------------------------
@@ -341,7 +374,7 @@ def process_upload(file):
     temp_input.write(file)
     temp_input_path = temp_input.name
     print(temp_input_path)
-    # Analyze -inputthe PDF
     results, annotated_pdf = analyze_pdf(temp_input_path)
@@ -366,7 +399,7 @@ def process_upload(file):
     #     }, indent=2)
     #     return error_message, None
 def create_interface():
     with gr.Blocks(title="PDF Analyzer") as interface:
         gr.Markdown("# PDF Analyzer")

     }
 def check_language_issues(full_text: str) -> Dict[str, Any]:
+    """Check for language issues using LanguageTool and additional regex patterns."""
     try:
         language_tool = language_tool_python.LanguageTool('en-US')
         matches = language_tool.check(full_text)
         issues = []
+        # Process LanguageTool matches
         for match in matches:
             issues.append({
                 "message": match.message,
                 "rule_id": match.ruleId,
                 "offset": match.offset,
                 "length": match.errorLength,
+                "coordinates": [],
+                "page": 0
             })
         print(f"Total language issues found: {len(issues)}")
+        # -----------------------------------
+        # Additions: Regex-based Issue Detection
+        # -----------------------------------
+        # Define regex pattern to find words immediately followed by '[' without space
+        regex_pattern = r'\b(\w+)\[(\d+)\]'
+        regex_matches = list(re.finditer(regex_pattern, full_text))
+        print(f"Total regex issues found: {len(regex_matches)}")
+        # Process regex matches
+        for match in regex_matches:
+            word = match.group(1)
+            number = match.group(2)
+            start = match.start()
+            end = match.end()
+            issues.append({
+                "message": f"Missing space before '[' in '{word}[{number}]'. Should be '{word} [{number}]'.",
+                "context": full_text[max(match.start() - 30, 0):min(match.end() + 30, len(full_text))].strip(),
+                "suggestions": [f"{word} [{number}]", f"{word} [`{number}`]", f"{word} [number {number}]"],
+                "category": "Formatting",
+                "rule_id": "SPACE_BEFORE_BRACKET",
+                "offset": match.start(),
+                "length": match.end() - match.start(),
+                "coordinates": [],
+                "page": 0
+            })
+        print(f"Total combined issues found: {len(issues)}")
         return {
             "total_issues": len(issues),
             "issues": issues
             if not target_words:
                 print("No matching words found for this issue.")
                 continue
             initial_x = target_words[0][2]
             initial_y = target_words[0][3]
             final_x = target_words[len(target_words)-1][4]
                 highlight.set_colors(stroke=(1, 1, 0))  # Yellow color
                 highlight.update()
                 print(f"Highlighted '{word_text}' on page {page_num + 1} at position ({x0}, {y0}, {x1}, {y1})")
         # Save annotated PDF to bytes
         byte_stream = io.BytesIO()
     except Exception as e:
         print(f"Error in highlighting PDF: {e}")
         return b""
 # ------------------------------
 # Main Analysis Function
 # ------------------------------
     temp_input.write(file)
     temp_input_path = temp_input.name
     print(temp_input_path)
+    # Analyze the PDF
     results, annotated_pdf = analyze_pdf(temp_input_path)
     #     }, indent=2)
     #     return error_message, None
 def create_interface():
     with gr.Blocks(title="PDF Analyzer") as interface:
         gr.Markdown("# PDF Analyzer")