samyak152002 committed on
Commit
ec35d3e
·
verified ·
1 Parent(s): 810882a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -10
app.py CHANGED
@@ -347,25 +347,36 @@ def analyze_pdf(filepath: str) -> Tuple[Dict[str, Any], bytes]:
347
  if not full_text:
348
  return {"error": "Failed to extract text from PDF."}, None
349
 
350
- # print(full_text)
351
- language_issues = check_language_issues(full_text)
 
 
 
 
 
 
 
 
 
 
 
352
 
353
- # Handle potential errors from check_language_issues
 
354
  if "error" in language_issues:
355
  return {"error": language_issues["error"]}, None
356
 
357
  issues = language_issues.get("issues", [])
358
-
359
  if issues:
360
- language_issues, annotated_pdf = highlight_issues_in_pdf(filepath, issues)
361
- return {"issues": language_issues}, annotated_pdf
 
362
  else:
363
- # Return a meaningful message and no annotated PDF if no issues are found
364
- return {"message": "No language issues found in the uploaded PDF."}, None
 
365
  except Exception as e:
366
- # Return the error message and no annotated PDF
367
  return {"error": str(e)}, None
368
-
369
  # ------------------------------
370
  # Gradio Interface
371
  # ------------------------------
 
347
  if not full_text:
348
  return {"error": "Failed to extract text from PDF."}, None
349
 
350
+ # Create the results structure
351
+ results = {
352
+ "issues": [], # Initialize as empty array
353
+ "regex_checks": {
354
+ "metadata": check_metadata(full_text),
355
+ "disclosures": check_disclosures(full_text),
356
+ "figures_and_tables": check_figures_and_tables(full_text),
357
+ "references": check_references(full_text),
358
+ "structure": check_structure(full_text),
359
+ "figure_order": check_figure_order(full_text),
360
+ "reference_order": check_reference_order(full_text)
361
+ }
362
+ }
363
 
364
+ # Handle language issues
365
+ language_issues = check_language_issues(full_text)
366
  if "error" in language_issues:
367
  return {"error": language_issues["error"]}, None
368
 
369
  issues = language_issues.get("issues", [])
 
370
  if issues:
371
+ language_matches, annotated_pdf = highlight_issues_in_pdf(filepath, issues)
372
+ results["issues"] = language_matches # This is already an array from check_language_issues
373
+ return results, annotated_pdf
374
  else:
375
+ # Keep issues as empty array if none found
376
+ return results, None
377
+
378
  except Exception as e:
 
379
  return {"error": str(e)}, None
 
380
  # ------------------------------
381
  # Gradio Interface
382
  # ------------------------------