Sa-m committed on
Commit
11b6240
·
verified ·
1 Parent(s): fb1ce50

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -3
app.py CHANGED
@@ -70,9 +70,23 @@ except Exception as e:
70
 
71
 
72
  def Parsing(parsed_text):
73
- parsed_text=parsed_text.name
74
- raw_party =textract.process(parsed_text, encoding='ascii',method='pdfminer')
75
- return clean(raw_party)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
 
77
 
78
  #Added more stopwords to avoid irrelevant terms
 
70
 
71
 
72
  def Parsing(parsed_text):
73
+ '''
74
+ Process a PDF file and extract its text content
75
+ parsed_text: Can be a file object with a 'name' attribute or a file path string
76
+ '''
77
+ try:
78
+ # Handle different input types
79
+ if hasattr(parsed_text, 'name'):
80
+ file_path = parsed_text.name
81
+ else:
82
+ file_path = parsed_text
83
+
84
+ # Extract text from PDF
85
+ raw_party = textract.process(file_path, encoding='ascii', method='pdfminer')
86
+ return clean(raw_party)
87
+ except Exception as e:
88
+ print(f"Error parsing PDF: {str(e)}")
89
+ return f"Error parsing PDF: {str(e)}"
90
 
91
 
92
  #Added more stopwords to avoid irrelevant terms