Cachoups committed on
Commit
e5c62b5
·
verified ·
1 Parent(s): b30f761

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -33
app.py CHANGED
@@ -169,39 +169,34 @@ def find_sentences_with_keywords(text, keywords):
169
 
170
 
171
  # Main function to process both PDFs based on the Excel file names and the sheet name
172
- def process_pdfs(file1, file2, sheet):
173
- # Derive PDF file names from the Excel file paths
174
- pdf_file1 = file1.replace(".xlsx", ".pdf")
175
- pdf_file2 = file2.replace(".xlsx", ".pdf")
176
- set = {
177
- 'GDP': ['GDP'],
178
- 'HICP': ['HICP'],
179
- 'RRE prices': ['RRE', 'residential'],
180
- 'Unemployment' : ['Unemployment'],
181
- 'CRE prices': ['CRE', 'commercial']
182
- }
183
- # Extract text from both PDFs
184
- pdf_text1,pdf_text2 = extract_and_paragraph(pdf_file1, pdf_file2, False)
185
-
186
- # Find sentences that match the sheet names (used as keywords)
187
- matched_sentences1 = find_sentences_with_keywords(pdf_text1, set[sheet])
188
- matched_sentences2 = find_sentences_with_keywords(pdf_text2, set[sheet])
189
-
190
- # Format the results for output
191
- result = {
192
- "PDF 1": {
193
- "File": pdf_file1,
194
- "Keyword": set[sheet],
195
- "Sentences": matched_sentences1
196
- },
197
- "PDF 2": {
198
- "File": pdf_file2,
199
- "Keyword": set[sheet],
200
- "Sentences": matched_sentences2
201
- }
202
  }
 
203
 
204
- return result
 
 
 
 
 
 
 
 
 
 
 
 
205
 
206
  stored_paragraphs_1 = []
207
  stored_paragraphs_2 = []
@@ -288,8 +283,13 @@ with gr.Blocks() as demo:
288
  b1.click(fn=process_and_compare, inputs=[file1, sheet, file2, sheet], outputs=result)
289
  with gr.Row():
290
  with gr.Column():
291
- result = gr.JSON(label="Comparison Result")
 
 
 
 
292
  b2 = gr.Button("Extract text information")
293
- b2.click(fn=process_pdfs, inputs=[file1, file2, sheet], outputs=result)
 
294
 
295
  demo.launch()
 
169
 
170
 
171
  # Main function to process both PDFs based on the Excel file names and the sheet name
172
+ def process_pdfs_and_analyze_sentiment(file1, file2, sheet):
173
+ # Extract text from both PDFs based on the file name
174
+ text1 = extract_text_from_pdf(file1)
175
+ text2 = extract_text_from_pdf(file2)
176
+
177
+ # Use sheet name as the keyword to find relevant sentences
178
+ keywords = {
179
+ 'GDP': ['GDP'],
180
+ 'HICP': ['HICP'],
181
+ 'RRE prices': ['RRE', 'residential'],
182
+ 'CRE prices': ['CRE', 'commercial'],
183
+ 'Unemployment': ['unemployment']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
  }
185
+ selected_keywords = keywords.get(sheet, [])
186
 
187
+ # Find sentences containing the keywords
188
+ sentences1 = find_sentences_with_keywords(text1, selected_keywords)
189
+ sentences2 = find_sentences_with_keywords(text2, selected_keywords)
190
+
191
+ # Concatenate all sentences for each PDF
192
+ text_pdf1 = " ".join(sentences1)
193
+ text_pdf2 = " ".join(sentences2)
194
+
195
+ # Perform sentiment analysis on the extracted sentences for each PDF
196
+ result_pdf1 = fin_ext_bis(text_pdf1)
197
+ result_pdf2 = fin_ext_bis(text_pdf2)
198
+
199
+ return result_pdf1, result_pdf2
200
 
201
  stored_paragraphs_1 = []
202
  stored_paragraphs_2 = []
 
283
  b1.click(fn=process_and_compare, inputs=[file1, sheet, file2, sheet], outputs=result)
284
  with gr.Row():
285
  with gr.Column():
286
+ sentiment_results_pdf1 = gr.HighlightedText(label="Sentiment Analysis - PDF 1")
287
+ with gr.Column():
288
+ sentiment_results_pdf2 = gr.HighlightedText(label="Sentiment Analysis - PDF 2")
289
+
290
+ # Button to extract text from PDFs and perform sentiment analysis
291
  b2 = gr.Button("Extract text information")
292
+ b2.click(fn=process_pdfs_and_analyze_sentiment, inputs=[file1, file2, sheet], outputs=[sentiment_results_pdf1, sentiment_results_pdf2])
293
+
294
 
295
  demo.launch()