Spaces:

Cachoups
/

FinanceReport

Sleeping

App Files Community

Cachoups committed on Sep 13, 2024

Commit

e5c62b5

verified ·

1 Parent(s): b30f761

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -33

app.py CHANGED Viewed

@@ -169,39 +169,34 @@ def find_sentences_with_keywords(text, keywords):
 # Main function to process both PDFs based on the Excel file names and the sheet name
-def process_pdfs(file1, file2, sheet):
-    # Derive PDF file names from the Excel file paths
-    pdf_file1 = file1.replace(".xlsx", ".pdf")
-    pdf_file2 = file2.replace(".xlsx", ".pdf")
-    set = {
-    'GDP': ['GDP'],
-    'HICP': ['HICP'],
-    'RRE prices': ['RRE', 'residential'],
-    'Unemployment' : ['Unemployment'],
-    'CRE prices': ['CRE', 'commercial']
-    }
-    # Extract text from both PDFs
-    pdf_text1,pdf_text2 = extract_and_paragraph(pdf_file1, pdf_file2, False)
-    # Find sentences that match the sheet names (used as keywords)
-    matched_sentences1 = find_sentences_with_keywords(pdf_text1, set[sheet])
-    matched_sentences2 = find_sentences_with_keywords(pdf_text2, set[sheet])
-    # Format the results for output
-    result = {
-        "PDF 1": {
-            "File": pdf_file1,
-            "Keyword": set[sheet],
-            "Sentences": matched_sentences1
-        },
-        "PDF 2": {
-            "File": pdf_file2,
-            "Keyword": set[sheet],
-            "Sentences": matched_sentences2
-        }
     }
-    return result
 stored_paragraphs_1 = []
 stored_paragraphs_2 = []
@@ -288,8 +283,13 @@ with gr.Blocks() as demo:
         b1.click(fn=process_and_compare, inputs=[file1, sheet, file2, sheet], outputs=result)
         with gr.Row():
             with gr.Column():
-                result = gr.JSON(label="Comparison Result")
         b2 = gr.Button("Extract text information")
-        b2.click(fn=process_pdfs, inputs=[file1, file2, sheet], outputs=result)
 demo.launch()

 # Main function to process both PDFs based on the Excel file names and the sheet name
+def process_pdfs_and_analyze_sentiment(file1, file2, sheet):
+    # Extract text from both PDFs based on the file name
+    text1 = extract_text_from_pdf(file1)
+    text2 = extract_text_from_pdf(file2)
+    # Use sheet name as the keyword to find relevant sentences
+    keywords = {
+        'GDP': ['GDP'],
+        'HICP': ['HICP'],
+        'RRE prices': ['RRE', 'residential'],
+        'CRE prices': ['CRE', 'commercial'],
+        'Unemployment': ['unemployment']
     }
+    selected_keywords = keywords.get(sheet, [])
+    # Find sentences containing the keywords
+    sentences1 = find_sentences_with_keywords(text1, selected_keywords)
+    sentences2 = find_sentences_with_keywords(text2, selected_keywords)
+    # Concatenate all sentences for each PDF
+    text_pdf1 = " ".join(sentences1)
+    text_pdf2 = " ".join(sentences2)
+    # Perform sentiment analysis on the extracted sentences for each PDF
+    result_pdf1 = fin_ext_bis(text_pdf1)
+    result_pdf2 = fin_ext_bis(text_pdf2)
+    return result_pdf1, result_pdf2
 stored_paragraphs_1 = []
 stored_paragraphs_2 = []
         b1.click(fn=process_and_compare, inputs=[file1, sheet, file2, sheet], outputs=result)
         with gr.Row():
             with gr.Column():
+                sentiment_results_pdf1 = gr.HighlightedText(label="Sentiment Analysis - PDF 1")
+            with gr.Column():
+                sentiment_results_pdf2 = gr.HighlightedText(label="Sentiment Analysis - PDF 2")
+        # Button to extract text from PDFs and perform sentiment analysis
         b2 = gr.Button("Extract text information")
+        b2.click(fn=process_pdfs_and_analyze_sentiment, inputs=[file1, file2, sheet], outputs=[sentiment_results_pdf1, sentiment_results_pdf2])
 demo.launch()