Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -169,39 +169,34 @@ def find_sentences_with_keywords(text, keywords):
|
|
169 |
|
170 |
|
171 |
# Main function to process both PDFs based on the Excel file names and the sheet name
|
172 |
-
def
|
173 |
-
#
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
pdf_text1,pdf_text2 = extract_and_paragraph(pdf_file1, pdf_file2, False)
|
185 |
-
|
186 |
-
# Find sentences that match the sheet names (used as keywords)
|
187 |
-
matched_sentences1 = find_sentences_with_keywords(pdf_text1, set[sheet])
|
188 |
-
matched_sentences2 = find_sentences_with_keywords(pdf_text2, set[sheet])
|
189 |
-
|
190 |
-
# Format the results for output
|
191 |
-
result = {
|
192 |
-
"PDF 1": {
|
193 |
-
"File": pdf_file1,
|
194 |
-
"Keyword": set[sheet],
|
195 |
-
"Sentences": matched_sentences1
|
196 |
-
},
|
197 |
-
"PDF 2": {
|
198 |
-
"File": pdf_file2,
|
199 |
-
"Keyword": set[sheet],
|
200 |
-
"Sentences": matched_sentences2
|
201 |
-
}
|
202 |
}
|
|
|
203 |
|
204 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
205 |
|
206 |
stored_paragraphs_1 = []
|
207 |
stored_paragraphs_2 = []
|
@@ -288,8 +283,13 @@ with gr.Blocks() as demo:
|
|
288 |
b1.click(fn=process_and_compare, inputs=[file1, sheet, file2, sheet], outputs=result)
|
289 |
with gr.Row():
|
290 |
with gr.Column():
|
291 |
-
|
|
|
|
|
|
|
|
|
292 |
b2 = gr.Button("Extract text information")
|
293 |
-
b2.click(fn=
|
|
|
294 |
|
295 |
demo.launch()
|
|
|
169 |
|
170 |
|
171 |
# Main function to process both PDFs based on the Excel file names and the sheet name
|
172 |
+
def process_pdfs_and_analyze_sentiment(file1, file2, sheet):
    """Analyze the topic-related sentences of two PDFs and return both results.

    The text of each PDF is obtained with ``extract_text_from_pdf``, narrowed
    down with ``find_sentences_with_keywords`` to the sentences matching the
    search terms registered for ``sheet``, joined into one string per PDF, and
    passed to ``fin_ext_bis`` for sentiment analysis.

    Args:
        file1: First PDF, forwarded unchanged to ``extract_text_from_pdf``.
        file2: Second PDF, forwarded unchanged to ``extract_text_from_pdf``.
        sheet: Sheet name used to look up the search terms. A name that is
            not in the table below yields an empty list of terms.

    Returns:
        A 2-tuple ``(result_for_file1, result_for_file2)`` holding whatever
        ``fin_ext_bis`` returns for each joined text.
    """
    # Stage 1: raw text of each document, first PDF first.
    raw_texts = [extract_text_from_pdf(pdf) for pdf in (file1, file2)]

    # Stage 2: search terms associated with each supported sheet name.
    terms_by_sheet = {
        'GDP': ['GDP'],
        'HICP': ['HICP'],
        'RRE prices': ['RRE', 'residential'],
        'CRE prices': ['CRE', 'commercial'],
        'Unemployment': ['unemployment'],
    }
    search_terms = terms_by_sheet.get(sheet, [])

    # Stage 3: keep only the sentences that match the search terms.
    matching_sentences = [
        find_sentences_with_keywords(raw_text, search_terms)
        for raw_text in raw_texts
    ]

    # Stage 4: collapse each document's sentences into one space-separated string.
    joined_texts = [" ".join(sentences) for sentences in matching_sentences]

    # Stage 5: sentiment analysis per document, returned in input order.
    return tuple(fin_ext_bis(joined) for joined in joined_texts)
|
200 |
|
201 |
stored_paragraphs_1 = []
|
202 |
stored_paragraphs_2 = []
|
|
|
283 |
b1.click(fn=process_and_compare, inputs=[file1, sheet, file2, sheet], outputs=result)
|
284 |
with gr.Row():
|
285 |
with gr.Column():
|
286 |
+
sentiment_results_pdf1 = gr.HighlightedText(label="Sentiment Analysis - PDF 1")
|
287 |
+
with gr.Column():
|
288 |
+
sentiment_results_pdf2 = gr.HighlightedText(label="Sentiment Analysis - PDF 2")
|
289 |
+
|
290 |
+
# Button to extract text from PDFs and perform sentiment analysis
|
291 |
b2 = gr.Button("Extract text information")
|
292 |
+
b2.click(fn=process_pdfs_and_analyze_sentiment, inputs=[file1, file2, sheet], outputs=[sentiment_results_pdf1, sentiment_results_pdf2])
|
293 |
+
|
294 |
|
295 |
demo.launch()
|