viboognesh committed on
Commit
f6df22c
·
verified ·
1 Parent(s): 748d430

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -66
app.py CHANGED
@@ -2,9 +2,6 @@ import streamlit as st
2
  from PyPDF2 import PdfReader
3
  from anthropic import Anthropic
4
  from prompts import DIFFERENTIATE_PROMPT
5
- from concurrent.futures import ProcessPoolExecutor
6
-
7
-
8
 
9
  def extract_differences(input_text):
10
  input_text = input_text.strip()
@@ -48,8 +45,6 @@ def get_llm_response(extractedtext1, extractedtext2):
48
 
49
  try:
50
  try:
51
- before_differences = message_text.split("<differences>")[0]
52
- after_differences = message_text.split("</differences>")[1]
53
  differences_list = extract_differences(message_text.split("<differences>")[1].split("</differences>")[0].strip())
54
  except Exception as e:
55
  differences_list = extract_differences(message_text)
@@ -57,36 +52,9 @@ def get_llm_response(extractedtext1, extractedtext2):
57
  print("Error:", e)
58
  return message_text, []
59
 
60
- difference_content = "\n\n\n".join([f"**Text1:**\n\n{d['text1']}\n\n**Text2:**\n\n{d['text2']}\n\n**Explanation:**\n\n{d['explanation']}\n\n----------------------\n" for d in differences_list])
61
  # display_text = f"{before_differences}\n\n{difference_content}\n\n{after_differences}"
62
- display_text = difference_content
63
- return display_text, differences_list
64
-
65
- def process_text_pair(pair):
66
- etext1, etext2 = pair
67
- dtext, pdata = get_llm_response(etext1, etext2)
68
- return dtext, pdata
69
-
70
- def process_concurrently(extracted_text1, extracted_text2):
71
- # Create a pool of worker processes
72
- with ProcessPoolExecutor(max_workers=5) as executor:
73
- # Submit tasks to the pool
74
- futures = [executor.submit(process_text_pair, (etext1, etext2))
75
- for etext1, etext2 in zip(extracted_text1, extracted_text2)]
76
-
77
- # Collect results
78
- display_text = ""
79
- parsed_data = []
80
- for future in futures:
81
- result = future.result()
82
- display_text += result[0]
83
- parsed_data += result[1]
84
-
85
- # Combine results
86
- # display_text = ''.join(display_texts)
87
- # parsed_data = ''.join(parsed_datas)
88
-
89
- return display_text, parsed_data
90
 
91
 
92
  def extract_text_with_pypdf(pdf_path):
@@ -99,32 +67,39 @@ def extract_text_with_pypdf(pdf_path):
99
 
100
  def main():
101
  st.set_page_config(layout="wide") # Enable wide layout
 
 
 
 
 
 
102
  st.markdown('<div style="text-align: center;">' + '<h1>PDF Upload and Compare App</h1>' + '</div>', unsafe_allow_html=True)
103
 
104
  # Create columns for side-by-side buttons
105
  col1, col2 = st.columns([2, 2])
106
 
107
  # Add upload button to left column
108
- uploaded_file1 = col1.file_uploader("**Text 1**", type="pdf")
109
 
110
  # Add upload button to right column
111
- uploaded_file2 = col2.file_uploader("**Text 2**", type="pdf")
 
112
 
113
  # Check if both files are uploaded
114
- if uploaded_file1 and uploaded_file2:
115
  # Get filenames from uploaded files
116
- filename1 = uploaded_file1.name
117
- filename2 = uploaded_file2.name
118
 
119
  try:
120
- extracted_text1 = extract_text_with_pypdf(uploaded_file1)
121
- extracted_text2 = extract_text_with_pypdf(uploaded_file2)
122
  with col1.expander(filename1):
123
  st.write("\n\n".join("\n\n------------------------\n\n".join(extracted_text1).splitlines()))
124
 
125
  with col2.expander(filename2):
126
  st.write("\n\n".join("\n\n------------------------\n\n".join(extracted_text2).splitlines()))
127
-
128
  st.success(f"Content of files **{filename1}** and **{filename2}** have been extracted successfully.")
129
  except Exception as e:
130
  st.error(f"Error saving files: {str(e)}")
@@ -132,32 +107,42 @@ def main():
132
  # Add button at the bottom to run Find Differences function
133
  if st.button("Find Differences"):
134
  try:
135
- display_text = ""
136
- parsed_data = []
137
- # for etext1, etext2 in zip(extracted_text1, extracted_text2):
138
- # dtext, pdata = get_llm_response(etext1, etext2)
139
- # display_text += dtext
140
- # parsed_data += pdata
141
- display_text, parsed_data = process_concurrently(extracted_text1, extracted_text2)
142
  # display_text, parsed_data = get_llm_response(extracted_text1, extracted_text2)
143
- display_text1 = "\n\n------------------------\n\n".join(extracted_text1)
144
- display_text2 = "\n\n------------------------\n\n".join(extracted_text2)
145
-
146
- for diff in parsed_data:
147
- diff_text1 = diff['text1'].strip()
148
- diff_text2 = diff['text2'].strip()
149
- if diff_text1 == "" or diff_text2 == "": continue
150
- diff_text1_phrase = "\n".join([f"<span style='background-color: grey;'>{t}</span>" for t in diff_text1.splitlines()])
151
- diff_text2_phrase = "\n".join([f"<span style='background-color: grey;'>{t}</span>" for t in diff_text2.splitlines()])
152
- display_text1 = diff_text1_phrase.join(display_text1.split(diff_text1)) if diff_text1 in display_text1 else display_text1
153
- display_text2 = diff_text2_phrase.join(display_text2.split(diff_text2)) if diff_text2 in display_text2 else display_text2
154
- with col1.expander(f"{filename1} Difference Highlighted"):
155
- st.write("\n\n".join(display_text1.splitlines()), unsafe_allow_html=True)
156
-
157
- with col2.expander(f"{filename2} Difference Highlighted"):
158
- st.write("\n\n".join(display_text2.splitlines()), unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
159
 
160
- st.markdown(display_text)
 
 
 
 
 
 
 
161
  except Exception as e:
162
  st.error(f"Error finding differences: {str(e)}")
163
 
 
2
  from PyPDF2 import PdfReader
3
  from anthropic import Anthropic
4
  from prompts import DIFFERENTIATE_PROMPT
 
 
 
5
 
6
  def extract_differences(input_text):
7
  input_text = input_text.strip()
 
45
 
46
  try:
47
  try:
 
 
48
  differences_list = extract_differences(message_text.split("<differences>")[1].split("</differences>")[0].strip())
49
  except Exception as e:
50
  differences_list = extract_differences(message_text)
 
52
  print("Error:", e)
53
  return message_text, []
54
 
 
55
  # display_text = f"{before_differences}\n\n{difference_content}\n\n{after_differences}"
56
+ return differences_list
57
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
59
 
60
  def extract_text_with_pypdf(pdf_path):
 
67
 
68
  def main():
69
  st.set_page_config(layout="wide") # Enable wide layout
70
+ if "differences_data" not in st.session_state:
71
+ st.session_state.differences_data = []
72
+ if "file1" not in st.session_state:
73
+ st.session_state.file1 = None
74
+ if "file2" not in st.session_state:
75
+ st.session_state.file2 = None
76
  st.markdown('<div style="text-align: center;">' + '<h1>PDF Upload and Compare App</h1>' + '</div>', unsafe_allow_html=True)
77
 
78
  # Create columns for side-by-side buttons
79
  col1, col2 = st.columns([2, 2])
80
 
81
  # Add upload button to left column
82
+ st.session_state.file1 = col1.file_uploader("**Text 1**", type="pdf")
83
 
84
  # Add upload button to right column
85
+ st.session_state.file2 = col2.file_uploader("**Text 2**", type="pdf")
86
+
87
 
88
  # Check if both files are uploaded
89
+ if st.session_state.file1 and st.session_state.file2:
90
  # Get filenames from uploaded files
91
+ filename1 = st.session_state.file1.name
92
+ filename2 = st.session_state.file2.name
93
 
94
  try:
95
+ extracted_text1 = extract_text_with_pypdf(st.session_state.file1)
96
+ extracted_text2 = extract_text_with_pypdf(st.session_state.file2)
97
  with col1.expander(filename1):
98
  st.write("\n\n".join("\n\n------------------------\n\n".join(extracted_text1).splitlines()))
99
 
100
  with col2.expander(filename2):
101
  st.write("\n\n".join("\n\n------------------------\n\n".join(extracted_text2).splitlines()))
102
+
103
  st.success(f"Content of files **{filename1}** and **{filename2}** have been extracted successfully.")
104
  except Exception as e:
105
  st.error(f"Error saving files: {str(e)}")
 
107
  # Add button at the bottom to run Find Differences function
108
  if st.button("Find Differences"):
109
  try:
110
+ # display_text, parsed_data = process_concurrently(extracted_text1, extracted_text2)
 
 
 
 
 
 
111
  # display_text, parsed_data = get_llm_response(extracted_text1, extracted_text2)
112
+
113
+ i = 1
114
+ for etext1, etext2 in zip(extracted_text1, extracted_text2):
115
+ data = next((d for d in st.session_state.differences_data if d['etext1'] == etext1 and d['etext2'] == etext2), None)
116
+ if data:
117
+ pdata = data['pdata']
118
+ dext1 = data['dext1']
119
+ dext2 = data['dext2']
120
+ else:
121
+ pdata = get_llm_response(etext1, etext2)
122
+ dext1 = etext1
123
+ dext2 = etext2
124
+
125
+ for diff in pdata:
126
+ diff_text1 = diff['text1'].strip()
127
+ diff_text2 = diff['text2'].strip()
128
+ if diff_text1 == "" or diff_text2 == "": continue
129
+ diff_text1_phrase = "\n".join([f"<span style='background-color: grey;'>{t}</span>" for t in diff_text1.splitlines()])
130
+ diff_text2_phrase = "\n".join([f"<span style='background-color: grey;'>{t}</span>" for t in diff_text2.splitlines()])
131
+ dext1 = diff_text1_phrase.join(dext1.split(diff_text1)) if diff_text1 in dext1 else dext1
132
+ dext2 = diff_text2_phrase.join(dext2.split(diff_text2)) if diff_text2 in dext2 else dext2
133
+
134
+ st.session_state.differences_data.append({"etext1": etext1, "etext2": etext2, "pdata": pdata, "dext1": dext1, "dext2": dext2})
135
+ reverse_pdata = [{'text1': d['text2'], 'text2': d['text1'], 'explanation': d['explanation']} for d in pdata]
136
+ st.session_state.differences_data.append({"etext1": etext2, "etext2": etext1, "pdata": reverse_pdata, "dext1": dext2, "dext2": dext1})
137
 
138
+ display_text = "\n\n\n".join([f"**Text1:**\n\n{d['text1']}\n\n**Text2:**\n\n{d['text2']}\n\n**Explanation:**\n\n{d['explanation']}\n\n----------------------\n" for d in pdata])
139
+
140
+ with st.expander(f"**Page {i}** - {filename1}"):
141
+ st.markdown("\n\n".join(dext1.splitlines()), unsafe_allow_html=True)
142
+ with st.expander(f"**Page {i}** - {filename2}"):
143
+ st.markdown("\n\n".join(dext2.splitlines()), unsafe_allow_html=True)
144
+ st.markdown(display_text)
145
+ i += 1
146
  except Exception as e:
147
  st.error(f"Error finding differences: {str(e)}")
148