viboognesh commited on
Commit
748d430
·
verified ·
1 Parent(s): 3073f86

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -14
app.py CHANGED
@@ -2,6 +2,9 @@ import streamlit as st
2
  from PyPDF2 import PdfReader
3
  from anthropic import Anthropic
4
  from prompts import DIFFERENTIATE_PROMPT
 
 
 
5
 
6
  def extract_differences(input_text):
7
  input_text = input_text.strip()
@@ -44,23 +47,53 @@ def get_llm_response(extractedtext1, extractedtext2):
44
  message_text = message.content[0].text
45
 
46
  try:
47
- before_differences = message_text.split("<differences>")[0]
48
- after_differences = message_text.split("</differences>")[1]
49
- differences_list = extract_differences(message_text.split("<differences>")[1].split("</differences>")[0].strip())
 
 
 
50
  except Exception as e:
51
  print("Error:", e)
52
  return message_text, []
53
 
54
- difference_content = "\n\n\n".join([f"**Text1:**\n\n{d['text1']}\n\n**Text2:**\n\n{d['text2']}\n\n**Explanation:**\n\n{d['explanation']}\n\n----------------------" for d in differences_list])
55
- display_text = f"{before_differences}\n\n{difference_content}\n\n{after_differences}"
 
56
  return display_text, differences_list
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
 
 
 
 
 
 
58
 
59
  def extract_text_with_pypdf(pdf_path):
60
  reader = PdfReader(pdf_path)
61
- text = ""
62
  for page in reader.pages:
63
- text += page.extract_text() + "\n"
64
  return text
65
 
66
 
@@ -87,10 +120,10 @@ def main():
87
  extracted_text1 = extract_text_with_pypdf(uploaded_file1)
88
  extracted_text2 = extract_text_with_pypdf(uploaded_file2)
89
  with col1.expander(filename1):
90
- st.write("\n\n".join(extracted_text1.splitlines()))
91
 
92
  with col2.expander(filename2):
93
- st.write("\n\n".join(extracted_text2.splitlines()))
94
 
95
  st.success(f"Content of files **{filename1}** and **{filename2}** have been extracted successfully.")
96
  except Exception as e:
@@ -99,13 +132,21 @@ def main():
99
  # Add button at the bottom to run Find Differences function
100
  if st.button("Find Differences"):
101
  try:
102
- display_text, parsed_data = get_llm_response(extracted_text1, extracted_text2)
103
- display_text1 = extracted_text1
104
- display_text2 = extracted_text2
 
 
 
 
 
 
 
105
 
106
  for diff in parsed_data:
107
  diff_text1 = diff['text1'].strip()
108
- diff_text2 = diff['text2'].strip()
 
109
  diff_text1_phrase = "\n".join([f"<span style='background-color: grey;'>{t}</span>" for t in diff_text1.splitlines()])
110
  diff_text2_phrase = "\n".join([f"<span style='background-color: grey;'>{t}</span>" for t in diff_text2.splitlines()])
111
  display_text1 = diff_text1_phrase.join(display_text1.split(diff_text1)) if diff_text1 in display_text1 else display_text1
@@ -121,4 +162,4 @@ def main():
121
  st.error(f"Error finding differences: {str(e)}")
122
 
123
  if __name__ == "__main__":
124
- main()
 
2
  from PyPDF2 import PdfReader
3
  from anthropic import Anthropic
4
  from prompts import DIFFERENTIATE_PROMPT
5
+ from concurrent.futures import ProcessPoolExecutor
6
+
7
+
8
 
9
  def extract_differences(input_text):
10
  input_text = input_text.strip()
 
47
  message_text = message.content[0].text
48
 
49
  try:
50
+ try:
51
+ before_differences = message_text.split("<differences>")[0]
52
+ after_differences = message_text.split("</differences>")[1]
53
+ differences_list = extract_differences(message_text.split("<differences>")[1].split("</differences>")[0].strip())
54
+ except Exception as e:
55
+ differences_list = extract_differences(message_text)
56
  except Exception as e:
57
  print("Error:", e)
58
  return message_text, []
59
 
60
+ difference_content = "\n\n\n".join([f"**Text1:**\n\n{d['text1']}\n\n**Text2:**\n\n{d['text2']}\n\n**Explanation:**\n\n{d['explanation']}\n\n----------------------\n" for d in differences_list])
61
+ # display_text = f"{before_differences}\n\n{difference_content}\n\n{after_differences}"
62
+ display_text = difference_content
63
  return display_text, differences_list
64
+
65
+ def process_text_pair(pair):
66
+ etext1, etext2 = pair
67
+ dtext, pdata = get_llm_response(etext1, etext2)
68
+ return dtext, pdata
69
+
70
+ def process_concurrently(extracted_text1, extracted_text2):
71
+ # Create a pool of worker processes
72
+ with ProcessPoolExecutor(max_workers=5) as executor:
73
+ # Submit tasks to the pool
74
+ futures = [executor.submit(process_text_pair, (etext1, etext2))
75
+ for etext1, etext2 in zip(extracted_text1, extracted_text2)]
76
+
77
+ # Collect results
78
+ display_text = ""
79
+ parsed_data = []
80
+ for future in futures:
81
+ result = future.result()
82
+ display_text += result[0]
83
+ parsed_data += result[1]
84
 
85
+ # Combine results
86
+ # display_text = ''.join(display_texts)
87
+ # parsed_data = ''.join(parsed_datas)
88
+
89
+ return display_text, parsed_data
90
+
91
 
92
  def extract_text_with_pypdf(pdf_path):
93
  reader = PdfReader(pdf_path)
94
+ text = []
95
  for page in reader.pages:
96
+ text.append(page.extract_text())
97
  return text
98
 
99
 
 
120
  extracted_text1 = extract_text_with_pypdf(uploaded_file1)
121
  extracted_text2 = extract_text_with_pypdf(uploaded_file2)
122
  with col1.expander(filename1):
123
+ st.write("\n\n".join("\n\n------------------------\n\n".join(extracted_text1).splitlines()))
124
 
125
  with col2.expander(filename2):
126
+ st.write("\n\n".join("\n\n------------------------\n\n".join(extracted_text2).splitlines()))
127
 
128
  st.success(f"Content of files **{filename1}** and **{filename2}** have been extracted successfully.")
129
  except Exception as e:
 
132
  # Add button at the bottom to run Find Differences function
133
  if st.button("Find Differences"):
134
  try:
135
+ display_text = ""
136
+ parsed_data = []
137
+ # for etext1, etext2 in zip(extracted_text1, extracted_text2):
138
+ # dtext, pdata = get_llm_response(etext1, etext2)
139
+ # display_text += dtext
140
+ # parsed_data += pdata
141
+ display_text, parsed_data = process_concurrently(extracted_text1, extracted_text2)
142
+ # display_text, parsed_data = get_llm_response(extracted_text1, extracted_text2)
143
+ display_text1 = "\n\n------------------------\n\n".join(extracted_text1)
144
+ display_text2 = "\n\n------------------------\n\n".join(extracted_text2)
145
 
146
  for diff in parsed_data:
147
  diff_text1 = diff['text1'].strip()
148
+ diff_text2 = diff['text2'].strip()
149
+ if diff_text1 == "" or diff_text2 == "": continue
150
  diff_text1_phrase = "\n".join([f"<span style='background-color: grey;'>{t}</span>" for t in diff_text1.splitlines()])
151
  diff_text2_phrase = "\n".join([f"<span style='background-color: grey;'>{t}</span>" for t in diff_text2.splitlines()])
152
  display_text1 = diff_text1_phrase.join(display_text1.split(diff_text1)) if diff_text1 in display_text1 else display_text1
 
162
  st.error(f"Error finding differences: {str(e)}")
163
 
164
  if __name__ == "__main__":
165
+ main()