File size: 5,631 Bytes
180a0cc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3073f86
180a0cc
 
 
 
 
 
3073f86
180a0cc
3073f86
 
180a0cc
 
3073f86
89d3f24
180a0cc
 
 
89d3f24
180a0cc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89d3f24
180a0cc
 
 
 
 
 
 
 
 
 
3073f86
180a0cc
 
3073f86
180a0cc
 
 
 
 
 
 
 
89d3f24
 
 
 
 
 
3073f86
 
 
 
 
67b4e55
3073f86
89d3f24
67b4e55
3073f86
89d3f24
180a0cc
 
 
 
 
3073f86
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import streamlit as st
from PyPDF2 import PdfReader
from anthropic import Anthropic
from prompts import DIFFERENTIATE_PROMPT

def extract_differences(input_text):
    """Parse ``<difference>`` entries out of a model reply into dictionaries.

    The input is cut at every ``</difference>`` marker. A chunk is kept only
    when it contains the opening and closing form of all three tags
    (``text1_section``, ``text2_section``, ``explanation``); incomplete chunks,
    such as the trailing remainder after the last marker, are dropped.

    Args:
        input_text: Text containing zero or more difference entries.

    Returns:
        A list of dicts with the keys ``'text1'``, ``'text2'`` and
        ``'explanation'``, each holding the whitespace-stripped tag content,
        in the order the entries appear in the input.
    """
    # (tag name in the markup, key used in the resulting dictionary)
    tag_to_key = (
        ("text1_section", "text1"),
        ("text2_section", "text2"),
        ("explanation", "explanation"),
    )

    def between(chunk, tag):
        # Text that follows the first opening tag, cut at the next closing tag.
        after_open = chunk.split(f"<{tag}>", 2)[1]
        return after_open.split(f"</{tag}>", 1)[0].strip()

    results = []
    for chunk in input_text.strip().split("</difference>"):
        is_complete = all(
            f"<{tag}>" in chunk and f"</{tag}>" in chunk for tag, _ in tag_to_key
        )
        if not is_complete:
            continue
        results.append({key: between(chunk, tag) for tag, key in tag_to_key})

    return results

@st.cache_data
def make_llm_api_call(prompt):
    """Send *prompt* to the Anthropic Messages API and return the response.

    The function is wrapped with ``st.cache_data`` so that Streamlit reruns
    with an identical prompt reuse the earlier response instead of issuing a
    new request. (Previously ``st.cache_data()`` was called as a stand-alone
    statement, which decorated nothing.)

    Args:
        prompt: Complete prompt text, sent as a single user message.

    Returns:
        The message object returned by ``client.messages.create``.
    """
    client = Anthropic()
    message = client.messages.create(
        model="claude-3-haiku-20240307",
        max_tokens=4096,
        temperature=0,
        messages=[{"role": "user", "content": [{"type": "text", "text": prompt}]}],
    )
    return message


def get_llm_response(extractedtext1, extractedtext2):
    """Ask the model to compare two texts and format its reply for display.

    Args:
        extractedtext1: First text, substituted into the prompt as ``text1``.
        extractedtext2: Second text, substituted into the prompt as ``text2``.

    Returns:
        A ``(display_text, differences_list)`` tuple. ``display_text`` is the
        reply with its ``<differences>`` block replaced by a markdown
        rendering of each parsed difference. If the reply cannot be split on
        the ``<differences>`` tags, the raw reply and an empty list are
        returned instead.
    """
    response = make_llm_api_call(
        DIFFERENTIATE_PROMPT.format(text1=extractedtext1, text2=extractedtext2)
    )
    raw_reply = response.content[0].text

    open_tag = "<differences>"
    close_tag = "</differences>"
    try:
        around_open = raw_reply.split(open_tag)
        around_close = raw_reply.split(close_tag)
        before_differences = around_open[0]
        # Indexing raises IndexError when a tag is missing from the reply.
        after_differences = around_close[1]
        inner_block = around_open[1].split(close_tag)[0].strip()
        differences_list = extract_differences(inner_block)
    except Exception as e:
        print("Error:", e)
        return raw_reply, []

    rendered_entries = []
    for entry in differences_list:
        rendered_entries.append(
            f"**Text1:**\n\n{entry['text1']}\n\n**Text2:**\n\n{entry['text2']}\n\n**Explanation:**\n\n{entry['explanation']}\n\n----------------------"
        )
    difference_content = "\n\n\n".join(rendered_entries)

    display_text = f"{before_differences}\n\n{difference_content}\n\n{after_differences}"
    return display_text, differences_list
    

def extract_text_with_pypdf(pdf_path):
    """Return the text of every page of a PDF, each page followed by a newline.

    Args:
        pdf_path: Source handed straight to ``PdfReader``.

    Returns:
        One string with the extracted text of all pages in page order.
    """
    pages = PdfReader(pdf_path).pages
    return "".join(page.extract_text() + "\n" for page in pages)


def _highlight_snippet(text, snippet):
    """Return *text* with every occurrence of *snippet* wrapped in grey spans.

    Each line of the snippet gets its own ``<span>`` so the highlight survives
    the later line-by-line re-joining of the text. An empty snippet is
    ignored: it would otherwise match everywhere (and splitting on an empty
    separator raises ``ValueError``).
    """
    if not snippet:
        return text
    highlighted = "\n".join(
        f"<span style='background-color: grey;'>{line}</span>"
        for line in snippet.splitlines()
    )
    return text.replace(snippet, highlighted)


def main():
    """Render the page: upload two PDFs, show their text, and diff them.

    Both uploads are required. Their extracted text is shown in expanders;
    pressing "Find Differences" sends the two texts to the model, highlights
    the reported sections in each text and prints the model's explanation.
    """
    st.set_page_config(layout="wide")  # Enable wide layout
    st.markdown('<div style="text-align: center;">' + '<h1>PDF Upload and Compare App</h1>' + '</div>', unsafe_allow_html=True)

    # Side-by-side upload widgets, one per document.
    col1, col2 = st.columns([2, 2])
    uploaded_file1 = col1.file_uploader("**Text 1**", type="pdf")
    uploaded_file2 = col2.file_uploader("**Text 2**", type="pdf")

    # Nothing to do until both documents are present.
    if not (uploaded_file1 and uploaded_file2):
        return

    filename1 = uploaded_file1.name
    filename2 = uploaded_file2.name

    try:
        extracted_text1 = extract_text_with_pypdf(uploaded_file1)
        extracted_text2 = extract_text_with_pypdf(uploaded_file2)
    except Exception as e:
        st.error(f"Error extracting text from files: {str(e)}")
        # Without the extracted text the comparison below cannot run, so stop
        # here instead of offering a button that would fail with a NameError.
        return

    with col1.expander(filename1):
        st.write("\n\n".join(extracted_text1.splitlines()))

    with col2.expander(filename2):
        st.write("\n\n".join(extracted_text2.splitlines()))

    st.success(f"Content of files **{filename1}** and **{filename2}** have been extracted successfully.")

    if not st.button("Find Differences"):
        return

    try:
        display_text, parsed_data = get_llm_response(extracted_text1, extracted_text2)
        display_text1 = extracted_text1
        display_text2 = extracted_text2

        for diff in parsed_data:
            display_text1 = _highlight_snippet(display_text1, diff['text1'].strip())
            display_text2 = _highlight_snippet(display_text2, diff['text2'].strip())

        # NOTE(review): the PDF text is rendered with unsafe_allow_html=True
        # and is not HTML-escaped, so markup inside an uploaded PDF would be
        # interpreted by the browser. Consider escaping before highlighting.
        with col1.expander(f"{filename1} Difference Highlighted"):
            st.write("\n\n".join(display_text1.splitlines()), unsafe_allow_html=True)

        with col2.expander(f"{filename2} Difference Highlighted"):
            st.write("\n\n".join(display_text2.splitlines()), unsafe_allow_html=True)

        st.markdown(display_text)
    except Exception as e:
        st.error(f"Error finding differences: {str(e)}")

# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()