File size: 5,042 Bytes
4bd484d
 
 
ead511a
 
fd85cd0
e35dc50
4bd484d
e35dc50
 
14a0e81
ead511a
4bd484d
528891b
 
16a0a0c
ead511a
16a0a0c
ead511a
39ea5ba
fd85cd0
4bd484d
 
ead511a
e35dc50
 
 
4bd484d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ead511a
4bd484d
a01800e
35da82e
 
 
 
 
 
 
 
 
d25fa67
 
35da82e
d25fa67
 
35da82e
 
d25fa67
35da82e
 
 
a01800e
d25fa67
35da82e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d25fa67
35da82e
 
 
 
a01800e
35da82e
 
a01800e
35da82e
 
a01800e
35da82e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import os
import shutil
import subprocess
import tempfile
import time
from datetime import datetime
from io import BytesIO

import gradio as gr
import PyPDF2
import streamlit as st
from docx import Document
from openai import OpenAI
from pydub import AudioSegment

# Streamlit page setup: this is the first Streamlit call in the script and
# sets the page title, followed by the on-page heading and a short caption.
st.set_page_config(page_title="Schlager ContractAi")
st.title("Schlager ContractAi")
st.caption("Chat with your contract or manage meeting minutes")

# Sidebar: password-style text field for the access key. The entered value is
# what contract_chat_section passes to OpenAI(api_key=...); that function
# refuses to continue while the value is empty.
with st.sidebar:
    OPENAI_API_KEY = st.text_input("Enter your C2 Group of Technologies Access Key", type="password")

# One tab per feature. tab1 and tab2 host the two chat sections and tab4 the
# PDF text extraction; tab3 ("Minutes") is not used in the visible part of
# this file.
tab1, tab2, tab3, tab4 = st.tabs(["Contract", "Technical", "Minutes", "Document Preparation"])

# File extensions grouped by kind. Neither tuple is referenced in the visible
# part of this file -- presumably intended for the Minutes tab; TODO confirm.
SUPPORTED_AUDIO_FORMATS = (".mp3", ".wav", ".m4a")
SUPPORTED_TEXT_FORMATS = (".txt", ".docx", ".csv", ".xlsx", ".pdf")

def install_dependencies():
    """Make sure the external OCR tools used by process_pdf are installed.

    ``pdftoppm`` (from poppler-utils) and ``tesseract`` are looked up on PATH
    first; when both are already present nothing is run, so repeated
    executions of this script do not pay for ``apt-get update`` every time.

    Otherwise the packages are installed through ``apt-get``. Failures are
    reported with ``print`` and never raised: this covers a non-zero exit
    status as well as ``apt-get`` itself being unavailable (OSError), so a
    host without apt cannot crash the app at import time.
    """
    if all(shutil.which(tool) for tool in ("pdftoppm", "tesseract")):
        return

    try:
        subprocess.run(['apt-get', 'update'], check=True)
        subprocess.run(
            ['apt-get', 'install', '-y', 'poppler-utils', 'tesseract-ocr', 'tesseract-ocr-eng'],
            check=True,
        )
        print("Packages installed successfully!")
    except (subprocess.CalledProcessError, OSError) as e:
        print(f"An error occurred: {e}")

# Called at module level, i.e. on every execution of this script, before any
# of the PDF handling below is used.
install_dependencies()

def _run_tool(command):
    """Run an external command (argument list, no shell); return True on exit status 0."""
    try:
        subprocess.run(command, check=True)
    except (OSError, subprocess.CalledProcessError) as e:
        print(f"An error occurred: {e}")
        return False
    return True


def process_pdf(file):
    """OCR a PDF into a plain-text file and return that file's path.

    The PDF is rasterised page by page with ``pdftoppm``, each page image is
    run through ``tesseract``, and the recognised text of all pages is
    concatenated in page order, one newline after each page.

    Args:
        file: Either an in-memory upload (an object with ``name`` and
            ``getvalue()``) or an object whose ``name`` is the path of a PDF
            already on disk.

    Returns:
        Path of the written ``.txt`` file. For an in-memory upload it is the
        upload's base name with a ``.txt`` extension, in the current working
        directory; for an on-disk PDF it sits next to the PDF.

    Tool failures (missing binary, non-zero exit) are printed and the
    affected output is skipped rather than raised, so a failed conversion
    yields an empty or partial text file.
    """
    source_name = file.name

    # All intermediates live in a private directory: nothing else in the
    # working directory is read, OCR'd or deleted, and cleanup is automatic.
    with tempfile.TemporaryDirectory() as work_dir:
        if hasattr(file, "getvalue"):
            # In-memory upload: ``name`` is only a label supplied by the
            # client, so the bytes must be written out before pdftoppm can
            # read them, and only the base name is trusted for the output.
            input_pdf = os.path.join(work_dir, "input.pdf")
            with open(input_pdf, "wb") as pdf_out:
                pdf_out.write(file.getvalue())
            stem = os.path.splitext(os.path.basename(source_name))[0] or "output"
        else:
            input_pdf = source_name
            stem = os.path.splitext(source_name)[0]
        output_txt_file = f"{stem}.txt"

        # Argument lists instead of shell strings: the file name can never be
        # interpreted as shell syntax.
        _run_tool(["pdftoppm", "-png", input_pdf, os.path.join(work_dir, "img")])

        # pdftoppm zero-pads page numbers, so a lexical sort is page order.
        page_images = sorted(
            entry
            for entry in os.listdir(work_dir)
            if entry.startswith("img") and entry.endswith(".png")
        )

        with open(output_txt_file, "w", encoding="utf-8") as output_file:
            for image in page_images:
                # tesseract appends ".txt" to the output base it is given.
                ocr_base = os.path.join(work_dir, f"ocr_{image}")
                if not _run_tool(["tesseract", os.path.join(work_dir, image), ocr_base]):
                    continue
                page_txt = f"{ocr_base}.txt"
                if not os.path.isfile(page_txt):
                    continue
                with open(page_txt, "r", encoding="utf-8", errors="replace") as page_text:
                    output_file.write(page_text.read())
                    output_file.write("\n")

    return output_txt_file

# "Document Preparation" tab: upload one PDF, OCR it with process_pdf and
# offer the extracted text as a download.
with tab4:
    st.subheader("Document Preparation")
    uploaded_file = st.file_uploader("Upload a PDF (Max: 200MB)", type=["pdf"], accept_multiple_files=False)

    if uploaded_file:
        st.write("Processing the uploaded document...")
        result_file = process_pdf(uploaded_file)

        # Read through a context manager so the file handle is closed
        # immediately instead of being left open after each run of the script.
        with open(result_file, "rb") as extracted:
            extracted_bytes = extracted.read()

        st.download_button(
            label="Download Extracted Text",
            data=extracted_bytes,
            file_name=result_file,
            mime="text/plain"
        )

# Contract Chat Section
def contract_chat_section(tab, assistant_id, session_key, input_key):
    """Render an assistant-backed chat UI inside *tab*.

    Args:
        tab: Streamlit container (one of the tabs) to draw into.
        assistant_id: ID of the assistant that answers the prompts.
        session_key: ``st.session_state`` key holding this chat's history as
            a list of ``{"role": ..., "content": ...}`` dicts.
        input_key: Widget key for this chat's input box, so several sections
            can coexist on one page.

    Halts the script with ``st.stop()`` when no access key has been entered.
    Each prompt is sent on a freshly created thread, so the assistant only
    receives the current message, not the history shown on screen. Any
    exception raised while talking to the API, including a run that ends in
    a non-completed state, is shown with ``st.error`` instead of propagating.
    """
    # Run states that will never turn into "completed" here: the terminal
    # failure states, plus "requires_action", which waits for tool outputs
    # this function never submits.
    unrecoverable_states = ("failed", "cancelled", "expired", "incomplete", "requires_action")

    with tab:
        st.subheader("Chat")

        if OPENAI_API_KEY:
            client = OpenAI(api_key=OPENAI_API_KEY)
        else:
            st.error("Please enter your C2 Group of Technologies Access Key to continue.")
            st.stop()

        if session_key not in st.session_state:
            st.session_state[session_key] = []

        if st.button("Clear Chat", key=f"clear_chat_{session_key}"):
            st.session_state[session_key] = []
            st.rerun()

        # Replay the stored history so it is still on screen after a rerun.
        for message in st.session_state[session_key]:
            role, content = message["role"], message["content"]
            st.chat_message(role).write(content)

        if prompt := st.chat_input("Enter your message:", key=input_key):
            st.session_state[session_key].append({"role": "user", "content": prompt})
            st.chat_message("user").write(prompt)

            try:
                thread = client.beta.threads.create()
                thread_id = thread.id
                client.beta.threads.messages.create(
                    thread_id=thread_id,
                    role="user",
                    content=prompt
                )

                run = client.beta.threads.runs.create(
                    thread_id=thread_id,
                    assistant_id=assistant_id
                )

                # Poll once per second until the run finishes. Bail out on
                # states that cannot complete; otherwise the loop would spin
                # forever and freeze the page.
                while True:
                    run_status = client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run.id)
                    if run_status.status == "completed":
                        break
                    if run_status.status in unrecoverable_states:
                        detail = getattr(run_status, "last_error", None)
                        reason = f": {detail}" if detail else ""
                        raise RuntimeError(
                            f"Assistant run ended with status '{run_status.status}'{reason}"
                        )
                    time.sleep(1)

                # The first entry of the listing is taken as the assistant's reply.
                messages = client.beta.threads.messages.list(thread_id=thread_id)
                assistant_message = messages.data[0].content[0].text.value
                st.chat_message("assistant").write(assistant_message)
                st.session_state[session_key].append({"role": "assistant", "content": assistant_message})
            except Exception as e:
                st.error(f"Error: {str(e)}")

# Assistant IDs passed as assistant_id when contract_chat_section starts a run.
ASSISTANT_CONTRACT_ID = "asst_rd9h8PfYuOmHbkvOF3RTmVfn"
ASSISTANT_TECHNICAL_ID = "asst_xizNZBCJuy4TqdjqjwkxbAki"

# "Contract" tab: chat answered by the contract assistant, with its own
# session-state history key and input widget key.
contract_chat_section(tab1, ASSISTANT_CONTRACT_ID, "contract_messages", "contract_input")

# "Technical" tab: same chat UI, answered by the technical assistant and
# kept separate through distinct history and input keys.
contract_chat_section(tab2, ASSISTANT_TECHNICAL_ID, "technical_messages", "technical_input")