|
import os |
|
import gradio as gr |
|
import subprocess |
|
import streamlit as st |
|
from openai import OpenAI |
|
import time |
|
import PyPDF2 |
|
from docx import Document |
|
from datetime import datetime |
|
from pydub import AudioSegment |
|
from io import BytesIO |
|
|
|
|
|
# File-extension groups for audio and text inputs (not referenced in this part
# of the file).
SUPPORTED_AUDIO_FORMATS = (".mp3", ".wav", ".m4a")
SUPPORTED_TEXT_FORMATS = (".txt", ".docx", ".csv", ".xlsx", ".pdf")

# Page chrome: page title, on-page heading and tagline.
st.set_page_config(page_title="Schlager ContractAi")
st.title("Schlager ContractAi")
st.caption("Chat with your contract or manage meeting minutes")

# The access key is collected in the sidebar as a masked field; the chat
# sections pass it to the OpenAI client.
OPENAI_API_KEY = st.sidebar.text_input(
    "Enter your C2 Group of Technologies Access Key",
    type="password",
)

# One tab per feature area, in display order.
tab1, tab2, tab3, tab4 = st.tabs(
    ["Contract", "Technical", "Minutes", "Document Preparation"]
)
|
|
|
def install_dependencies():
    """Install the system packages that the PDF OCR step shells out to.

    Runs ``apt-get update`` followed by ``apt-get install -y`` for
    poppler-utils, tesseract-ocr and tesseract-ocr-eng, then prints a success
    message.

    Nothing is installed when both the ``pdftoppm`` and ``tesseract``
    executables are already on PATH, so repeated executions of this script do
    not invoke apt-get again.

    Failures are reported with ``print`` rather than raised, including the
    case where ``apt-get`` itself cannot be started.
    """
    import shutil  # local import: only needed for the PATH lookup below

    # Skip the package-manager round trip when the tools are already present.
    if shutil.which("pdftoppm") and shutil.which("tesseract"):
        return

    packages = ("poppler-utils", "tesseract-ocr", "tesseract-ocr-eng")
    try:
        subprocess.run(["apt-get", "update"], check=True)
        for package in packages:
            subprocess.run(["apt-get", "install", "-y", package], check=True)
        print("Packages installed successfully!")
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers an apt-get binary that is missing
        # (FileNotFoundError) or not executable (PermissionError).
        print(f"An error occurred: {e}")


install_dependencies()
|
|
|
def process_pdf(file):
    """OCR a PDF into a plain-text file and return that file's path.

    Each page is rasterised to PNG with ``pdftoppm`` and recognised with
    ``tesseract``. The per-page text is concatenated in page order, with one
    newline written after each page.

    Args:
        file: Either an in-memory upload exposing ``getvalue()`` and ``name``
            (its bytes are first written to a scratch directory), or any
            object whose ``name`` is the path of a PDF already on disk.

    Returns:
        Path of the generated text file: the input name with its extension
        replaced by ``.txt``. For in-memory uploads only the base name is
        used, so the file is created in the current working directory.

    Raises:
        FileNotFoundError: ``pdftoppm`` or ``tesseract`` cannot be found.
        subprocess.CalledProcessError: One of the tools exited with an error.
    """
    import tempfile  # local import: scratch directory for intermediate files

    source_name = file.name

    # All intermediate images and per-page text live in a private scratch
    # directory, so nothing in the working directory is touched or deleted.
    with tempfile.TemporaryDirectory() as workdir:
        if hasattr(file, "getvalue"):
            # In-memory upload: ``name`` is only a label, so the bytes have
            # to be materialised before the command-line tools can read them.
            source_name = os.path.basename(source_name)
            pdf_path = os.path.join(workdir, "input.pdf")
            with open(pdf_path, "wb") as pdf_out:
                pdf_out.write(file.getvalue())
        else:
            pdf_path = source_name

        output_txt_file = f"{os.path.splitext(source_name)[0]}.txt"

        # Argument lists (no shell) keep odd characters in file names from
        # being interpreted as shell syntax.
        subprocess.run(
            ["pdftoppm", "-png", pdf_path, os.path.join(workdir, "img")],
            check=True,
        )

        # Sort by (length, name) so "img-2.png" precedes "img-10.png" whether
        # or not the page numbers are zero-padded.
        page_images = sorted(
            (
                entry
                for entry in os.listdir(workdir)
                if entry.startswith("img") and entry.endswith(".png")
            ),
            key=lambda entry: (len(entry), entry),
        )

        page_texts = []
        for image in page_images:
            # tesseract appends ".txt" to the output base it is given.
            text_base = os.path.join(workdir, f"ocr_{image}")
            subprocess.run(
                ["tesseract", os.path.join(workdir, image), text_base],
                check=True,
            )
            with open(f"{text_base}.txt", "r", encoding="utf-8") as page_file:
                page_texts.append(page_file.read())

    # Written only after every page succeeded, so a failed run does not leave
    # a truncated result behind.
    with open(output_txt_file, "w", encoding="utf-8") as output_file:
        for text in page_texts:
            output_file.write(text)
            output_file.write("\n")

    return output_txt_file
|
|
|
with tab4:
    st.subheader("Document Preparation")
    uploaded_file = st.file_uploader(
        "Upload a PDF (Max: 200MB)",
        type=["pdf"],
        accept_multiple_files=False,
    )

    if uploaded_file:
        # Streamlit re-executes this script on every widget interaction, so
        # the extracted text is kept in session state and the PDF is only
        # processed again when a different upload arrives. Uploads are told
        # apart by name and size; a different file matching both is treated
        # as the same upload.
        upload_id = (uploaded_file.name, uploaded_file.size)
        prepared = st.session_state.get("doc_prep_result")

        if prepared is None or prepared["upload_id"] != upload_id:
            st.write("Processing the uploaded document...")
            result_file = process_pdf(uploaded_file)
            # Read through a context manager so the handle is closed promptly.
            with open(result_file, "rb") as extracted:
                prepared = {
                    "upload_id": upload_id,
                    "file_name": result_file,
                    "data": extracted.read(),
                }
            st.session_state["doc_prep_result"] = prepared

        st.download_button(
            label="Download Extracted Text",
            data=prepared["data"],
            file_name=prepared["file_name"],
            mime="text/plain",
        )
|
|
|
|
|
def contract_chat_section(tab, assistant_id, session_key, input_key):
    """Render a chat UI inside *tab*, answered by an OpenAI assistant.

    Args:
        tab: Streamlit container, used as a context manager, to draw into.
        assistant_id: ID of the assistant that answers each prompt.
        session_key: ``st.session_state`` key holding this chat's history as a
            list of ``{"role": ..., "content": ...}`` dicts.
        input_key: Widget key for this section's chat input.

    When no access key has been entered, an error is shown and the whole
    script run is halted with ``st.stop()``. Failures while talking to the
    API, including a run that ends in any state other than ``completed``,
    are shown with ``st.error`` instead of being raised.
    """
    # Run states after which "completed" will not be reached by waiting.
    # Without this check the polling loop below would never exit.
    failed_statuses = (
        "failed",
        "cancelled",
        "expired",
        "incomplete",
        "requires_action",
    )

    with tab:
        st.subheader("Chat")

        if OPENAI_API_KEY:
            client = OpenAI(api_key=OPENAI_API_KEY)
        else:
            st.error("Please enter your C2 Group of Technologies Access Key to continue.")
            st.stop()

        if session_key not in st.session_state:
            st.session_state[session_key] = []

        if st.button("Clear Chat", key=f"clear_chat_{session_key}"):
            st.session_state[session_key] = []
            st.rerun()

        # Replay the stored history so it survives script reruns.
        for message in st.session_state[session_key]:
            role, content = message["role"], message["content"]
            st.chat_message(role).write(content)

        if prompt := st.chat_input("Enter your message:", key=input_key):
            st.session_state[session_key].append({"role": "user", "content": prompt})
            st.chat_message("user").write(prompt)

            try:
                # NOTE: a new thread is created for every prompt, so only the
                # current prompt is sent; earlier turns are display-only.
                thread = client.beta.threads.create()
                thread_id = thread.id
                client.beta.threads.messages.create(
                    thread_id=thread_id,
                    role="user",
                    content=prompt,
                )

                run = client.beta.threads.runs.create(
                    thread_id=thread_id,
                    assistant_id=assistant_id,
                )

                # Poll once per second until the run finishes.
                while True:
                    run_status = client.beta.threads.runs.retrieve(
                        thread_id=thread_id,
                        run_id=run.id,
                    )
                    if run_status.status == "completed":
                        break
                    if run_status.status in failed_statuses:
                        # Routed to the st.error handler below.
                        raise RuntimeError(
                            f"Assistant run ended with status '{run_status.status}'."
                        )
                    time.sleep(1)

                messages = client.beta.threads.messages.list(thread_id=thread_id)
                # The first listed message is taken as the assistant's reply.
                assistant_message = messages.data[0].content[0].text.value
                st.chat_message("assistant").write(assistant_message)
                st.session_state[session_key].append(
                    {"role": "assistant", "content": assistant_message}
                )
            except Exception as e:
                st.error(f"Error: {str(e)}")
|
|
|
# Assistant IDs answering in the "Contract" and "Technical" tabs respectively.
ASSISTANT_CONTRACT_ID = "asst_rd9h8PfYuOmHbkvOF3RTmVfn"
ASSISTANT_TECHNICAL_ID = "asst_xizNZBCJuy4TqdjqjwkxbAki"

# Each row wires one tab to its assistant and to its own session-state and
# widget keys; the sections are rendered in this order.
_CHAT_SECTIONS = (
    (tab1, ASSISTANT_CONTRACT_ID, "contract_messages", "contract_input"),
    (tab2, ASSISTANT_TECHNICAL_ID, "technical_messages", "technical_input"),
)

for _section_args in _CHAT_SECTIONS:
    contract_chat_section(*_section_args)
|
|
|
|