scai / app.py
IAMTFRMZA's picture
app.py
4bd484d verified
raw
history blame
5.04 kB
import os
import gradio as gr
import subprocess
import streamlit as st
from openai import OpenAI
import time
import PyPDF2
from docx import Document
from datetime import datetime
from pydub import AudioSegment
from io import BytesIO
# Page chrome: browser-tab title, on-page heading and tagline.
st.set_page_config(page_title="Schlager ContractAi")
st.title("Schlager ContractAi")
st.caption("Chat with your contract or manage meeting minutes")

# The access key is typed into a masked text field placed in the sidebar.
OPENAI_API_KEY = st.sidebar.text_input(
    "Enter your C2 Group of Technologies Access Key",
    type="password",
)

# Four tabs; tab1/tab2 host the assistant chats and tab4 hosts the PDF tool.
TAB_LABELS = ["Contract", "Technical", "Minutes", "Document Preparation"]
tab1, tab2, tab3, tab4 = st.tabs(TAB_LABELS)

# File-extension tuples (not referenced in the visible part of this file).
SUPPORTED_AUDIO_FORMATS = (".mp3", ".wav", ".m4a")
SUPPORTED_TEXT_FORMATS = (".txt", ".docx", ".csv", ".xlsx", ".pdf")
def install_dependencies():
    """Install the system packages needed for PDF rasterisation and OCR.

    Runs ``apt-get update`` and then installs ``poppler-utils``,
    ``tesseract-ocr`` and ``tesseract-ocr-eng`` one at a time.

    The whole step is skipped when both ``pdftoppm`` and ``tesseract`` are
    already on ``PATH``, so repeated script runs do not re-invoke apt.

    Failures are reported on stdout and never raised. This includes the
    case where ``apt-get`` itself is missing or cannot be executed, which
    previously crashed the app with an uncaught ``FileNotFoundError``.
    """
    import shutil  # local import: only this function needs it

    # Nothing to do when the command-line tools are already available.
    if shutil.which("pdftoppm") and shutil.which("tesseract"):
        return

    packages = ("poppler-utils", "tesseract-ocr", "tesseract-ocr-eng")
    try:
        subprocess.run(["apt-get", "update"], check=True)
        for package in packages:
            subprocess.run(["apt-get", "install", "-y", package], check=True)
        print("Packages installed successfully!")
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a missing or non-executable apt-get binary.
        print(f"An error occurred: {e}")
# Called at module level, so this runs every time the script is executed.
install_dependencies()
def process_pdf(file):
    """OCR a PDF and write the recognised text to a ``.txt`` file.

    The PDF is rasterised to one PNG per page with ``pdftoppm``, each page
    is run through ``tesseract``, and the per-page text is concatenated in
    page order, with a newline after every page.

    Args:
        file: Either an in-memory upload exposing ``.name`` and
            ``.getvalue()`` (for example a Streamlit ``UploadedFile``), or
            any object whose ``.name`` is the path of a PDF on disk.

    Returns:
        Path of the text file that was written: ``file.name`` with its
        extension replaced by ``.txt``.

    Raises:
        subprocess.CalledProcessError: If ``pdftoppm`` or ``tesseract``
            exits with a non-zero status.
        OSError: If either tool cannot be started or a file cannot be
            read or written.
    """
    import tempfile  # local import: only this function needs it

    input_pdf = file.name
    output_txt_file = f"{os.path.splitext(input_pdf)[0]}.txt"

    # All intermediate files live in a private scratch directory, so
    # concurrent sessions cannot clash and nothing in the working directory
    # is deleted by prefix.
    with tempfile.TemporaryDirectory() as work_dir:
        read_all = getattr(file, "getvalue", None)
        if callable(read_all):
            # In-memory upload: ``name`` is only a label, so the bytes must
            # be written to disk before pdftoppm can read them.
            pdf_path = os.path.join(work_dir, "input.pdf")
            with open(pdf_path, "wb") as pdf_out:
                pdf_out.write(read_all())
        else:
            pdf_path = input_pdf

        # Argument lists (no shell) keep odd file names from being
        # interpreted as shell syntax.
        subprocess.run(
            ["pdftoppm", "-png", pdf_path, os.path.join(work_dir, "img")],
            check=True,
        )

        # os.listdir() order is arbitrary; sort so pages stay in sequence.
        page_images = sorted(
            name
            for name in os.listdir(work_dir)
            if name.startswith("img") and name.endswith(".png")
        )

        page_texts = []
        for image_name in page_images:
            # tesseract appends ".txt" to the output base it is given.
            output_base = os.path.join(work_dir, f"ocr_{image_name}")
            subprocess.run(
                ["tesseract", os.path.join(work_dir, image_name), output_base],
                check=True,
            )
            with open(f"{output_base}.txt", encoding="utf-8") as page_file:
                page_texts.append(page_file.read())

    with open(output_txt_file, "w", encoding="utf-8") as output_file:
        for page_text in page_texts:
            output_file.write(page_text)
            output_file.write("\n")

    return output_txt_file
# Document Preparation tab: upload a PDF, OCR it, offer the text as a download.
with tab4:
    st.subheader("Document Preparation")
    uploaded_file = st.file_uploader(
        "Upload a PDF (Max: 200MB)",
        type=["pdf"],
        accept_multiple_files=False,
    )
    if uploaded_file:
        st.write("Processing the uploaded document...")
        result_file = process_pdf(uploaded_file)

        # Read through a context manager so the file handle is closed
        # instead of being left for the garbage collector.
        with open(result_file, "rb") as extracted:
            extracted_bytes = extracted.read()

        st.download_button(
            label="Download Extracted Text",
            data=extracted_bytes,
            # Offer just the file name, even if a full path was returned.
            file_name=os.path.basename(result_file),
            mime="text/plain",
        )
# Contract Chat Section
def contract_chat_section(tab, assistant_id, session_key, input_key):
    """Render an assistant-backed chat inside ``tab``.

    Args:
        tab: Streamlit container, used as a context manager, to draw into.
        assistant_id: ID of the OpenAI assistant that answers the prompts.
        session_key: ``st.session_state`` key under which this chat's
            history is kept as a list of ``{"role", "content"}`` dicts.
        input_key: Widget key for the chat input box.

    If no access key has been entered, an error is shown and the script is
    halted with ``st.stop()``. Each submitted prompt is sent on a newly
    created thread, and the run is polled once per second.

    Any exception raised while talking to the API is shown with
    ``st.error`` rather than propagated. This includes a run that ends in a
    state other than ``"completed"``.
    """
    # Run states from which polling would never reach "completed" here.
    # "requires_action" is included because this function never submits
    # tool outputs.
    unrecoverable_states = {
        "failed",
        "cancelled",
        "expired",
        "incomplete",
        "requires_action",
    }

    with tab:
        st.subheader("Chat")

        if OPENAI_API_KEY:
            client = OpenAI(api_key=OPENAI_API_KEY)
        else:
            st.error("Please enter your C2 Group of Technologies Access Key to continue.")
            st.stop()

        if session_key not in st.session_state:
            st.session_state[session_key] = []

        if st.button("Clear Chat", key=f"clear_chat_{session_key}"):
            st.session_state[session_key] = []
            st.rerun()

        # Replay the stored conversation so it survives script reruns.
        for message in st.session_state[session_key]:
            role, content = message["role"], message["content"]
            st.chat_message(role).write(content)

        if prompt := st.chat_input("Enter your message:", key=input_key):
            st.session_state[session_key].append({"role": "user", "content": prompt})
            st.chat_message("user").write(prompt)

            try:
                thread = client.beta.threads.create()
                thread_id = thread.id
                client.beta.threads.messages.create(
                    thread_id=thread_id,
                    role="user",
                    content=prompt,
                )

                run = client.beta.threads.runs.create(
                    thread_id=thread_id,
                    assistant_id=assistant_id,
                )

                while True:
                    run_status = client.beta.threads.runs.retrieve(
                        thread_id=thread_id, run_id=run.id
                    )
                    if run_status.status == "completed":
                        break
                    if run_status.status in unrecoverable_states:
                        # Without this check a failed run would keep the
                        # loop, and the user's session, spinning forever.
                        raise RuntimeError(
                            f"Assistant run ended with status '{run_status.status}'"
                        )
                    time.sleep(1)

                messages = client.beta.threads.messages.list(thread_id=thread_id)
                # The first entry of the listing is taken as the reply.
                assistant_message = messages.data[0].content[0].text.value
                st.chat_message("assistant").write(assistant_message)
                st.session_state[session_key].append(
                    {"role": "assistant", "content": assistant_message}
                )
            except Exception as e:
                st.error(f"Error: {str(e)}")
# Assistant IDs used by the two chat tabs.
ASSISTANT_CONTRACT_ID = "asst_rd9h8PfYuOmHbkvOF3RTmVfn"
ASSISTANT_TECHNICAL_ID = "asst_xizNZBCJuy4TqdjqjwkxbAki"

# Render the Contract chat first, then the Technical chat.
# Each entry is (tab, assistant ID, session-state key, chat-input key).
for _chat_args in (
    (tab1, ASSISTANT_CONTRACT_ID, "contract_messages", "contract_input"),
    (tab2, ASSISTANT_TECHNICAL_ID, "technical_messages", "technical_input"),
):
    contract_chat_section(*_chat_args)