scai

Sleeping

App Files Files Community

scai / app.py

IAMTFRMZA

app.py

4bd484d verified 4 months ago

raw

history blame

5.04 kB

	import os
	import shutil
	import subprocess
	import tempfile
	import time
	from datetime import datetime
	from io import BytesIO

	import gradio as gr
	import PyPDF2
	import streamlit as st
	from docx import Document
	from openai import OpenAI
	from pydub import AudioSegment

	# Page-level configuration, heading and tagline.
	st.set_page_config(page_title="Schlager ContractAi")
	st.title("Schlager ContractAi")
	st.caption("Chat with your contract or manage meeting minutes")

	# The access key is typed into a masked text field in the sidebar; the
	# chat sections read it from the module-level OPENAI_API_KEY.
	with st.sidebar:
	    OPENAI_API_KEY = st.text_input(
	        "Enter your C2 Group of Technologies Access Key",
	        type="password",
	    )

	# One tab per feature area of the app.
	tab1, tab2, tab3, tab4 = st.tabs(
	    ["Contract", "Technical", "Minutes", "Document Preparation"]
	)

	# File-extension groups for audio files and text documents.
	SUPPORTED_AUDIO_FORMATS = (".mp3", ".wav", ".m4a")
	SUPPORTED_TEXT_FORMATS = (".txt", ".docx", ".csv", ".xlsx", ".pdf")

	def install_dependencies(
	    packages=("poppler-utils", "tesseract-ocr", "tesseract-ocr-eng"),
	    required_tools=("pdftoppm", "tesseract"),
	):
	    """Make sure the system tools needed for PDF OCR are installed.

	    The install step is skipped when every executable in *required_tools*
	    is already on PATH, so repeated calls do not re-run ``apt-get``.

	    Args:
	        packages: apt package names to install when tools are missing.
	        required_tools: executables whose presence means nothing has to be
	            installed. Pass an empty tuple to always attempt the install.

	    Returns:
	        True when the tools are already present or the install succeeded,
	        False when nothing could be installed.
	    """
	    if required_tools and all(shutil.which(tool) for tool in required_tools):
	        return True

	    if not packages:
	        # Tools are missing and there is nothing we were asked to install.
	        return False

	    try:
	        subprocess.run(["apt-get", "update"], check=True)
	        subprocess.run(["apt-get", "install", "-y", *packages], check=True)
	    except (subprocess.CalledProcessError, OSError) as e:
	        # OSError covers a missing apt-get binary or insufficient
	        # permissions, which would otherwise abort the whole script.
	        print(f"An error occurred: {e}")
	        return False

	    print("Packages installed successfully!")
	    return True

	# Called at module level, so this executes every time the script is run.
	install_dependencies()

	def _read_pdf_bytes(file):
	    """Return the raw PDF bytes of *file*, whether in memory or on disk."""
	    if hasattr(file, "getvalue"):
	        # In-memory upload: the content lives in the object, not on disk.
	        return file.getvalue()
	    with open(file.name, "rb") as pdf_in:
	        return pdf_in.read()


	def process_pdf(file):
	    """OCR every page of a PDF and collect the recognised text in a file.

	    The PDF is rendered to PNG pages with ``pdftoppm`` and each page is
	    passed to ``tesseract``. All intermediate files live in a private
	    temporary directory that is removed afterwards, so concurrent calls
	    do not share scratch files and nothing in the working directory is
	    deleted.

	    Args:
	        file: object with a ``name`` attribute. If it also has
	            ``getvalue()``, those bytes are used as the PDF content;
	            otherwise ``file.name`` is opened as a path.

	    Returns:
	        Path of the text file, derived from ``file.name`` by replacing
	        its extension with ``.txt``.

	    Raises:
	        subprocess.CalledProcessError: ``pdftoppm`` or ``tesseract``
	            exited with a non-zero status.
	        OSError: a tool is not installed or a file could not be accessed.
	    """
	    input_pdf = file.name
	    # splitext instead of [:-4]: does not assume a four-character extension.
	    output_txt_file = f"{os.path.splitext(input_pdf)[0]}.txt"

	    page_texts = []
	    with tempfile.TemporaryDirectory(prefix="pdf_ocr_") as workdir:
	        # Fixed name inside the scratch dir: the uploaded name never
	        # reaches a command line.
	        pdf_path = os.path.join(workdir, "input.pdf")
	        with open(pdf_path, "wb") as pdf_out:
	            pdf_out.write(_read_pdf_bytes(file))

	        # Argument lists (no shell) so file names cannot inject commands.
	        subprocess.run(
	            ["pdftoppm", "-png", pdf_path, os.path.join(workdir, "img")],
	            check=True,
	        )

	        # Sorted for a deterministic order; os.listdir() order is arbitrary.
	        # NOTE(review): relies on pdftoppm zero-padding page numbers so that
	        # lexical order equals page order - confirm.
	        for image in sorted(os.listdir(workdir)):
	            if not (image.startswith("img") and image.endswith(".png")):
	                continue
	            ocr_base = os.path.join(workdir, f"ocr_{image}")
	            # tesseract appends ".txt" to the output base name itself.
	            subprocess.run(
	                ["tesseract", os.path.join(workdir, image), ocr_base],
	                check=True,
	            )
	            with open(f"{ocr_base}.txt", "r", encoding="utf-8") as page_file:
	                page_texts.append(page_file.read())

	    with open(output_txt_file, "w", encoding="utf-8") as output_file:
	        for page_text in page_texts:
	            output_file.write(page_text)
	            output_file.write("\n")

	    return output_txt_file

	# "Document Preparation" tab: upload a PDF, OCR it, offer the text for download.
	with tab4:
	    st.subheader("Document Preparation")
	    uploaded_file = st.file_uploader(
	        "Upload a PDF (Max: 200MB)",
	        type=["pdf"],
	        accept_multiple_files=False,
	    )

	    if uploaded_file:
	        st.write("Processing the uploaded document...")
	        try:
	            result_file = process_pdf(uploaded_file)
	            # Context manager so the file handle is closed after reading.
	            with open(result_file, "rb") as extracted:
	                extracted_text = extracted.read()
	        except (OSError, subprocess.CalledProcessError) as e:
	            # Report failures in the page, like the chat sections do.
	            st.error(f"Error: {str(e)}")
	        else:
	            st.download_button(
	                label="Download Extracted Text",
	                data=extracted_text,
	                file_name=result_file,
	                mime="text/plain",
	            )

	# Chat section implementation (rendered once per assistant tab).

	# Run states from which polling will not reach "completed" by itself.
	# "requires_action" is included because this code never submits tool outputs.
	_RUN_DEAD_END_STATES = frozenset(
	    {"failed", "cancelled", "expired", "incomplete", "requires_action"}
	)


	def _wait_for_run(client, thread_id, run_id, timeout):
	    """Poll a run once per second until it completes; never wait forever.

	    Raises:
	        RuntimeError: the run reached a state other than "completed" from
	            which it cannot finish.
	        TimeoutError: the run did not complete within *timeout* seconds.
	    """
	    deadline = time.monotonic() + timeout
	    while True:
	        run_status = client.beta.threads.runs.retrieve(
	            thread_id=thread_id, run_id=run_id
	        )
	        status = run_status.status
	        if status == "completed":
	            return run_status
	        if status in _RUN_DEAD_END_STATES:
	            raise RuntimeError(f"Assistant run ended with status '{status}'")
	        if time.monotonic() >= deadline:
	            raise TimeoutError(
	                f"Assistant run did not complete within {timeout:.0f} seconds"
	            )
	        time.sleep(1)


	def contract_chat_section(tab, assistant_id, session_key, input_key, run_timeout=300):
	    """Render a chat UI inside *tab* backed by an OpenAI assistant.

	    Args:
	        tab: Streamlit container the section is drawn in.
	        assistant_id: id of the assistant that answers the prompts.
	        session_key: ``st.session_state`` key holding this chat's history
	            as a list of ``{"role", "content"}`` dicts.
	        input_key: widget key for the chat input box.
	        run_timeout: maximum number of seconds to wait for one answer.

	    Errors raised while talking to the API (including a failed or timed
	    out run) are shown with ``st.error`` instead of propagating.
	    """
	    with tab:
	        st.subheader("Chat")

	        if not OPENAI_API_KEY:
	            st.error("Please enter your C2 Group of Technologies Access Key to continue.")
	            # NOTE(review): st.stop() ends the current script run, so
	            # sections rendered after this call stay empty until a key is
	            # entered - confirm this is intended.
	            st.stop()
	        client = OpenAI(api_key=OPENAI_API_KEY)

	        if session_key not in st.session_state:
	            st.session_state[session_key] = []

	        if st.button("Clear Chat", key=f"clear_chat_{session_key}"):
	            st.session_state[session_key] = []
	            st.rerun()

	        # Replay the stored history.
	        for message in st.session_state[session_key]:
	            role, content = message["role"], message["content"]
	            st.chat_message(role).write(content)

	        if prompt := st.chat_input("Enter your message:", key=input_key):
	            st.session_state[session_key].append({"role": "user", "content": prompt})
	            st.chat_message("user").write(prompt)

	            try:
	                # NOTE(review): a new thread is created for every prompt, so
	                # only the current prompt is sent to the assistant.
	                thread = client.beta.threads.create()
	                thread_id = thread.id
	                client.beta.threads.messages.create(
	                    thread_id=thread_id,
	                    role="user",
	                    content=prompt,
	                )

	                run = client.beta.threads.runs.create(
	                    thread_id=thread_id,
	                    assistant_id=assistant_id,
	                )

	                # Bounded wait: the original loop only exited on "completed"
	                # and would spin forever on a failed or expired run.
	                _wait_for_run(client, thread_id, run.id, run_timeout)

	                messages = client.beta.threads.messages.list(thread_id=thread_id)
	                assistant_message = messages.data[0].content[0].text.value
	                st.chat_message("assistant").write(assistant_message)
	                st.session_state[session_key].append(
	                    {"role": "assistant", "content": assistant_message}
	                )
	            except Exception as e:
	                # UI boundary: surface any API or polling failure in the page.
	                st.error(f"Error: {str(e)}")

	# Assistant ids backing the two chat tabs.
	ASSISTANT_CONTRACT_ID = "asst_rd9h8PfYuOmHbkvOF3RTmVfn"
	ASSISTANT_TECHNICAL_ID = "asst_xizNZBCJuy4TqdjqjwkxbAki"

	# "Contract" tab: chat answered by the contract assistant.
	contract_chat_section(
	    tab=tab1,
	    assistant_id=ASSISTANT_CONTRACT_ID,
	    session_key="contract_messages",
	    input_key="contract_input",
	)

	# "Technical" tab: same chat UI, answered by the technical assistant.
	contract_chat_section(
	    tab=tab2,
	    assistant_id=ASSISTANT_TECHNICAL_ID,
	    session_key="technical_messages",
	    input_key="technical_input",
	)