File size: 5,042 Bytes
4bd484d ead511a fd85cd0 e35dc50 4bd484d e35dc50 14a0e81 ead511a 4bd484d 528891b 16a0a0c ead511a 16a0a0c ead511a 39ea5ba fd85cd0 4bd484d ead511a e35dc50 4bd484d ead511a 4bd484d a01800e 35da82e d25fa67 35da82e d25fa67 35da82e d25fa67 35da82e a01800e d25fa67 35da82e d25fa67 35da82e a01800e 35da82e a01800e 35da82e a01800e 35da82e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
import os
import shutil
import subprocess
import tempfile
import time
from datetime import datetime
from io import BytesIO

import gradio as gr
import PyPDF2
import streamlit as st
from docx import Document
from openai import OpenAI
from pydub import AudioSegment
# File-extension groups by media kind (not referenced in the visible part of
# this script).
SUPPORTED_AUDIO_FORMATS = (".mp3", ".wav", ".m4a")
SUPPORTED_TEXT_FORMATS = (".txt", ".docx", ".csv", ".xlsx", ".pdf")

# Page chrome: browser-tab title, on-page heading and tagline.
st.set_page_config(page_title="Schlager ContractAi")
st.title("Schlager ContractAi")
st.caption("Chat with your contract or manage meeting minutes")

# The access key is collected in the sidebar as a masked field; the chat
# sections read it from this module-level name.
OPENAI_API_KEY = st.sidebar.text_input(
    "Enter your C2 Group of Technologies Access Key",
    type="password",
)

# Main-area tabs, in display order.
tab1, tab2, tab3, tab4 = st.tabs(
    ["Contract", "Technical", "Minutes", "Document Preparation"]
)
def install_dependencies(
    packages=("poppler-utils", "tesseract-ocr", "tesseract-ocr-eng"),
    required_binaries=("pdftoppm", "tesseract"),
):
    """Install the system packages used for PDF OCR, unless they are already present.

    The script calls this at module level, so it runs on every execution of
    the script. Checking PATH first avoids repeating ``apt-get update`` and
    the installs when the tools are already available.

    Args:
        packages: apt package names to install when a tool is missing.
        required_binaries: executables whose presence on PATH means nothing
            needs installing. Pass an empty tuple to always run the install.

    Returns:
        None. Success and failure are reported with ``print``; errors are
        never raised to the caller.
    """
    if required_binaries and all(shutil.which(name) for name in required_binaries):
        return

    try:
        subprocess.run(['apt-get', 'update'], check=True)
        # A single install call resolves all packages in one transaction.
        subprocess.run(['apt-get', 'install', '-y', *packages], check=True)
        print("Packages installed successfully!")
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers hosts without apt-get at all (e.g. non-Debian
        # systems), which would otherwise abort the script at start-up.
        print(f"An error occurred: {e}")
# Executed at module level, so the OCR tools are provisioned before
# process_pdf is ever called further down the script.
install_dependencies()
def _run_tool(command):
    """Run an external command without a shell; return True only on exit status 0."""
    try:
        completed = subprocess.run(command, check=False)
    except OSError as e:
        # Typically the executable is not installed.
        print(f"Could not run {command[0]}: {e}")
        return False
    if completed.returncode != 0:
        print(f"{command[0]} exited with status {completed.returncode}")
        return False
    return True


def _page_sort_key(image_name):
    """Order page images by the number after the last '-', then by name."""
    suffix = os.path.splitext(image_name)[0].rpartition("-")[2]
    return (int(suffix) if suffix.isdigit() else 0, image_name)


def process_pdf(file):
    """OCR a PDF into a plain-text file and return that file's path.

    The PDF is rasterised to PNG pages with ``pdftoppm`` and each page is
    recognised with ``tesseract``. All intermediate files live in a private
    temporary directory, so concurrent calls cannot see or delete each
    other's pages and nothing else in the working directory is touched.

    Args:
        file: Either an object with ``getvalue()`` and ``name`` (such as the
            upload passed in by the Document Preparation tab), whose bytes
            are written to a temporary PDF, or an object whose ``name`` is
            the path of a PDF already on disk.

    Returns:
        Path of the text file. For in-memory uploads this is
        ``<base name without extension>.txt`` in the current working
        directory; for on-disk inputs it sits next to the input PDF.
        Conversion is best-effort: if a tool is missing or fails, the
        problem is printed and the affected pages are omitted, which can
        leave the file empty.
    """
    source_name = file.name
    read_upload = getattr(file, "getvalue", None)

    with tempfile.TemporaryDirectory() as work_dir:
        if callable(read_upload):
            # pdftoppm needs a real path, so spool the uploaded bytes to disk.
            pdf_path = os.path.join(work_dir, "input.pdf")
            with open(pdf_path, "wb") as pdf_file:
                pdf_file.write(read_upload())
            # basename() keeps a client-supplied name from steering the
            # output outside the working directory.
            stem = os.path.splitext(os.path.basename(source_name))[0]
        else:
            pdf_path = source_name
            stem = os.path.splitext(source_name)[0]
        output_txt_file = f"{stem}.txt"

        # Argument lists (no shell) keep odd characters in file names inert.
        _run_tool(["pdftoppm", "-png", pdf_path, os.path.join(work_dir, "img")])

        page_images = sorted(
            (
                name
                for name in os.listdir(work_dir)
                if name.startswith("img") and name.endswith(".png")
            ),
            key=_page_sort_key,
        )

        with open(output_txt_file, "w", encoding="utf-8") as output_file:
            for image_name in page_images:
                # tesseract appends ".txt" to the output base it is given.
                ocr_base = os.path.join(work_dir, f"ocr_{image_name}")
                if not _run_tool(
                    ["tesseract", os.path.join(work_dir, image_name), ocr_base]
                ):
                    continue
                try:
                    with open(
                        f"{ocr_base}.txt", "r", encoding="utf-8", errors="replace"
                    ) as page_text:
                        output_file.write(page_text.read())
                except OSError as e:
                    print(f"Could not read OCR output for {image_name}: {e}")
                    continue
                output_file.write("\n")

    return output_txt_file
# Document Preparation tab: upload a PDF, OCR it, offer the text for download.
with tab4:
    st.subheader("Document Preparation")
    uploaded_file = st.file_uploader("Upload a PDF (Max: 200MB)", type=["pdf"], accept_multiple_files=False)
    if uploaded_file:
        # NOTE(review): this branch runs on every execution of the script
        # while a file is selected, so the OCR is repeated each time —
        # consider caching the result.
        st.write("Processing the uploaded document...")
        result_file = process_pdf(uploaded_file)
        # Read through a context manager so the file handle is closed
        # promptly instead of being left to the garbage collector.
        with open(result_file, "rb") as extracted_file:
            extracted_bytes = extracted_file.read()
        st.download_button(
            label="Download Extracted Text",
            data=extracted_bytes,
            # Suggest only the base name, never a server-side directory path.
            file_name=os.path.basename(result_file),
            mime="text/plain",
        )
# Chat section shared by the assistant-backed tabs.

# Run states from which this code can never reach "completed".
# "requires_action" is included because no tool outputs are ever submitted here.
_RUN_FAILURE_STATES = ("failed", "cancelled", "expired", "incomplete", "requires_action")


def _wait_for_run(client, thread_id, run_id, poll_seconds=1, max_wait_seconds=600):
    """Poll a run until it completes; raise if it cannot complete or takes too long."""
    deadline = time.monotonic() + max_wait_seconds
    while True:
        run_status = client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run_id)
        if run_status.status == "completed":
            return run_status
        if run_status.status in _RUN_FAILURE_STATES:
            last_error = getattr(run_status, "last_error", None)
            detail = getattr(last_error, "message", None)
            reason = f": {detail}" if detail else ""
            raise RuntimeError(
                f"Assistant run ended with status '{run_status.status}'{reason}"
            )
        if time.monotonic() >= deadline:
            # Safety cap so a run stuck in a non-final state cannot block
            # the script forever.
            raise TimeoutError(
                f"Assistant run did not finish within {max_wait_seconds} seconds"
            )
        time.sleep(poll_seconds)


def _first_message_text(messages):
    """Return the first text part of the first message in a message-list response."""
    if not messages.data:
        raise RuntimeError("The assistant returned no messages.")
    # data[0] is treated as the assistant's reply, as before; this assumes
    # the list call returns the newest message first — TODO confirm.
    for part in messages.data[0].content:
        text = getattr(part, "text", None)
        if text is not None:
            return text.value
    raise RuntimeError("The assistant reply did not contain any text.")


def contract_chat_section(tab, assistant_id, session_key, input_key):
    """Render a chat UI inside ``tab`` backed by one OpenAI assistant.

    Args:
        tab: Streamlit container (one of the tabs) to render into.
        assistant_id: ID of the assistant that answers each prompt.
        session_key: ``st.session_state`` key holding this chat's history as
            a list of ``{"role", "content"}`` dicts.
        input_key: Widget key for this section's chat input.

    Without an access key the section shows an error and calls ``st.stop()``.
    Each prompt is sent in a newly created thread. Any failure while talking
    to the API — including a run that fails, is cancelled, expires or times
    out — is shown with ``st.error`` rather than raised.
    """
    with tab:
        st.subheader("Chat")
        if OPENAI_API_KEY:
            client = OpenAI(api_key=OPENAI_API_KEY)
        else:
            st.error("Please enter your C2 Group of Technologies Access Key to continue.")
            st.stop()

        if session_key not in st.session_state:
            st.session_state[session_key] = []

        if st.button("Clear Chat", key=f"clear_chat_{session_key}"):
            st.session_state[session_key] = []
            st.rerun()

        # Replay the stored conversation.
        for message in st.session_state[session_key]:
            role, content = message["role"], message["content"]
            st.chat_message(role).write(content)

        if prompt := st.chat_input("Enter your message:", key=input_key):
            st.session_state[session_key].append({"role": "user", "content": prompt})
            st.chat_message("user").write(prompt)
            try:
                thread = client.beta.threads.create()
                thread_id = thread.id
                client.beta.threads.messages.create(
                    thread_id=thread_id,
                    role="user",
                    content=prompt
                )
                run = client.beta.threads.runs.create(
                    thread_id=thread_id,
                    assistant_id=assistant_id
                )
                # Raises instead of spinning forever when the run ends in
                # any state other than "completed".
                _wait_for_run(client, thread_id, run.id)
                messages = client.beta.threads.messages.list(thread_id=thread_id)
                assistant_message = _first_message_text(messages)
                st.chat_message("assistant").write(assistant_message)
                st.session_state[session_key].append({"role": "assistant", "content": assistant_message})
            except Exception as e:
                # UI boundary: surface any API or run failure to the user.
                st.error(f"Error: {str(e)}")
# Assistant IDs for the two chat tabs.
ASSISTANT_CONTRACT_ID = "asst_rd9h8PfYuOmHbkvOF3RTmVfn"
ASSISTANT_TECHNICAL_ID = "asst_xizNZBCJuy4TqdjqjwkxbAki"

# One row per chat tab: (tab, assistant id, history key, input widget key).
# Rendered in order: Contract first, then Technical.
_CHAT_PANELS = (
    (tab1, ASSISTANT_CONTRACT_ID, "contract_messages", "contract_input"),
    (tab2, ASSISTANT_TECHNICAL_ID, "technical_messages", "technical_input"),
)
for _panel in _CHAT_PANELS:
    contract_chat_section(*_panel)
|