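# NOTE(review): the three lines that stood here ("Spaces:", "Running", "Running")
# look like hosting-page header residue rather than program text; kept as a comment.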
import streamlit as st | |
from langchain_core.prompts import ChatPromptTemplate | |
from langchain_openai import AzureChatOpenAI | |
from langchain.chains import create_retrieval_chain | |
from langchain.chains.combine_documents import create_stuff_documents_chain | |
from langchain_community.vectorstores import FAISS | |
from langchain_text_splitters import RecursiveCharacterTextSplitter | |
from langchain_openai import OpenAIEmbeddings | |
import pandas as pd | |
import io | |
import time | |
from langchain_unstructured import UnstructuredLoader | |
from typing import List, Dict, Tuple | |
from langchain_openai import AzureChatOpenAI,AzureOpenAIEmbeddings | |
from langchain.text_splitter import CharacterTextSplitter | |
import tempfile | |
import os | |
class PDFExtract:
    """Turn document files into a searchable FAISS vector store.

    The pipeline loads each file with ``UnstructuredLoader``, splits the loaded
    documents into chunks with ``CharacterTextSplitter``, embeds the chunks with
    ``AzureOpenAIEmbeddings`` and indexes them with ``FAISS.from_documents``.
    Despite the method names, nothing here is PDF-specific: the file path is
    simply handed to ``UnstructuredLoader``.
    """

    def _extract_text_from_pdfs(self, file_paths: List[str]) -> list:
        """Load every file and return the loaded documents.

        Args:
            file_paths (List[str]): Paths of the files to load.

        Returns:
            list: The results of ``UnstructuredLoader.load()`` for each path,
            concatenated in input order. These are loader document objects
            (not plain strings); they are later fed to ``split_documents``.
            An empty ``file_paths`` yields an empty list.
        """
        docs = []
        for file_path in file_paths:
            loader = UnstructuredLoader(file_path, strategy="fast")
            docs.extend(loader.load())
        return docs

    def _split_text_into_chunks(self, text: list) -> list:
        """Split loaded documents into smaller chunks.

        Args:
            text (list): Loaded documents, as returned by
                ``_extract_text_from_pdfs`` (the parameter name is historical;
                it is passed to ``split_documents``, not ``split_text``).

        Returns:
            list: Chunked documents, split on newlines with a target size of
            2000 characters and no overlap.
        """
        text_splitter = CharacterTextSplitter(
            separator="\n",
            chunk_size=2000,
            chunk_overlap=0,
            length_function=len,
        )
        return text_splitter.split_documents(text)

    def _create_vector_store_from_text_chunks(self, text_chunks: list) -> "FAISS":
        """Embed the chunks and index them in a FAISS vector store.

        Args:
            text_chunks (list): Chunked documents from ``_split_text_into_chunks``.

        Returns:
            FAISS: Vector store built from the chunks using the
            ``text-embedding-3-large`` Azure deployment.
        """
        embeddings = AzureOpenAIEmbeddings(
            azure_deployment="text-embedding-3-large",
        )
        return FAISS.from_documents(documents=text_chunks, embedding=embeddings)

    def main(self, file_paths: List[str]) -> "FAISS":
        """Run the full load -> split -> embed pipeline.

        Args:
            file_paths (List[str]): Paths of the files to index.

        Returns:
            FAISS: Vector store containing the chunked content of all files.

        Raises:
            ValueError: If no content could be loaded from the files, or if
                splitting produced no chunks. Failing here gives a clear
                message instead of an obscure error from the embedding/FAISS
                step when it is handed an empty list.
        """
        documents = self._extract_text_from_pdfs(file_paths)
        if not documents:
            raise ValueError("No content could be extracted from the supplied file(s).")

        text_chunks = self._split_text_into_chunks(documents)
        if not text_chunks:
            raise ValueError("The extracted content produced no text chunks to index.")

        return self._create_vector_store_from_text_chunks(text_chunks)
def process_file(uploaded_file):
    """Index an uploaded file and return the resulting vector store.

    The upload is written to a temporary file (keeping the original extension
    so the loader sees the right suffix), handed to ``PDFExtract.main``, and
    the temporary file is removed afterwards.

    Args:
        uploaded_file: The object returned by ``st.file_uploader``; only its
            ``name`` attribute and ``getvalue()`` method are used here.

    Returns:
        The vector store produced by ``PDFExtract.main``.
    """
    with st.spinner("Processing document..."):
        suffix = os.path.splitext(uploaded_file.name)[1]

        # delete=False: the file must outlive this ``with`` so the loader can
        # reopen it by path once it has been flushed and closed.
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
            tmp_file.write(uploaded_file.getvalue())
            tmp_file_path = tmp_file.name

        try:
            return PDFExtract().main([tmp_file_path])
        finally:
            # Always remove the temporary copy, even when extraction or
            # indexing raises; otherwise every failed upload leaks a file.
            os.unlink(tmp_file_path)
# Set page configuration
st.set_page_config(page_title="GASB Decision Flow", layout="wide")

# Custom CSS for better UI. The stylesheet is injected as raw HTML, hence
# unsafe_allow_html=True.
st.markdown("""
<style>
.uploadfile-container {
    display: flex;
    justify-content: center;
    margin-bottom: 20px;
}
.chat-container {
    margin-top: 20px;
}
.stApp {
    max-width: 1200px;
    margin: 0 auto;
}
.loader {
    border: 8px solid #f3f3f3;
    border-top: 8px solid #3498db;
    border-radius: 50%;
    width: 50px;
    height: 50px;
    animation: spin 1s linear infinite;
    margin: 20px auto;
}
@keyframes spin {
    0% { transform: rotate(0deg); }
    100% { transform: rotate(360deg); }
}
/* Hide scrollbars but keep scrolling functionality */
::-webkit-scrollbar {
    width: 0px;
    height: 0px;
    background: transparent;
}
* {
    -ms-overflow-style: none;
    scrollbar-width: none;
}
div[data-testid="stVerticalBlock"] {
    overflow-x: hidden;
}
.element-container, .stTextInput, .stButton {
    overflow: visible !important;
}
/* Custom chat message styling */
.user-message-container {
    display: flex;
    justify-content: flex-end;
    margin-bottom: 10px;
}
.st-emotion-cache-janbn0
{
    margin-left: 3in;
}
.user-message {
    background-color: #2b7dfa;
    color: white;
    border-radius: 18px 18px 0 18px;
    padding: 10px 15px;
    max-width: 70%;
    text-align: right;
}
.assistant-message-container {
    display: flex;
    justify-content: flex-start;
    margin-bottom: 10px;
}
.assistant-message {
    background-color: #f1f1f1;
    color: #333;
    border-radius: 18px 18px 18px 0;
    padding: 10px 15px;
    max-width: 70%;
}
</style>
""", unsafe_allow_html=True)
# Title and description
st.title("22nd Century")

# Initialize session state: chat history, the vector store, and the flag that
# tells the rest of the script a document has been indexed.
for state_key, initial_value in (("messages", []), ("db", None), ("file_processed", False)):
    if state_key not in st.session_state:
        st.session_state[state_key] = initial_value

# Center the file uploader
st.markdown('<div class="uploadfile-container">', unsafe_allow_html=True)
uploaded_file = st.file_uploader("Upload your contract document (PDF, Word, or Text)", type=["pdf", "docx", "txt"])
st.markdown('</div>', unsafe_allow_html=True)

# Process the file when uploaded. The upload is identified by (name, size) so
# that replacing the document triggers re-indexing; without this, a second
# upload would be ignored and answers would keep coming from the first file.
# (Two different files with the same name and size are not told apart.)
if uploaded_file:
    upload_key = (uploaded_file.name, uploaded_file.size)
    already_indexed = (
        st.session_state.file_processed
        and st.session_state.get("processed_file_key") == upload_key
    )
    if not already_indexed:
        db = process_file(uploaded_file)
        if db:
            st.session_state.db = db
            st.session_state.file_processed = True
            st.session_state.processed_file_key = upload_key
            # Start from a clean conversation for the new document and make
            # the "Start GASB Decision Flow" button available again.
            st.session_state.messages = []
            st.session_state.pop("custom_mode", None)
            st.success(f"Document '{uploaded_file.name}' processed successfully!")
# GASB decision flow logic
if st.session_state.file_processed:
    # Setup langchain components: a retriever over the indexed document, and a
    # "stuff documents" chain that places retrieved chunks into {context}.
    retriever = st.session_state.db.as_retriever()
    llm = AzureChatOpenAI(model='gpt-4o', temperature=0, max_tokens=3000)
    system_prompt = (
        "Use the given context to answer the question. Answer yes or no with justify the answer detailed. "
        "If you don't know the answer, say you don't know. "
        "Use three sentence maximum and keep the answer concise. "
        "'GASB Do Not Apply' sentence include in the output for the following Questions Otherwise don't include:\n"
        "Does the contract involve the use of software or capital assets? if answer is 'no' include 'GASB 87/96 Do Not Apply' in the answer.\n"
        "Is the software an insignificant component to any fixed asset in the agreement? if answer is 'yes' include 'GASB 96 Do Not Apply' in the answer.\n"
        "Is this a software that you are procuring? if answer is 'no' include 'GASB 96 Do Not Apply' in the answer.\n"
        "Is it a perpetual license/agreement? if answer is 'yes' or 'no' include 'GASB 96 Do Not Apply' in the answer.\n"
        "Lease Queries:{lease_queries} if 'yes' for all questions include 'GASB 87 Do Not Apply' in the answer.\n"
        "Does the lease explicitly transfer ownership? if answer is 'no' include 'GASB 87 Do Not Apply' in the answer.\n"
        "Must Return the Reason Why you answer yes or no.\n"
        "Context: {context}"
    )
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            ("human", "{input}"),
        ]
    )
    question_answer_chain = create_stuff_documents_chain(llm, prompt)
    chain = create_retrieval_chain(retriever, question_answer_chain)

    # Define flows
    initial_flow = ["Does the contract involve the use of software or capital assets?", "Does this contract include software?"]
    software_flow = [
        "Is the software an insignificant component to any fixed asset in the agreement?",
        "Is this a software that you are procuring?",
        "Is it a perpetual license/agreement?"
    ]
    lease_flow = [
        "Is this a lease of an intangible asset?",
        "Is this a lease for supply contracts?",
        "Is this a lease of inventory?",
        "Does the lease explicitly transfer ownership?"
    ]

    # Chat container
    st.markdown('<div class="chat-container">', unsafe_allow_html=True)
    st.subheader("GASB Decision Flow Chat")

    # Display chat messages recorded on earlier runs of the script
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.write(message["content"])

    def _ask_question(question):
        """Ask one flow question, render the exchange, and return the answer text."""
        # Add user question to chat
        st.session_state.messages.append({"role": "user", "content": question})
        with st.chat_message("user"):
            st.write(question)

        # Get AI response
        with st.spinner("Thinking..."):
            response = chain.invoke({"input": question, 'lease_queries': lease_flow})
        answer = response['answer']
        source = response['context']

        # Add AI response to chat; the retrieved context is shown live but
        # only the answer is stored in the history.
        st.session_state.messages.append({"role": "assistant", "content": answer})
        with st.chat_message("assistant"):
            st.write(source)
            st.success(answer)
        return answer

    def _run_questions(questions, pause_seconds):
        """Ask questions in order; return (stopped_on_gasb, last_answer)."""
        answer = ""
        for question in questions:
            answer = _ask_question(question)
            # The prompt tells the model to include a "GASB ... Do Not Apply"
            # sentence when a question rules the standard out.
            if "GASB" in answer:
                st.info("Flow stopped due to GASB answer.")
                return True, answer
            time.sleep(pause_seconds)  # Small delay for better UX
        return False, answer

    def _is_affirmative(answer):
        """Return True when the answer text opens with "yes" (ignoring markup/quotes)."""
        return answer.lstrip(" \t\r\n*_\"'").lower().startswith("yes")

    # Function to run the GASB decision flow
    def run_gasb_flow():
        """Run the initial questions, then the software or lease follow-up flow."""
        with st.spinner("Running initial questions..."):
            stopped, answer = _run_questions(initial_flow, 1)
        if stopped:
            return

        # ``answer`` is the reply to the last initial question ("Does this
        # contract include software?"). Branch on whether it is affirmative:
        # checking for the word "software" alone also matches replies such as
        # "No, the contract does not include software", which made the lease
        # flow effectively unreachable.
        if _is_affirmative(answer):
            selected_flow = software_flow
            st.info("Continuing with software flow...")
        else:
            selected_flow = lease_flow
            st.info("Continuing with lease flow...")

        _run_questions(selected_flow, 2)

    # Offer the flow once per document; ``custom_mode`` marks that it has run.
    if st.session_state.file_processed and 'custom_mode' not in st.session_state:
        if st.button("Start GASB Decision Flow"):
            run_gasb_flow()
            st.session_state.custom_mode = True
    st.markdown('</div>', unsafe_allow_html=True)
else:
    st.info("Please upload a document to start the GASB decision flow")