# SmartDoc AI — Streamlit RAG assistant: upload a PDF, index it with Ollama
# embeddings, and answer questions from the retrieved context.
# (NOTE(review): the original paste carried HuggingFace Spaces page residue here.)
import streamlit as st
from langchain_community.document_loaders import PDFPlumberLoader
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_ollama import OllamaEmbeddings
from langchain_ollama.llms import OllamaLLM
from langchain_text_splitters import RecursiveCharacterTextSplitter
st.markdown(""" | |
<style> | |
.stApp { | |
background-color: #121826; /* Deep Navy Blue */ | |
color: #EAEAEA; /* Soft White */ | |
} | |
/* Chat Input Styling */ | |
.stChatInput input { | |
background-color: #1A2238 !important; /* Dark Blue */ | |
color: #F5F5F5 !important; /* Light Gray */ | |
border: 1px solid #3E4C72 !important; /* Muted Blue */ | |
} | |
/* User Message Styling */ | |
.stChatMessage[data-testid="stChatMessage"]:nth-child(odd) { | |
background-color: #1F2A44 !important; /* Dark Blue Gray */ | |
border: 1px solid #4A5C89 !important; /* Subtle Blue */ | |
color: #D1D5DB !important; /* Soft White */ | |
border-radius: 10px; | |
padding: 15px; | |
margin: 10px 0; | |
} | |
/* Assistant Message Styling */ | |
.stChatMessage[data-testid="stChatMessage"]:nth-child(even) { | |
background-color: #253350 !important; /* Rich Deep Blue */ | |
border: 1px solid #5C6FA9 !important; /* Light Blue Accent */ | |
color: #F3F4F6 !important; /* Soft White */ | |
border-radius: 10px; | |
padding: 15px; | |
margin: 10px 0; | |
} | |
/* Avatar Styling */ | |
.stChatMessage .avatar { | |
background-color: #4CAF50 !important; /* Vibrant Green */ | |
color: #FFFFFF !important; /* White */ | |
} | |
/* Text Color Fix */ | |
.stChatMessage p, .stChatMessage div { | |
color: #EAEAEA !important; /* Soft White */ | |
} | |
.stFileUploader { | |
background-color: #1A2238; | |
border: 1px solid #4A5C89; | |
border-radius: 5px; | |
padding: 15px; | |
} | |
h1, h2, h3 { | |
color: #4CAF50 !important; /* Green Accent */ | |
} | |
</style> | |
""", unsafe_allow_html=True) | |
PROMPT_TEMPLATE = """ | |
You are an expert research assistant. Use the provided context to answer the query. | |
If unsure, state that you don't know. Be concise and factual (max 3 sentences). | |
Query: {user_query} | |
Context: {document_context} | |
Answer: | |
""" | |
PDF_STORAGE_PATH = 'document_store/pdfs/' | |
EMBEDDING_MODEL = OllamaEmbeddings(model="deepseek-r1:1.5b") | |
DOCUMENT_VECTOR_DB = InMemoryVectorStore(EMBEDDING_MODEL) | |
LANGUAGE_MODEL = OllamaLLM(model="deepseek-r1:1.5b") | |
def save_uploaded_file(uploaded_file):
    """Persist a Streamlit-uploaded file under PDF_STORAGE_PATH.

    Args:
        uploaded_file: Streamlit ``UploadedFile`` (exposes ``.name`` and
            ``.getbuffer()``).

    Returns:
        str: filesystem path the file was written to.
    """
    import os  # local import: avoids touching the file's top-level imports

    # Fix: the storage directory is never created elsewhere, so the first
    # upload would raise FileNotFoundError on a fresh checkout.
    os.makedirs(PDF_STORAGE_PATH, exist_ok=True)
    file_path = os.path.join(PDF_STORAGE_PATH, uploaded_file.name)
    with open(file_path, "wb") as file:
        file.write(uploaded_file.getbuffer())
    return file_path
def load_pdf_documents(file_path):
    """Parse a PDF from disk into LangChain documents via pdfplumber.

    Args:
        file_path: path of a saved PDF file.

    Returns:
        list of LangChain ``Document`` objects, one per page (PDFPlumberLoader
        behavior).
    """
    document_loader = PDFPlumberLoader(file_path)
    return document_loader.load()
def chunk_documents(raw_documents):
    """Split parsed documents into overlapping chunks for embedding.

    1000-char chunks with 200-char overlap preserve context across chunk
    boundaries; ``add_start_index`` records each chunk's source offset in its
    metadata for traceability.

    Args:
        raw_documents: list of LangChain ``Document`` objects.

    Returns:
        list of chunked ``Document`` objects.
    """
    text_processor = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        add_start_index=True
    )
    return text_processor.split_documents(raw_documents)
def index_documents(document_chunks):
    """Embed and add the given chunks to the module-level in-memory vector store."""
    DOCUMENT_VECTOR_DB.add_documents(document_chunks)
def find_related_documents(query):
    """Return the chunks most similar to *query* from the in-memory index.

    Uses the store's default ``k`` (top-4 in LangChain's InMemoryVectorStore).
    """
    return DOCUMENT_VECTOR_DB.similarity_search(query)
def generate_answer(user_query, context_documents):
    """Answer *user_query* from the retrieved *context_documents* via the LLM.

    Args:
        user_query: the user's question string.
        context_documents: list of ``Document`` objects from similarity search.

    Returns:
        str: the raw model completion. NOTE(review): deepseek-r1 models may
        include reasoning text in the output — confirm whether it should be
        stripped before display.
    """
    # Concatenate chunk texts; blank lines keep chunk boundaries visible to the LLM.
    context_text = "\n\n".join(doc.page_content for doc in context_documents)
    conversation_prompt = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
    response_chain = conversation_prompt | LANGUAGE_MODEL
    return response_chain.invoke({"user_query": user_query, "document_context": context_text})
# --- UI Configuration ---
# NOTE(review): the title/status emoji were mojibake in the paste ("π", "β",
# "π€"); restored to plausible originals — confirm against the deployed app.
st.title("📘 SmartDoc AI")
st.markdown("### AI-Powered Document Assistant")
st.markdown("---")

# --- File Upload Section ---
uploaded_pdf = st.file_uploader(
    "Upload Research Document (PDF)",
    type="pdf",
    help="Select a PDF document for analysis",
    accept_multiple_files=False
)

if uploaded_pdf:
    # Ingest pipeline: save -> parse -> chunk -> embed/index. Runs on every
    # rerun while a file is attached (Streamlit re-executes the script).
    saved_path = save_uploaded_file(uploaded_pdf)
    raw_docs = load_pdf_documents(saved_path)
    processed_chunks = chunk_documents(raw_docs)
    index_documents(processed_chunks)
    st.success("✅ Document processed successfully! Ask your questions below.")

    # Chat is only offered once a document has been indexed.
    user_input = st.chat_input("Enter your question about the document...")
    if user_input:
        with st.chat_message("user"):
            st.write(user_input)
        with st.spinner("Analyzing document..."):
            relevant_docs = find_related_documents(user_input)
            ai_response = generate_answer(user_input, relevant_docs)
        with st.chat_message("assistant", avatar="🤖"):
            st.write(ai_response)