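# SmartDoc AI: a Streamlit RAG app. Upload a PDF, chunk and embed it into an
# in-memory vector store, then answer questions about it with a local Ollama model.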
import os
import streamlit as st
from langchain_community.document_loaders import PDFPlumberLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_ollama import OllamaEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama.llms import OllamaLLM
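# Custom CSS for the dark chat theme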
st.markdown("""
    <style>
    .stApp {
        background-color: #121826; /* Deep Navy Blue */
        color: #EAEAEA; /* Soft White */
    }
    /* Chat Input Styling */
    .stChatInput input {
        background-color: #1A2238 !important; /* Dark Blue */
        color: #F5F5F5 !important; /* Light Gray */
        border: 1px solid #3E4C72 !important; /* Muted Blue */
    }
    /* User Message Styling */
    .stChatMessage[data-testid="stChatMessage"]:nth-child(odd) {
        background-color: #1F2A44 !important; /* Dark Blue Gray */
        border: 1px solid #4A5C89 !important; /* Subtle Blue */
        color: #D1D5DB !important; /* Soft White */
        border-radius: 10px;
        padding: 15px;
        margin: 10px 0;
    }
    /* Assistant Message Styling */
    .stChatMessage[data-testid="stChatMessage"]:nth-child(even) {
        background-color: #253350 !important; /* Rich Deep Blue */
        border: 1px solid #5C6FA9 !important; /* Light Blue Accent */
        color: #F3F4F6 !important; /* Soft White */
        border-radius: 10px;
        padding: 15px;
        margin: 10px 0;
    }
    /* Avatar Styling */
    .stChatMessage .avatar {
        background-color: #4CAF50 !important; /* Vibrant Green */
        color: #FFFFFF !important; /* White */
    }
    /* Text Color Fix */
    .stChatMessage p, .stChatMessage div {
        color: #EAEAEA !important; /* Soft White */
    }
    .stFileUploader {
        background-color: #1A2238;
        border: 1px solid #4A5C89;
        border-radius: 5px;
        padding: 15px;
    }
    h1, h2, h3 {
        color: #4CAF50 !important; /* Green Accent */
    }
    </style>
""", unsafe_allow_html=True)
PROMPT_TEMPLATE = """
You are an expert research assistant. Use the provided context to answer the query.
If unsure, state that you don't know. Be concise and factual (max 3 sentences).
Query: {user_query}
Context: {document_context}
Answer:
"""
PDF_STORAGE_PATH = 'document_store/pdfs/'
EMBEDDING_MODEL = OllamaEmbeddings(model="deepseek-r1:1.5b")
DOCUMENT_VECTOR_DB = InMemoryVectorStore(EMBEDDING_MODEL)
LANGUAGE_MODEL = OllamaLLM(model="deepseek-r1:1.5b")
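# Document Processing Helpers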
def save_uploaded_file(uploaded_file):
    # Create the storage directory if it doesn't exist yet
    os.makedirs(PDF_STORAGE_PATH, exist_ok=True)
    file_path = PDF_STORAGE_PATH + uploaded_file.name
    with open(file_path, "wb") as file:
        file.write(uploaded_file.getbuffer())
    return file_path
def load_pdf_documents(file_path):
    # Parse the PDF into LangChain Document objects (one per page)
    document_loader = PDFPlumberLoader(file_path)
    return document_loader.load()
def chunk_documents(raw_documents):
    text_processor = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        add_start_index=True
    )
    return text_processor.split_documents(raw_documents)
def index_documents(document_chunks):
    DOCUMENT_VECTOR_DB.add_documents(document_chunks)
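# Retrieval and Answer Generation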
def find_related_documents(query):
    # Return the chunks most similar to the query (top 4 by default)
    return DOCUMENT_VECTOR_DB.similarity_search(query)
def generate_answer(user_query, context_documents):
    # Stuff the retrieved chunks into the prompt and run it through the LLM
    context_text = "\n\n".join([doc.page_content for doc in context_documents])
    conversation_prompt = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
    response_chain = conversation_prompt | LANGUAGE_MODEL
    return response_chain.invoke({"user_query": user_query, "document_context": context_text})
# UI Configuration
st.title("π SmartDoc AI")
st.markdown("### AI-Powered Document Assistant")
st.markdown("---")
# File Upload Section
uploaded_pdf = st.file_uploader(
    "Upload Research Document (PDF)",
    type="pdf",
    help="Select a PDF document for analysis",
    accept_multiple_files=False
)
if uploaded_pdf:
    saved_path = save_uploaded_file(uploaded_pdf)
    raw_docs = load_pdf_documents(saved_path)
    processed_chunks = chunk_documents(raw_docs)
    index_documents(processed_chunks)

    st.success("✅ Document processed successfully! Ask your questions below.")

    user_input = st.chat_input("Enter your question about the document...")

    if user_input:
        with st.chat_message("user"):
            st.write(user_input)

        with st.spinner("Analyzing document..."):
            relevant_docs = find_related_documents(user_input)
            ai_response = generate_answer(user_input, relevant_docs)

        with st.chat_message("assistant", avatar="🤖"):
            st.write(ai_response)