File size: 4,881 Bytes
0f0eea2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import os

import streamlit as st
from langchain_community.document_loaders import PDFPlumberLoader
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_ollama import OllamaEmbeddings
from langchain_ollama.llms import OllamaLLM
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Inject a custom dark theme (navy background, blue chat bubbles, green
# accents) via raw CSS; unsafe_allow_html is required for <style> to render.
st.markdown("""

    <style>

    .stApp {

        background-color: #121826;  /* Deep Navy Blue */

        color: #EAEAEA;  /* Soft White */

    }

    

    /* Chat Input Styling */

    .stChatInput input {

        background-color: #1A2238 !important;  /* Dark Blue */

        color: #F5F5F5 !important;  /* Light Gray */

        border: 1px solid #3E4C72 !important;  /* Muted Blue */

    }

    

    /* User Message Styling */

    .stChatMessage[data-testid="stChatMessage"]:nth-child(odd) {

        background-color: #1F2A44 !important;  /* Dark Blue Gray */

        border: 1px solid #4A5C89 !important;  /* Subtle Blue */

        color: #D1D5DB !important;  /* Soft White */

        border-radius: 10px;

        padding: 15px;

        margin: 10px 0;

    }

    

    /* Assistant Message Styling */

    .stChatMessage[data-testid="stChatMessage"]:nth-child(even) {

        background-color: #253350 !important;  /* Rich Deep Blue */

        border: 1px solid #5C6FA9 !important;  /* Light Blue Accent */

        color: #F3F4F6 !important;  /* Soft White */

        border-radius: 10px;

        padding: 15px;

        margin: 10px 0;

    }

    

    /* Avatar Styling */

    .stChatMessage .avatar {

        background-color: #4CAF50 !important;  /* Vibrant Green */

        color: #FFFFFF !important;  /* White */

    }

    

    /* Text Color Fix */

    .stChatMessage p, .stChatMessage div {

        color: #EAEAEA !important;  /* Soft White */

    }

    

    .stFileUploader {

        background-color: #1A2238;

        border: 1px solid #4A5C89;

        border-radius: 5px;

        padding: 15px;

    }

    

    h1, h2, h3 {

        color: #4CAF50 !important;  /* Green Accent */

    }

</style>



    """, unsafe_allow_html=True)

# RAG system prompt; {user_query} and {document_context} are filled in by
# generate_answer() via ChatPromptTemplate.
PROMPT_TEMPLATE = """

You are an expert research assistant. Use the provided context to answer the query. 

If unsure, state that you don't know. Be concise and factual (max 3 sentences).



Query: {user_query} 

Context: {document_context} 

Answer:

"""
# Directory where save_uploaded_file() persists uploaded PDFs.
PDF_STORAGE_PATH = 'document_store/pdfs/'
# Embedding model used to vectorize document chunks (served by local Ollama).
EMBEDDING_MODEL = OllamaEmbeddings(model="deepseek-r1:1.5b")
# In-memory vector store: contents live only for the process lifetime.
DOCUMENT_VECTOR_DB = InMemoryVectorStore(EMBEDDING_MODEL)
# LLM that generates the final answer from the retrieved context.
LANGUAGE_MODEL = OllamaLLM(model="deepseek-r1:1.5b")


def save_uploaded_file(uploaded_file):
    """Persist an uploaded PDF under PDF_STORAGE_PATH and return its path.

    Args:
        uploaded_file: Streamlit UploadedFile exposing ``.name`` and
            ``.getbuffer()``.

    Returns:
        str: Filesystem path of the written file.
    """
    # Create the storage directory on first use; the original open() raised
    # FileNotFoundError on a fresh checkout where document_store/pdfs/ did
    # not exist yet.
    os.makedirs(PDF_STORAGE_PATH, exist_ok=True)
    # os.path.join handles the trailing slash in PDF_STORAGE_PATH, yielding
    # the same path as the original string concatenation.
    file_path = os.path.join(PDF_STORAGE_PATH, uploaded_file.name)
    with open(file_path, "wb") as file:
        file.write(uploaded_file.getbuffer())
    return file_path

def load_pdf_documents(file_path):
    """Parse the PDF at *file_path* into a list of LangChain documents."""
    return PDFPlumberLoader(file_path).load()

def chunk_documents(raw_documents):
    """Split *raw_documents* into overlapping chunks for embedding.

    Uses 1000-character chunks with 200 characters of overlap and records
    each chunk's start index in its metadata.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        add_start_index=True,
    )
    return splitter.split_documents(raw_documents)

def index_documents(document_chunks):
    """Embed *document_chunks* and add them to the in-memory vector store."""
    DOCUMENT_VECTOR_DB.add_documents(document_chunks)

def find_related_documents(query):
    """Return the chunks most similar to *query* from the vector store."""
    return DOCUMENT_VECTOR_DB.similarity_search(query)

def generate_answer(user_query, context_documents):
    """Answer *user_query* grounded in the retrieved *context_documents*.

    The documents' page contents are joined into a single context string,
    substituted into PROMPT_TEMPLATE, and piped through LANGUAGE_MODEL.
    """
    combined_context = "\n\n".join(doc.page_content for doc in context_documents)
    prompt = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
    chain = prompt | LANGUAGE_MODEL
    return chain.invoke({
        "user_query": user_query,
        "document_context": combined_context,
    })


# UI Configuration


st.title("πŸ“˜ SmartDoc AI")
st.markdown("### AI-Powered Document Assistant")
st.markdown("---")

# File Upload Section
# st.file_uploader returns None until the user picks a file, then a single
# UploadedFile (accept_multiple_files=False).
uploaded_pdf = st.file_uploader(
    "Upload Research Document (PDF)",
    type="pdf",
    help="Select a PDF document for analysis",
    accept_multiple_files=False

)

if uploaded_pdf:
    # Streamlit reruns this entire script on every widget interaction, so
    # without a guard the PDF is re-saved, re-chunked and re-embedded on
    # every question, duplicating vectors in DOCUMENT_VECTOR_DB and paying
    # the embedding cost each time. Index once per uploaded file name.
    if st.session_state.get("indexed_file") != uploaded_pdf.name:
        saved_path = save_uploaded_file(uploaded_pdf)
        raw_docs = load_pdf_documents(saved_path)
        processed_chunks = chunk_documents(raw_docs)
        index_documents(processed_chunks)
        st.session_state["indexed_file"] = uploaded_pdf.name

    st.success("βœ… Document processed successfully! Ask your questions below.")

    user_input = st.chat_input("Enter your question about the document...")

    if user_input:
        # Echo the question in a user bubble.
        with st.chat_message("user"):
            st.write(user_input)

        # Retrieve relevant chunks, then generate the grounded answer.
        with st.spinner("Analyzing document..."):
            relevant_docs = find_related_documents(user_input)
            ai_response = generate_answer(user_input, relevant_docs)

        with st.chat_message("assistant", avatar="πŸ€–"):
            st.write(ai_response)