File size: 4,881 Bytes
0f0eea2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import streamlit as st
from langchain_community.document_loaders import PDFPlumberLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_ollama import OllamaEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama.llms import OllamaLLM

st.markdown("""

    <style>

    .stApp {

        background-color: #121826;  /* Deep Navy Blue */

        color: #EAEAEA;  /* Soft White */

    }

    

    /* Chat Input Styling */

    .stChatInput input {

        background-color: #1A2238 !important;  /* Dark Blue */

        color: #F5F5F5 !important;  /* Light Gray */

        border: 1px solid #3E4C72 !important;  /* Muted Blue */

    }

    

    /* User Message Styling */

    .stChatMessage[data-testid="stChatMessage"]:nth-child(odd) {

        background-color: #1F2A44 !important;  /* Dark Blue Gray */

        border: 1px solid #4A5C89 !important;  /* Subtle Blue */

        color: #D1D5DB !important;  /* Soft White */

        border-radius: 10px;

        padding: 15px;

        margin: 10px 0;

    }

    

    /* Assistant Message Styling */

    .stChatMessage[data-testid="stChatMessage"]:nth-child(even) {

        background-color: #253350 !important;  /* Rich Deep Blue */

        border: 1px solid #5C6FA9 !important;  /* Light Blue Accent */

        color: #F3F4F6 !important;  /* Soft White */

        border-radius: 10px;

        padding: 15px;

        margin: 10px 0;

    }

    

    /* Avatar Styling */

    .stChatMessage .avatar {

        background-color: #4CAF50 !important;  /* Vibrant Green */

        color: #FFFFFF !important;  /* White */

    }

    

    /* Text Color Fix */

    .stChatMessage p, .stChatMessage div {

        color: #EAEAEA !important;  /* Soft White */

    }

    

    .stFileUploader {

        background-color: #1A2238;

        border: 1px solid #4A5C89;

        border-radius: 5px;

        padding: 15px;

    }

    

    h1, h2, h3 {

        color: #4CAF50 !important;  /* Green Accent */

    }

</style>



    """, unsafe_allow_html=True)

PROMPT_TEMPLATE = """

You are an expert research assistant. Use the provided context to answer the query. 

If unsure, state that you don't know. Be concise and factual (max 3 sentences).



Query: {user_query} 

Context: {document_context} 

Answer:

"""
PDF_STORAGE_PATH = 'document_store/pdfs/'
EMBEDDING_MODEL = OllamaEmbeddings(model="deepseek-r1:1.5b")
DOCUMENT_VECTOR_DB = InMemoryVectorStore(EMBEDDING_MODEL)
LANGUAGE_MODEL = OllamaLLM(model="deepseek-r1:1.5b")


def save_uploaded_file(uploaded_file):
    file_path = PDF_STORAGE_PATH + uploaded_file.name
    with open(file_path, "wb") as file:
        file.write(uploaded_file.getbuffer())
    return file_path

def load_pdf_documents(file_path):
    document_loader = PDFPlumberLoader(file_path)
    return document_loader.load()

def chunk_documents(raw_documents):
    text_processor = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        add_start_index=True
    )
    return text_processor.split_documents(raw_documents)

def index_documents(document_chunks):
    DOCUMENT_VECTOR_DB.add_documents(document_chunks)

def find_related_documents(query):
    return DOCUMENT_VECTOR_DB.similarity_search(query)

def generate_answer(user_query, context_documents):
    context_text = "\n\n".join([doc.page_content for doc in context_documents])
    conversation_prompt = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
    response_chain = conversation_prompt | LANGUAGE_MODEL
    return response_chain.invoke({"user_query": user_query, "document_context": context_text})


# UI Configuration


st.title("πŸ“˜ SmartDoc AI")
st.markdown("### AI-Powered Document Assistant")
st.markdown("---")

# File Upload Section
uploaded_pdf = st.file_uploader(
    "Upload Research Document (PDF)",
    type="pdf",
    help="Select a PDF document for analysis",
    accept_multiple_files=False

)

if uploaded_pdf:
    saved_path = save_uploaded_file(uploaded_pdf)
    raw_docs = load_pdf_documents(saved_path)
    processed_chunks = chunk_documents(raw_docs)
    index_documents(processed_chunks)
    
    st.success("βœ… Document processed successfully! Ask your questions below.")
    
    user_input = st.chat_input("Enter your question about the document...")
    
    if user_input:
        with st.chat_message("user"):
            st.write(user_input)
        
        with st.spinner("Analyzing document..."):
            relevant_docs = find_related_documents(user_input)
            ai_response = generate_answer(user_input, relevant_docs)
            
        with st.chat_message("assistant", avatar="πŸ€–"):
            st.write(ai_response)