drkareemkamal commited on
Commit
607c59b
·
verified ·
1 Parent(s): 41a7dcd

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +90 -0
app.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import streamlit as st
3
+ from langchain_community.document_loaders import PDFPlumberLoader
4
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
5
+ from langchain_core.vectorstores import InMemoryVectorStore
6
+ from langchain_ollama import OllamaEmbeddings
7
+ from langchain_core.prompts import ChatPromptTemplate
8
+ from langchain_ollama.llms import OllamaLLM
9
+
10
# ---- Configuration ----------------------------------------------------------
# Server-side directory holding the PDF books; created on startup if missing.
pdfs_directory = 'pdfs/' # Change to your server directory
os.makedirs(pdfs_directory, exist_ok=True)

# PDFs already on disk, offered in the sidebar selector.
PREDEFINED_BOOKS = [name for name in os.listdir(pdfs_directory) if name.endswith('.pdf')]

# Prompt for retrieval-augmented answering; filled with {question}/{context}.
TEMPLATE = """
You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: {question}
Context: {context}
Answer:
"""

# ---- Embeddings, vector store, and LLM --------------------------------------
# NOTE(review): the same Ollama model tag serves both embedding and generation —
# confirm "deepseek-r1:14b" is intended for both roles.
embeddings = OllamaEmbeddings(model="deepseek-r1:14b")
vector_store = InMemoryVectorStore(embeddings)
model = OllamaLLM(model="deepseek-r1:14b")
27
+
28
+
29
+ # ======== Functions ========
30
def upload_pdf(file):
    """Persist an uploaded PDF into ``pdfs_directory`` and return its filename.

    ``file`` is a Streamlit uploaded file object; its raw buffer is written
    to disk verbatim under its original name.
    """
    destination = os.path.join(pdfs_directory, file.name)
    with open(destination, "wb") as out:
        out.write(file.getbuffer())
    return file.name
35
+
36
def load_pdf(file_path):
    """Parse the PDF at ``file_path`` into a list of LangChain documents."""
    return PDFPlumberLoader(file_path).load()
39
+
40
def split_text(documents):
    """Chunk ``documents`` into ~1000-character pieces with 200-char overlap.

    ``add_start_index=True`` records each chunk's character offset within its
    source document in the chunk metadata.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        add_start_index=True,
    )
    return splitter.split_documents(documents)
47
+
48
def index_docs(documents):
    """Embed ``documents`` and add them to the shared in-memory vector store."""
    vector_store.add_documents(documents)
50
+
51
def retrieve_docs(query):
    """Return the stored chunks most similar to ``query``.

    Uses the vector store's default top-k for ``similarity_search``.
    """
    return vector_store.similarity_search(query)
53
+
54
def answer_question(question, documents):
    """Generate an answer to ``question`` grounded in ``documents``.

    The documents' page contents are joined into a single context string and
    fed, with the question, through the TEMPLATE prompt piped into the LLM.
    """
    context = "\n\n".join(doc.page_content for doc in documents)
    chain = ChatPromptTemplate.from_template(TEMPLATE) | model
    return chain.invoke({"question": question, "context": context})
59
+
60
+
61
# ======== Streamlit UI ========
st.title("📄 Chat with Books (Server Ready)")

with st.sidebar:
    st.header("Select or Upload Book")
    selected_book = st.selectbox("Choose a PDF book:", PREDEFINED_BOOKS + ["Upload new book"])

    if selected_book == "Upload new book":
        uploaded_file = st.file_uploader("Upload PDF", type="pdf")
        if uploaded_file:
            filename = upload_pdf(uploaded_file)
            # Bug fix: message previously contained no placeholder, so the
            # uploaded file's name was never shown to the user.
            st.success(f"Uploaded: {filename}")
            # Switch the session to the freshly uploaded book.
            selected_book = filename

if selected_book and selected_book != "Upload new book":
    file_path = os.path.join(pdfs_directory, selected_book)
    st.info(f"📄 Selected Book: {selected_book}")

    # Load, split, and index on every run: Streamlit re-executes the whole
    # script per interaction, which rebuilds the module-level in-memory vector
    # store, so the index must be repopulated each time. (To avoid the repeated
    # PDF parse, the store itself would need to be cached, e.g. with
    # st.cache_resource — left as-is to keep this change self-contained.)
    documents = load_pdf(file_path)
    chunked_documents = split_text(documents)
    index_docs(chunked_documents)

    # Chat input: one question per run; answer is retrieved-then-generated.
    question = st.chat_input("Ask something about the book...")
    if question:
        st.chat_message("user").write(question)
        related_documents = retrieve_docs(question)
        answer = answer_question(question, related_documents)
        st.chat_message("assistant").write(answer)