Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import streamlit as st
|
3 |
+
from langchain_community.document_loaders import PDFPlumberLoader
|
4 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
5 |
+
from langchain_core.vectorstores import InMemoryVectorStore
|
6 |
+
from langchain_ollama import OllamaEmbeddings
|
7 |
+
from langchain_core.prompts import ChatPromptTemplate
|
8 |
+
from langchain_ollama.llms import OllamaLLM
|
9 |
+
|
10 |
+
# ======== Configurations ========
pdfs_directory = 'pdfs/'  # Change to your server directory
os.makedirs(pdfs_directory, exist_ok=True)  # ensure the folder exists before listing it

# PDFs already present on the server, offered as preselectable books in the sidebar.
PREDEFINED_BOOKS = [file for file in os.listdir(pdfs_directory) if file.endswith('.pdf')]

# Prompt template for the RAG chain; {question} and {context} are filled in answer_question().
TEMPLATE = """
You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: {question}
Context: {context}
Answer:
"""

# ======== Initialize Embeddings and Vector Store ========
# NOTE(review): embedding and generation share the same Ollama model tag —
# confirm "deepseek-r1:14b" is intended for embedding use as well.
embeddings = OllamaEmbeddings(model="deepseek-r1:14b")
vector_store = InMemoryVectorStore(embeddings)  # in-memory: the index is lost on restart
model = OllamaLLM(model="deepseek-r1:14b")
|
27 |
+
|
28 |
+
|
29 |
+
# ======== Functions ========
|
30 |
+
def upload_pdf(file):
    """Persist an uploaded PDF into the configured pdfs_directory.

    Args:
        file: a Streamlit UploadedFile (has .name and .getbuffer()).

    Returns:
        The bare file name (not the full path) of the saved PDF.
    """
    destination = os.path.join(pdfs_directory, file.name)
    with open(destination, "wb") as out:
        out.write(file.getbuffer())
    return file.name
|
35 |
+
|
36 |
+
def load_pdf(file_path):
    """Parse the PDF at *file_path* into a list of LangChain documents."""
    return PDFPlumberLoader(file_path).load()
|
39 |
+
|
40 |
+
def split_text(documents):
    """Chunk *documents* into overlapping pieces suitable for embedding."""
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,       # characters per chunk
        chunk_overlap=200,     # overlap keeps context across chunk boundaries
        add_start_index=True,  # record each chunk's offset within its source doc
    )
    return splitter.split_documents(documents)
|
47 |
+
|
48 |
+
def index_docs(documents):
    """Embed and add the given document chunks to the in-memory vector store."""
    vector_store.add_documents(documents)
|
50 |
+
|
51 |
+
def retrieve_docs(query, k=4):
    """Return the *k* indexed chunks most similar to *query*.

    Args:
        query: the user's question text.
        k: number of chunks to retrieve. Defaults to 4, which matches the
           previous hard-coded behavior (LangChain's similarity_search default),
           so existing callers are unaffected.

    Returns:
        A list of LangChain Documents, most similar first.
    """
    return vector_store.similarity_search(query, k=k)
|
53 |
+
|
54 |
+
def answer_question(question, documents):
    """Generate a concise answer to *question* grounded in *documents*.

    The retrieved documents' text is joined into a single context string and
    fed, together with the question, through the prompt-template -> LLM chain.
    """
    chain = ChatPromptTemplate.from_template(TEMPLATE) | model
    joined_context = "\n\n".join(doc.page_content for doc in documents)
    return chain.invoke({"question": question, "context": joined_context})
|
59 |
+
|
60 |
+
|
61 |
+
# ======== Streamlit UI ========
st.title("📄 Chat with Books (Server Ready)")

with st.sidebar:
    st.header("Select or Upload Book")
    selected_book = st.selectbox("Choose a PDF book:", PREDEFINED_BOOKS + ["Upload new book"])

    if selected_book == "Upload new book":
        uploaded_file = st.file_uploader("Upload PDF", type="pdf")
        if uploaded_file:
            filename = upload_pdf(uploaded_file)
            # BUG FIX: the success message had no placeholder; show the saved file name.
            st.success(f"Uploaded: {filename}")
            selected_book = filename

if selected_book and selected_book != "Upload new book":
    file_path = os.path.join(pdfs_directory, selected_book)
    st.info(f"📄 Selected Book: {selected_book}")

    # BUG FIX: Streamlit reruns this whole script on every interaction, so the
    # book was re-loaded, re-split, and re-embedded on each chat message,
    # duplicating chunks in the vector store. Index each book only once per
    # session. NOTE(review): switching between books still accumulates the
    # previous book's chunks in the shared InMemoryVectorStore — consider
    # rebuilding the store per book if cross-book leakage matters.
    if st.session_state.get("indexed_book") != selected_book:
        documents = load_pdf(file_path)
        chunked_documents = split_text(documents)
        index_docs(chunked_documents)
        st.session_state["indexed_book"] = selected_book

    # Chat input
    question = st.chat_input("Ask something about the book...")
    if question:
        st.chat_message("user").write(question)
        related_documents = retrieve_docs(question)
        answer = answer_question(question, related_documents)
        st.chat_message("assistant").write(answer)
|