Update app.py

app.py CHANGED
@@ -1,32 +1,40 @@
 import os
 import streamlit as st
-from langchain_community.document_loaders import PDFPlumberLoader
+from langchain_community.document_loaders import PDFPlumberLoader
 from langchain_text_splitters import RecursiveCharacterTextSplitter
-from langchain_core.vectorstores import InMemoryVectorStore
-from langchain_ollama import OllamaEmbeddings
-from langchain_core.prompts import ChatPromptTemplate
-from langchain_ollama.llms import OllamaLLM
-
-
-pdfs_directory = …
+from langchain.vectorstores import FAISS
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.prompts import ChatPromptTemplate
+from langchain.chains import LLMChain
+from langchain.llms import CTransformers
+
+# === Configuration ===
+pdfs_directory = '/pdfs'
 os.makedirs(pdfs_directory, exist_ok=True)
 
-PREDEFINED_BOOKS = […]
+PREDEFINED_BOOKS = [f for f in os.listdir(pdfs_directory) if f.endswith(".pdf")]
 
 TEMPLATE = """
-You are …
+You are a helpful assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question.
+If you don't know the answer, say "I don't know". Limit your answer to three concise sentences.
+
 Question: {question}
 Context: {context}
 Answer:
 """
 
-# …
-embeddings = OllamaEmbeddings(…)
-vector_store = InMemoryVectorStore(embeddings)
-model = OllamaLLM(model="deepseek-r1:14b")
+# === Load Embeddings (CPU Friendly) ===
+embedding_model = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
 
+# === LLM (Quantized, CPU Efficient) ===
+llm = CTransformers(
+    model='TheBloke/Mistral-7B-Instruct-v0.1-GGUF',
+    model_file='mistral-7b-instruct-v0.1.Q4_K_M.gguf',
+    model_type='mistral',
+    config={'max_new_tokens': 512, 'temperature': 0.5}
+)
 
-# …
+# === Functions ===
 def upload_pdf(file):
     save_path = os.path.join(pdfs_directory, file.name)
     with open(save_path, "wb") as f:
@@ -38,32 +46,32 @@ def load_pdf(file_path):
     return loader.load()
 
 def split_text(documents):
-    text_splitter = RecursiveCharacterTextSplitter(
+    splitter = RecursiveCharacterTextSplitter(
         chunk_size=1000,
         chunk_overlap=200,
         add_start_index=True
     )
-    return text_splitter.split_documents(documents)
+    return splitter.split_documents(documents)
 
-def index_docs(documents):
-    vector_store.add_documents(documents)
+def create_vector_store(docs):
+    return FAISS.from_documents(docs, embedding_model)
 
-def retrieve_docs(query):
+def retrieve_docs(vector_store, query):
     return vector_store.similarity_search(query)
 
 def answer_question(question, documents):
-    context = "\n\n".join([doc.page_content for doc in documents])
+    context = "\n\n".join(doc.page_content for doc in documents)
     prompt = ChatPromptTemplate.from_template(TEMPLATE)
-    chain = prompt | model
-    return chain.invoke({"question": question, "context": context})
-
+    chain = LLMChain(llm=llm, prompt=prompt)
+    return chain.run({"question": question, "context": context})
 
-# …
-st.…
+# === UI ===
+st.set_page_config(page_title="📖 PDF Q&A (CPU Version)", layout="centered")
+st.title("📖 Chat with PDF - CPU Optimized")
 
 with st.sidebar:
-    st.header("Select or Upload Book")
-    selected_book = st.selectbox("Choose a PDF …
+    st.header("Select or Upload a Book")
+    selected_book = st.selectbox("Choose a PDF", PREDEFINED_BOOKS + ["Upload new book"])
 
     if selected_book == "Upload new book":
         uploaded_file = st.file_uploader("Upload PDF", type="pdf")
@@ -73,18 +81,16 @@ with st.sidebar:
         selected_book = filename
 
 if selected_book and selected_book != "Upload new book":
+    st.info(f"📖 You selected: {selected_book}")
    file_path = os.path.join(pdfs_directory, selected_book)
-    st.info(f"📖 Selected Book: {selected_book}")
 
-    # Load, split, and index
     documents = load_pdf(file_path)
-    chunked_documents = split_text(documents)
-    index_docs(chunked_documents)
+    chunks = split_text(documents)
+    vector_store = create_vector_store(chunks)
 
-    question = st.chat_input("Ask something about the book...")
+    question = st.chat_input("Ask a question about the book...")
     if question:
         st.chat_message("user").write(question)
-        related_documents = retrieve_docs(question)
-        answer = answer_question(question, related_documents)
+        related_docs = retrieve_docs(vector_store, question)
+        answer = answer_question(question, related_docs)
         st.chat_message("assistant").write(answer)
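The commit swaps the Ollama/DeepSeek backend for a CPU-only stack: MiniLM sentence embeddings plus a FAISS index for retrieval, and a quantized Mistral GGUF via CTransformers for generation. The indexing and retrieval path can be smoke-tested outside Streamlit with the same calls the diff adds. This sketch is not part of the commit; the PDF path and the query are illustrative assumptions.

# Sketch: exercise the new indexing/retrieval path standalone.
from langchain_community.document_loaders import PDFPlumberLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

documents = PDFPlumberLoader("pdfs/sample.pdf").load()   # hypothetical sample PDF
chunks = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200, add_start_index=True
).split_documents(documents)

embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
store = FAISS.from_documents(chunks, embedding_model)    # same call as create_vector_store
for doc in store.similarity_search("What is this book about?", k=2):
    print(doc.metadata.get("start_index"), doc.page_content[:80])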
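The generation side can be exercised in isolation as well. The model identifiers below are the ones the commit pins; CTransformers fetches the GGUF weights from the Hugging Face Hub on first use. The shortened prompt stands in for the app's full TEMPLATE, and the question/context strings are made up for the demo.

# Sketch: the new CPU answering step (CTransformers + LLMChain) on its own.
from langchain.prompts import ChatPromptTemplate
from langchain.chains import LLMChain
from langchain.llms import CTransformers

llm = CTransformers(
    model='TheBloke/Mistral-7B-Instruct-v0.1-GGUF',
    model_file='mistral-7b-instruct-v0.1.Q4_K_M.gguf',
    model_type='mistral',
    config={'max_new_tokens': 512, 'temperature': 0.5}
)
prompt = ChatPromptTemplate.from_template(
    "Question: {question}\nContext: {context}\nAnswer:"  # abbreviated TEMPLATE
)
chain = LLMChain(llm=llm, prompt=prompt)
print(chain.run({"question": "What is FAISS?",
                 "context": "FAISS is a library for efficient similarity search."}))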
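Reusing store from the first sketch and chain from the second reproduces the app's full answer path, retrieve_docs followed by answer_question, without the UI:

question = "What is this book about?"
related_docs = store.similarity_search(question)                 # app's retrieve_docs
context = "\n\n".join(doc.page_content for doc in related_docs)  # app's answer_question
print(chain.run({"question": question, "context": context}))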
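For the Space to build with the new imports, requirements.txt presumably needs streamlit, langchain, langchain-community, pdfplumber, sentence-transformers, faiss-cpu, and ctransformers; locally the app starts with: streamlit run app.py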