drkareemkamal committed
Commit 628ef08 · verified · 1 Parent(s): 35823f3

Update app.py

Files changed (1): app.py (+42, -36)
app.py CHANGED
@@ -1,32 +1,40 @@
 import os
 import streamlit as st
-from langchain_community.document_loaders.pdf import PDFPlumberLoader
+from langchain_community.document_loaders import PDFPlumberLoader
 from langchain_text_splitters import RecursiveCharacterTextSplitter
-from langchain_core.vectorstores import InMemoryVectorStore
-from langchain_ollama import OllamaEmbeddings
-from langchain_core.prompts import ChatPromptTemplate
-from langchain_ollama.llms import OllamaLLM
-
-# ======== Configurations ========
-pdfs_directory = 'pdfs/'  # Change to your server directory
+from langchain.vectorstores import FAISS
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.prompts import ChatPromptTemplate
+from langchain.chains import LLMChain
+from langchain.llms import CTransformers
+
+# === Configuration ===
+pdfs_directory = '/pdfs'
 os.makedirs(pdfs_directory, exist_ok=True)
 
-PREDEFINED_BOOKS = [file for file in os.listdir(pdfs_directory) if file.endswith('.pdf')]
+PREDEFINED_BOOKS = [f for f in os.listdir(pdfs_directory) if f.endswith(".pdf")]
 
 TEMPLATE = """
-You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
+You are a helpful assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question.
+If you don't know the answer, say "I don't know". Limit your answer to three concise sentences.
+
 Question: {question}
 Context: {context}
 Answer:
 """
 
-# ======== Initialize Embeddings and Vector Store ========
-embeddings = OllamaEmbeddings(model="deepseek-r1:14b")
-vector_store = InMemoryVectorStore(embeddings)
-model = OllamaLLM(model="deepseek-r1:14b")
+# === Load Embeddings (CPU Friendly) ===
+embedding_model = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
 
+# === LLM (Quantized, CPU Efficient) ===
+llm = CTransformers(
+    model='TheBloke/Mistral-7B-Instruct-v0.1-GGUF',
+    model_file='mistral-7b-instruct-v0.1.Q4_K_M.gguf',
+    model_type='mistral',
+    config={'max_new_tokens': 512, 'temperature': 0.5}
+)
 
-# ======== Functions ========
+# === Functions ===
 def upload_pdf(file):
     save_path = os.path.join(pdfs_directory, file.name)
     with open(save_path, "wb") as f:
@@ -38,32 +46,32 @@ def load_pdf(file_path):
     return loader.load()
 
 def split_text(documents):
-    text_splitter = RecursiveCharacterTextSplitter(
+    splitter = RecursiveCharacterTextSplitter(
         chunk_size=1000,
         chunk_overlap=200,
         add_start_index=True
     )
-    return text_splitter.split_documents(documents)
+    return splitter.split_documents(documents)
 
-def index_docs(documents):
-    vector_store.add_documents(documents)
+def create_vector_store(docs):
+    return FAISS.from_documents(docs, embedding_model)
 
-def retrieve_docs(query):
+def retrieve_docs(vector_store, query):
     return vector_store.similarity_search(query)
 
 def answer_question(question, documents):
-    context = "\n\n".join([doc.page_content for doc in documents])
+    context = "\n\n".join(doc.page_content for doc in documents)
     prompt = ChatPromptTemplate.from_template(TEMPLATE)
-    chain = prompt | model
-    return chain.invoke({"question": question, "context": context})
-
+    chain = LLMChain(llm=llm, prompt=prompt)
+    return chain.run({"question": question, "context": context})
 
-# ======== Streamlit UI ========
-st.title("📄 Chat with Books (Server Ready)")
+# === UI ===
+st.set_page_config(page_title="📄 PDF Q&A (CPU Version)", layout="centered")
+st.title("📚 Chat with PDF - CPU Optimized")
 
 with st.sidebar:
-    st.header("Select or Upload Book")
-    selected_book = st.selectbox("Choose a PDF book:", PREDEFINED_BOOKS + ["Upload new book"])
+    st.header("Select or Upload a Book")
+    selected_book = st.selectbox("Choose a PDF", PREDEFINED_BOOKS + ["Upload new book"])
 
     if selected_book == "Upload new book":
         uploaded_file = st.file_uploader("Upload PDF", type="pdf")
@@ -73,18 +81,16 @@ with st.sidebar:
             selected_book = filename
 
 if selected_book and selected_book != "Upload new book":
+    st.info(f"📖 You selected: {selected_book}")
     file_path = os.path.join(pdfs_directory, selected_book)
-    st.info(f"📄 Selected Book: {selected_book}")
 
-    # Load, split, and index
     documents = load_pdf(file_path)
-    chunked_documents = split_text(documents)
-    index_docs(chunked_documents)
+    chunks = split_text(documents)
+    vector_store = create_vector_store(chunks)
 
-    # Chat input
-    question = st.chat_input("Ask something about the book...")
+    question = st.chat_input("Ask a question about the book...")
    if question:
        st.chat_message("user").write(question)
-        related_documents = retrieve_docs(question)
-        answer = answer_question(question, related_documents)
+        related_docs = retrieve_docs(vector_store, question)
+        answer = answer_question(question, related_docs)
        st.chat_message("assistant").write(answer)
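In short, the commit swaps the Ollama stack (deepseek-r1:14b for both embeddings and generation, plus an in-memory vector store) for a CPU-only stack: MiniLM sentence embeddings, a FAISS index built from the selected book, and a quantized Mistral-7B GGUF served through CTransformers. Below is a minimal sketch for sanity-checking the new retrieval path outside Streamlit; the PDF path and question are placeholders, and it assumes langchain, langchain-community, sentence-transformers, faiss-cpu, and pdfplumber are installed.

# smoke_test.py - exercises the commit's load/split/embed/search path
# without the UI. File name and query below are placeholders.
from langchain_community.document_loaders import PDFPlumberLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

documents = PDFPlumberLoader("pdfs/sample.pdf").load()
chunks = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200, add_start_index=True
).split_documents(documents)

embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vector_store = FAISS.from_documents(chunks, embedding_model)

# similarity_search defaults to k=4; app.py's retrieve_docs relies on that default
for doc in vector_store.similarity_search("What is this book about?", k=2):
    print(doc.metadata, doc.page_content[:100])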
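One behavior worth noting: Streamlit re-runs the whole script on every interaction, so as committed the app re-loads, re-splits, and re-embeds the selected PDF on every chat message. A sketch of a per-book cache, assuming Streamlit >= 1.18 for st.cache_resource (get_vector_store is a hypothetical helper built on the commit's own functions):

# Hypothetical helper, not in the commit: build each book's FAISS index
# once and reuse it across reruns instead of re-embedding per message.
@st.cache_resource
def get_vector_store(file_path: str):
    documents = load_pdf(file_path)
    return create_vector_store(split_text(documents))

# in the main block: vector_store = get_vector_store(file_path)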