Update app.py

app.py CHANGED
@@ -1,32 +1,40 @@
 import os
 import streamlit as st
-from langchain_community.document_loaders import PDFPlumberLoader
+from langchain_community.document_loaders import PDFPlumberLoader
 from langchain_text_splitters import RecursiveCharacterTextSplitter
-from langchain_core.vectorstores import InMemoryVectorStore
-from langchain_ollama import OllamaEmbeddings
-from langchain_core.prompts import ChatPromptTemplate
-from langchain_ollama.llms import OllamaLLM
-
-
-pdfs_directory = …
+from langchain.vectorstores import FAISS
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.prompts import ChatPromptTemplate
+from langchain.chains import LLMChain
+from langchain.llms import CTransformers
+
+# === Configuration ===
+pdfs_directory = '/pdfs'
 os.makedirs(pdfs_directory, exist_ok=True)
 
-PREDEFINED_BOOKS = […]
+PREDEFINED_BOOKS = [f for f in os.listdir(pdfs_directory) if f.endswith(".pdf")]
 
 TEMPLATE = """
-You are …
+You are a helpful assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question.
+If you don't know the answer, say "I don't know". Limit your answer to three concise sentences.
+
 Question: {question}
 Context: {context}
 Answer:
 """
 
-# …
-embeddings = OllamaEmbeddings(…)
-vector_store = InMemoryVectorStore(embeddings)
-model = OllamaLLM(model="deepseek-r1:14b")
+# === Load Embeddings (CPU Friendly) ===
+embedding_model = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
 
+# === LLM (Quantized, CPU Efficient) ===
+llm = CTransformers(
+    model='TheBloke/Mistral-7B-Instruct-v0.1-GGUF',
+    model_file='mistral-7b-instruct-v0.1.Q4_K_M.gguf',
+    model_type='mistral',
+    config={'max_new_tokens': 512, 'temperature': 0.5}
+)
 
-# …
+# === Functions ===
 def upload_pdf(file):
     save_path = os.path.join(pdfs_directory, file.name)
     with open(save_path, "wb") as f:
@@ -38,32 +46,32 @@ def load_pdf(file_path):
     return loader.load()
 
 def split_text(documents):
-    text_splitter = RecursiveCharacterTextSplitter(
+    splitter = RecursiveCharacterTextSplitter(
         chunk_size=1000,
         chunk_overlap=200,
         add_start_index=True
     )
-    return text_splitter.split_documents(documents)
+    return splitter.split_documents(documents)
 
-def index_docs(documents):
-    vector_store.add_documents(documents)
+def create_vector_store(docs):
+    return FAISS.from_documents(docs, embedding_model)
 
-def retrieve_docs(query):
+def retrieve_docs(vector_store, query):
     return vector_store.similarity_search(query)
 
 def answer_question(question, documents):
-    context = "\n\n".join([doc.page_content for doc in documents])
+    context = "\n\n".join(doc.page_content for doc in documents)
     prompt = ChatPromptTemplate.from_template(TEMPLATE)
-    chain = prompt | model
-    return chain.invoke({"question": question, "context": context})
-
+    chain = LLMChain(llm=llm, prompt=prompt)
+    return chain.run({"question": question, "context": context})
 
-# …
-st.…
+# === UI ===
+st.set_page_config(page_title="📖 PDF Q&A (CPU Version)", layout="centered")
+st.title("📖 Chat with PDF - CPU Optimized")
 
 with st.sidebar:
-    st.header("Select or Upload Book")
-    selected_book = st.selectbox("Choose a PDF …
+    st.header("Select or Upload a Book")
+    selected_book = st.selectbox("Choose a PDF", PREDEFINED_BOOKS + ["Upload new book"])
 
     if selected_book == "Upload new book":
         uploaded_file = st.file_uploader("Upload PDF", type="pdf")
@@ -73,18 +81,16 @@ with st.sidebar:
         selected_book = filename
 
 if selected_book and selected_book != "Upload new book":
+    st.info(f"📖 You selected: {selected_book}")
    file_path = os.path.join(pdfs_directory, selected_book)
-    st.info(f"📖 Selected Book: {selected_book}")
 
-    # Load, split, and index
     documents = load_pdf(file_path)
-    chunked_documents = split_text(documents)
-    index_docs(chunked_documents)
+    chunks = split_text(documents)
+    vector_store = create_vector_store(chunks)
 
-    question = st.chat_input("Ask something about the book...")
+    question = st.chat_input("Ask a question about the book...")
     if question:
         st.chat_message("user").write(question)
-        related_documents = retrieve_docs(question)
-        answer = answer_question(question, related_documents)
+        related_docs = retrieve_docs(vector_store, question)
+        answer = answer_question(question, related_docs)
         st.chat_message("assistant").write(answer)
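The commit swaps the Ollama/DeepSeek backend for a CPU-only stack: MiniLM sentence embeddings plus a FAISS index for retrieval, and a quantized Mistral GGUF via CTransformers for generation. The indexing and retrieval path can be smoke-tested outside Streamlit with the same calls the diff adds. This sketch is not part of the commit; the PDF path and the query are illustrative assumptions.

# Sketch: exercise the new indexing/retrieval path standalone.
from langchain_community.document_loaders import PDFPlumberLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

documents = PDFPlumberLoader("pdfs/sample.pdf").load()   # hypothetical sample PDF
chunks = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200, add_start_index=True
).split_documents(documents)

embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
store = FAISS.from_documents(chunks, embedding_model)    # same call as create_vector_store
for doc in store.similarity_search("What is this book about?", k=2):
    print(doc.metadata.get("start_index"), doc.page_content[:80])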
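The generation side can be exercised in isolation as well. The model identifiers below are the ones the commit pins; CTransformers fetches the GGUF weights from the Hugging Face Hub on first use. The shortened prompt stands in for the app's full TEMPLATE, and the question/context strings are made up for the demo.

# Sketch: the new CPU answering step (CTransformers + LLMChain) on its own.
from langchain.prompts import ChatPromptTemplate
from langchain.chains import LLMChain
from langchain.llms import CTransformers

llm = CTransformers(
    model='TheBloke/Mistral-7B-Instruct-v0.1-GGUF',
    model_file='mistral-7b-instruct-v0.1.Q4_K_M.gguf',
    model_type='mistral',
    config={'max_new_tokens': 512, 'temperature': 0.5}
)
prompt = ChatPromptTemplate.from_template(
    "Question: {question}\nContext: {context}\nAnswer:"  # abbreviated TEMPLATE
)
chain = LLMChain(llm=llm, prompt=prompt)
print(chain.run({"question": "What is FAISS?",
                 "context": "FAISS is a library for efficient similarity search."}))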
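Reusing store from the first sketch and chain from the second reproduces the app's full answer path, retrieve_docs followed by answer_question, without the UI:

question = "What is this book about?"
related_docs = store.similarity_search(question)                 # app's retrieve_docs
context = "\n\n".join(doc.page_content for doc in related_docs)  # app's answer_question
print(chain.run({"question": question, "context": context}))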
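For the Space to build with the new imports, requirements.txt presumably needs streamlit, langchain, langchain-community, pdfplumber, sentence-transformers, faiss-cpu, and ctransformers; locally the app starts with: streamlit run app.py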