Spaces:

NaimaAqeel
/

Chatbot

Runtime error

App Files Files Community

NaimaAqeel committed on Jun 5, 2024

Commit

1649416

verified ·

1 Parent(s): a42468d

Update app.py

Browse files

Files changed (1) hide show

app.py +64 -88

app.py CHANGED Viewed

@@ -1,36 +1,32 @@
-import os
-import io
-import fitz  # PyMuPDF
-import PyPDF2
-from docx import Document
 import streamlit as st
-from sentence_transformers import SentenceTransformer
 from langchain.prompts import PromptTemplate
 from langchain.chains.question_answering import load_qa_chain
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.vectorstores.faiss import FAISS
-from langchain_community.embeddings import HuggingFaceEmbeddings
-from langchain_community.llms import HuggingFaceEndpoint
-# Initialize the embedding model
-embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
-# Initialize the HuggingFace LLM
-llm = HuggingFaceEndpoint(
-    endpoint_url="https://api-inference.huggingface.co/models/gpt-3.5-turbo",
-    model_kwargs={"api_key": "YOUR_HUGGINGFACE_API_KEY"}
-)
-# Initialize the HuggingFace embeddings
-embedding = HuggingFaceEmbeddings()
-# Streamlit setup
-st.set_page_config(layout="centered")
-st.markdown("<h1 style='font-size:24px;'>PDF and DOCX ChatBot</h1>", unsafe_allow_html=True)
-# File Upload
-uploaded_file = st.file_uploader("Upload your PDF or DOCX file", type=["pdf", "docx"])
 prompt_template = """
 Answer the question as detailed as possible from the provided context,
 make sure to provide all the details, if the answer is not in
@@ -41,82 +37,62 @@ Question: \n{question}\n
 Answer:
 """
 prompt_template += """
 --------------------------------------------------
 Prompt Suggestions:
 1. Summarize the primary theme of the context.
 2. Elaborate on the crucial concepts highlighted in the context.
-3. Pinpoint any supporting details or examples pertinent to the question.
-4. Examine any recurring themes or patterns relevant to the question within the context.
-5. Contrast differing viewpoints or elements mentioned in the context.
-6. Explore the potential implications or outcomes of the information provided.
-7. Assess the trustworthiness and validity of the information given.
-8. Propose recommendations or advice based on the presented information.
-9. Forecast likely future events or results stemming from the context.
-10. Expand on the context or background information pertinent to the question.
-11. Define any specialized terms or technical language used within the context.
-12. Analyze any visual representations like charts or graphs in the context.
-13. Highlight any restrictions or important considerations when responding to the question.
-14. Examine any presuppositions or biases evident within the context.
-15. Present alternate interpretations or viewpoints regarding the information provided.
-16. Reflect on any moral or ethical issues raised by the context.
-17. Investigate any cause-and-effect relationships identified in the context.
-18. Uncover any questions or areas requiring further exploration.
-19. Resolve any vague or conflicting information in the context.
 20. Cite case studies or examples that demonstrate the concepts discussed in the context.
---------------------------------------------------
-Context:\n{context}\n
-Question:\n{question}\n
-Answer:
 """
-def extract_text_from_docx(docx_file):
-    text = ""
-    try:
-        doc = Document(docx_file)
-        text = "\n".join([para.text for para in doc.paragraphs])
-    except Exception as e:
-        print(f"Error extracting text from DOCX: {e}")
-    return text
-def extract_text_from_pdf(pdf_file):
-    text = ""
-    try:
-        pdf_document = fitz.open(stream=pdf_file, filetype="pdf")
-        for page_num in range(len(pdf_document)):
-            page = pdf_document[page_num]
-            text += page.get_text()
-    except Exception as e:
-        print(f"Error extracting text from PDF: {e}")
-    return text
-if uploaded_file is not None:
-    st.text("File Uploaded Successfully!")
-    context = ""
-    # Process the uploaded file
-    if uploaded_file.name.endswith('.pdf'):
-        context = extract_text_from_pdf(uploaded_file)
-    elif uploaded_file.name.endswith('.docx'):
-        context = extract_text_from_docx(uploaded_file)
-    text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=200)
-    texts = text_splitter.split_text(context)
-    vector_index = FAISS.from_texts(texts, embedding).as_retriever()
-    user_question = st.text_input("Ask Anything from the Document:", "")
-    if st.button("Get Answer"):
-        if user_question:
-            with st.spinner("Processing..."):
-                docs = vector_index.get_relevant_documents(user_question)
-                prompt = PromptTemplate(template=prompt_template, input_variables=['context', 'question'])
-                chain = load_qa_chain(llm, chain_type="stuff", prompt=prompt)
-                response = chain({"input_documents": docs, "question": user_question}, return_only_outputs=True)
-                st.subheader("Answer:")
-                st.write(response['output_text'])
-        else:
-            st.warning("Please enter a question.")

 import streamlit as st
 from langchain.prompts import PromptTemplate
 from langchain.chains.question_answering import load_qa_chain
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.vectorstores.faiss import FAISS
+from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
+from dotenv import load_dotenv
+import PyPDF2
+import os
+import io
+# Set page configuration
+st.set_page_config(layout="centered")
+st.markdown("<h1 style='font-size:24px;'>PDF ChatBot by Ali & Arooj</h1>", unsafe_allow_html=True)
+# Load environment variables from .env file
+load_dotenv()
+# Retrieve API key from environment variable
+google_api_key = os.getenv("GOOGLE_API_KEY")
+# Check if the API key is available
+if google_api_key is None:
+    st.warning("API key not found. Please set the google_api_key environment variable.")
+    st.stop()
+uploaded_file = st.file_uploader("Your PDF file here", type=["pdf", "docx"])
+# Prompt template
 prompt_template = """
 Answer the question as detailed as possible from the provided context,
 make sure to provide all the details, if the answer is not in
 Answer:
 """
+# Additional prompts
 prompt_template += """
 --------------------------------------------------
 Prompt Suggestions:
 1. Summarize the primary theme of the context.
 2. Elaborate on the crucial concepts highlighted in the context.
+...
 20. Cite case studies or examples that demonstrate the concepts discussed in the context.
 """
+# Function to process PDF and DOCX files
+def process_files(uploaded_file):
+    if uploaded_file is not None:
+        st.text("File Uploaded Successfully!")
+        # Check file type and process accordingly
+        if uploaded_file.type == "application/pdf":
+            # PDF Processing
+            pdf_data = uploaded_file.read()
+            pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_data))
+            pdf_pages = pdf_reader.pages
+            context = "\n\n".join(page.extract_text() for page in pdf_pages)
+            text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=200)
+            texts = text_splitter.split_text(context)
+            embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
+            vector_index = FAISS.from_texts(texts, embeddings).as_retriever()
+        elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
+            # DOCX Processing (if needed)
+            pass
+        else:
+            st.warning("Unsupported file format. Please upload PDF or DOCX.")
+            st.stop()
+        user_question = st.text_input("Ask Anything from PDF:", "")
+        if st.button("Get Answer"):
+            if user_question:
+                with st.spinner("Processing..."):
+                    docs = vector_index.get_relevant_documents(user_question)
+                    prompt = PromptTemplate(template=prompt_template, input_variables=['context', 'question'])
+                    model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3, api_key=google_api_key)
+                    chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
+                    response = chain({"input_documents": docs, "question": user_question}, return_only_outputs=True)
+                    st.subheader("Answer:")
+                    st.write(response['output_text'])
+            else:
+                st.warning("Please Ask.")
+# Main function
+def main():
+    """Run the app by passing the module-level ``uploaded_file`` to ``process_files``."""
+    process_files(uploaded_file)
+# Only run the app when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+    main()