fhmsf committed · verified
Commit 921780e · Parent(s): 3d0f58b

Update app.py

Files changed (1)
  1. app.py +116 -168
app.py CHANGED
@@ -1,16 +1,19 @@
 import os
 import faiss
-import gradio as gr
 import numpy as np
 import requests
+import streamlit as st

 from pypdf import PdfReader
 from sentence_transformers import SentenceTransformer

-################################################################################
+###############################################################################
 # 1. PDF Parsing and Chunking
-################################################################################
+###############################################################################
 def extract_pdf_text(pdf_file) -> str:
+    """
+    Read and extract text from each page of an uploaded PDF file.
+    """
     reader = PdfReader(pdf_file)
     all_text = []
     for page in reader.pages:
@@ -19,6 +22,10 @@ def extract_pdf_text(pdf_file) -> str:
     return "\n".join(all_text)

 def chunk_text(text, chunk_size=300, overlap=50):
+    """
+    Splits text into overlapping chunks, each approx. 'chunk_size' tokens.
+    'overlap' is how many tokens from the previous chunk to include again.
+    """
     words = text.split()
     chunks = []
     start = 0
@@ -29,44 +36,58 @@ def chunk_text(text, chunk_size=300, overlap=50):
         start += (chunk_size - overlap)
     return chunks

-################################################################################
+###############################################################################
 # 2. Embedding Model
-################################################################################
+###############################################################################
 embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

-################################################################################
+###############################################################################
 # 3. Build FAISS Index
-################################################################################
+###############################################################################
 def build_faiss_index(chunks):
+    """
+    Creates a FAISS index from embedded chunks.
+    Returns (index, chunk_embeddings).
+    """
     chunk_embeddings = embedding_model.encode(chunks, show_progress_bar=False)
     chunk_embeddings = np.array(chunk_embeddings, dtype='float32')
+
     dimension = chunk_embeddings.shape[1]
     index = faiss.IndexFlatL2(dimension)
     index.add(chunk_embeddings)
+
     return index, chunk_embeddings

-################################################################################
+###############################################################################
 # 4. Retrieval Function
-################################################################################
+###############################################################################
 def retrieve_chunks(query, index, chunks, top_k=3):
+    """
+    Embeds 'query' and retrieves the top_k most relevant chunks from 'index'.
+    """
     query_embedding = embedding_model.encode([query], show_progress_bar=False)
     query_embedding = np.array(query_embedding, dtype='float32')
-
+
     distances, indices = index.search(query_embedding, top_k)
-    relevant_chunks = [chunks[i] for i in indices[0]]
-    return relevant_chunks

-################################################################################
-# 5. Gemini LLM Integration (Updated for "candidates" response)
-################################################################################
+    return [chunks[i] for i in indices[0]]
+
+###############################################################################
+# 5. Gemini LLM Integration
+###############################################################################
 def gemini_generate(prompt):
+    """
+    Calls Google's Gemini API with the environment variable GEMINI_API_KEY.
+    """
     gemini_api_key = os.environ.get("GEMINI_API_KEY", "")
     if not gemini_api_key:
         return "Error: No GEMINI_API_KEY found in environment variables."

-    url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent?key={gemini_api_key}"
-
-    data = {
+    url = (
+        "https://generativelanguage.googleapis.com/"
+        "v1beta/models/gemini-1.5-flash:generateContent"
+        f"?key={gemini_api_key}"
+    )
+    payload = {
         "contents": [
             {
                 "parts": [
@@ -77,170 +98,97 @@ def gemini_generate(prompt):
     }
     headers = {"Content-Type": "application/json"}

-    response = requests.post(url, headers=headers, json=data)
-    if response.status_code != 200:
-        return f"Error {response.status_code}: {response.text}"
-
-    r_data = response.json()
     try:
-        generated_text = r_data["candidates"][0]["content"]["parts"][0]["text"]
-        return generated_text
-    except Exception:
-        return f"Parsing error or unexpected response structure: {r_data}"
-
-################################################################################
+        response = requests.post(url, headers=headers, json=payload)
+        response.raise_for_status()
+        r_data = response.json()
+        # Extract the text from the 'candidates' structure:
+        return r_data["candidates"][0]["content"]["parts"][0]["text"]
+    except requests.exceptions.RequestException as e:
+        return f"Error calling Gemini API: {e}"
+    except KeyError:
+        return f"Parsing error or unexpected response format: {response.text}"
+
+###############################################################################
 # 6. RAG QA Function
-################################################################################
+###############################################################################
 def answer_question_with_RAG(user_question, index, chunks):
+    """
+    Retrieves relevant chunks, builds an augmented prompt, and calls gemini_generate().
+    """
     relevant_chunks = retrieve_chunks(user_question, index, chunks, top_k=3)
     context = "\n\n".join(relevant_chunks)
+
     prompt = f"""
     You are an AI assistant that knows the details from the uploaded research paper.
     Answer the user's question accurately using the context below.
-    If something is not in the context, say you don't know.
+    If something is not in the context, say 'I don't know'.
+
     Context:
     {context}
+
     User's question: {user_question}
+
     Answer:
     """
     return gemini_generate(prompt)

-################################################################################
-# 7. Gradio Interface (Enhanced Styling)
-################################################################################
-def process_pdf(pdf_file):
-    if pdf_file is None:
-        return None, "Please upload a PDF file."
-
-    text = extract_pdf_text(pdf_file.name)
-    if not text:
-        return None, "No text found in PDF."
-
-    chunks = chunk_text(text, chunk_size=300, overlap=50)
-    if not chunks:
-        return None, "No valid text to chunk."
-
-    faiss_index, _ = build_faiss_index(chunks)
-    return (faiss_index, chunks), "PDF processed successfully!"
-
-def chat_with_paper(query, state):
-    if not state:
-        return "Please upload and process a PDF first."
-    faiss_index, doc_chunks = state
-    if not query or not query.strip():
-        return "Please enter a valid question."
-
-    answer = answer_question_with_RAG(query, faiss_index, doc_chunks)
-    return answer
-
-demo_theme = gr.themes.Soft(primary_hue="slate")
-
-# Custom CSS:
-# 1. Lightest blue background
-# 2. Green buttons
-# 3. Thick black border, centered content
-# 4. Large, bold, center-aligned title
-# 5. Representative icon at top, bigger font for welcome text
-css_code = """
-body {
-    background-color: #E6F7FF !important; /* Lightest blue */
-    margin: 0;
-    padding: 0;
-}
-
-/* Center the entire Gradio container and give a thick black border */
-.block > .inside {
-    margin: auto !important;
-    max-width: 900px !important; /* You can increase/decrease the max-width for your preference */
-    border: 4px solid black !important; /* Thick black border */
-    border-radius: 10px !important;
-    background-color: #FFFFFF !important; /* White container for clarity */
-    padding: 20px !important;
-}
-
-/* Title heading: bigger, bolder, centered */
-#app-title {
-    text-align: center !important;
-    font-size: 3rem !important;
-    font-weight: 900 !important;
-    margin-bottom: 0.5rem !important;
-    margin-top: 0.5rem !important;
-}
-
-/* Welcome text: slightly smaller, but still bold, centered */
-#app-welcome {
-    text-align: center !important;
-    font-size: 1.5rem !important;
-    color: #444 !important;
-    margin-bottom: 25px !important;
-    font-weight: 700 !important;
-}
-
-/* Buttons: green background, white text */
-button {
-    background-color: #3CB371 !important; /* Medium sea green */
-    color: #ffffff !important;
-    border: none !important;
-    font-weight: 600 !important;
-    cursor: pointer;
-}
-
-/* Button hover effect: darker green */
-button:hover {
-    background-color: #2E8B57 !important;
-}
-
-/* Optional: center the text in textboxes, if you like */
-textarea, input[type="text"] {
-    text-align: center !important;
-}
-
-/* Icon container styling */
-#icon-container {
-    text-align: center !important;
-    margin-top: 1rem !important;
-    margin-bottom: 1rem !important;
-}
-"""
-
-with gr.Blocks(theme=demo_theme, css=css_code) as demo:
-    # Representative icon/image at the top
-    # Replace the 'src' with any other icon URL you prefer
-    gr.Markdown("""
-    <div id="icon-container">
-        <img src="https://i.ibb.co/3Wp3yBZ/ai-icon.png" alt="AI icon" style="width:100px;">
-    </div>
-    """)
-
-    # App title (large, bold, centered)
-    gr.Markdown("<div id='app-title'>AI-Powered Personal Research Assistant</div>")
-
-    # Welcome text right under the title
-    gr.Markdown("<div id='app-welcome'>Welcome! How may I help you?</div>")
-
-    state = gr.State()
-
-    with gr.Row():
-        pdf_input = gr.File(label="Upload your research paper (PDF)", file_types=[".pdf"])
-        process_button = gr.Button("Process PDF")
-
-    status_output = gr.Textbox(label="Status", interactive=False)
-
-    process_button.click(
-        fn=process_pdf,
-        inputs=pdf_input,
-        outputs=[state, status_output]
-    )
-
-    with gr.Row():
-        user_query = gr.Textbox(label="Ask a question about your research paper:")
-        ask_button = gr.Button("Get Answer")
-        answer_output = gr.Textbox(label="Answer")
-
-    ask_button.click(
-        fn=chat_with_paper,
-        inputs=[user_query, state],
-        outputs=answer_output
+###############################################################################
+# Streamlit Application
+###############################################################################
+def main():
+    # Basic page config (optional):
+    st.set_page_config(
+        page_title="AI-Powered Personal Research Assistant",
+        layout="centered"
     )

-demo.launch()
+    # Title and Subheader
+    st.title("AI-Powered Personal Research Assistant")
+    st.write("Welcome! How may I help you?")
+
+    # Store the FAISS index + chunks in session_state to persist across reruns
+    if "faiss_index" not in st.session_state:
+        st.session_state.faiss_index = None
+    if "chunks" not in st.session_state:
+        st.session_state.chunks = None
+
+    # Step 1: Upload and Process PDF
+    uploaded_pdf = st.file_uploader("Upload your research paper (PDF)", type=["pdf"])
+    if st.button("Process PDF"):
+        if uploaded_pdf is None:
+            st.warning("Please upload a PDF file first.")
+        else:
+            # Read and chunk
+            raw_text = extract_pdf_text(uploaded_pdf)
+            if not raw_text.strip():
+                st.error("No text found in PDF.")
+                return
+            chunks = chunk_text(raw_text, chunk_size=300, overlap=50)
+            if not chunks:
+                st.error("No valid text to chunk.")
+                return
+            # Build index
+            faiss_index, _ = build_faiss_index(chunks)
+            st.session_state.faiss_index = faiss_index
+            st.session_state.chunks = chunks
+            st.success("PDF processed successfully!")
+
+    # Step 2: Ask a Question
+    user_question = st.text_input("Ask a question about your research paper:")
+    if st.button("Get Answer"):
+        if not st.session_state.faiss_index or not st.session_state.chunks:
+            st.warning("Please upload and process a PDF first.")
+        elif not user_question.strip():
+            st.warning("Please enter a valid question.")
+        else:
+            answer = answer_question_with_RAG(
+                user_question,
+                st.session_state.faiss_index,
+                st.session_state.chunks
+            )
+            st.write("### Answer:")
+            st.write(answer)
+
+if __name__ == "__main__":
+    main()
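A few notes on the rewritten app.py. First, chunking: chunk_text splits on whitespace, so chunk_size=300 and overlap=50 count words rather than true model tokens, despite the docstring's wording. Below is a minimal standalone sketch of the same sliding-window logic, with the loop body (elided from the diff) filled in as an assumption and small numbers so the overlap is visible:

def chunk_text(text, chunk_size=4, overlap=1):
    words = text.split()
    chunks = []
    start = 0
    # Assumed loop body; the diff elides the middle of the function.
    while start < len(words):
        chunks.append(" ".join(words[start:start + chunk_size]))
        start += (chunk_size - overlap)
    return chunks

print(chunk_text("0 1 2 3 4 5 6 7 8 9"))
# ['0 1 2 3', '3 4 5 6', '6 7 8 9', '9']

Each chunk re-reads the last 'overlap' words of its predecessor, so a sentence cut at one boundary still appears intact in an adjacent chunk.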
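Second, model loading: embedding_model is constructed at module level, and Streamlit re-executes the entire script on every widget interaction, so the SentenceTransformer is liable to be reloaded on each rerun. A possible mitigation, assuming a Streamlit version that provides st.cache_resource (not something this commit does):

import streamlit as st
from sentence_transformers import SentenceTransformer

@st.cache_resource  # cache the loaded model once per process, across reruns
def get_embedding_model():
    return SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

embedding_model = get_embedding_model()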
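Third, the index: IndexFlatL2 performs exact nearest-neighbor search under Euclidean distance. Sentence-transformer embeddings are often compared by cosine similarity instead; an equally simple alternative (a sketch, not what this commit does) is to L2-normalize the vectors and use an inner-product index:

import faiss
import numpy as np

def build_cosine_index(chunk_embeddings: np.ndarray):
    embeddings = np.ascontiguousarray(chunk_embeddings, dtype="float32")
    faiss.normalize_L2(embeddings)  # in-place row normalization
    index = faiss.IndexFlatIP(embeddings.shape[1])  # inner product == cosine on unit vectors
    index.add(embeddings)
    return index

Query vectors would need the same normalize_L2 call before index.search(). On unit-normalized vectors, L2 and cosine yield the same ranking, so this is largely a readability choice.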
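Finally, state handling: main() stashes the FAISS index and chunks in st.session_state because Streamlit reruns the whole script on every button click; plain local variables would be lost between the "Process PDF" and "Get Answer" steps. A self-contained illustration of that rerun behavior:

import streamlit as st

# Without session_state this counter would reset on every interaction,
# because Streamlit re-executes the whole script on each widget event.
if "clicks" not in st.session_state:
    st.session_state.clicks = 0

if st.button("Click me"):
    st.session_state.clicks += 1

st.write(f"Button clicked {st.session_state.clicks} times.")

Both the illustration and the app itself are launched with streamlit run app.py; the if __name__ == "__main__" guard still fires there, since Streamlit executes the script as __main__.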