Spaces:

fhmsf
/

AI-Powered-Personalized-Research-Assistant

Sleeping

App Files Files Community

fhmsf committed on Jan 4

Commit

df2b51a

verified ·

1 Parent(s): a2161e1

Update app.py

Browse files

Files changed (1) hide show

app.py +140 -103

app.py CHANGED Viewed

@@ -1,19 +1,16 @@
 import os
 import faiss
 import numpy as np
 import requests
-import streamlit as st
 from pypdf import PdfReader
 from sentence_transformers import SentenceTransformer
-###############################################################################
 # 1. PDF Parsing and Chunking
-###############################################################################
 def extract_pdf_text(pdf_file) -> str:
-    """
-    Read and extract text from each page of an uploaded PDF file.
-    """
     reader = PdfReader(pdf_file)
     all_text = []
     for page in reader.pages:
@@ -22,10 +19,6 @@ def extract_pdf_text(pdf_file) -> str:
     return "\n".join(all_text)
 def chunk_text(text, chunk_size=300, overlap=50):
-    """
-    Splits text into overlapping chunks, each approx. 'chunk_size' tokens.
-    'overlap' is how many tokens from the previous chunk to include again.
-    """
     words = text.split()
     chunks = []
     start = 0
@@ -36,48 +29,36 @@ def chunk_text(text, chunk_size=300, overlap=50):
         start += (chunk_size - overlap)
     return chunks
-###############################################################################
 # 2. Embedding Model
-###############################################################################
 embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
-###############################################################################
 # 3. Build FAISS Index
-###############################################################################
 def build_faiss_index(chunks):
-    """
-    Creates a FAISS index from embedded chunks.
-    Returns (index, chunk_embeddings).
-    """
     chunk_embeddings = embedding_model.encode(chunks, show_progress_bar=False)
     chunk_embeddings = np.array(chunk_embeddings, dtype='float32')
     dimension = chunk_embeddings.shape[1]
     index = faiss.IndexFlatL2(dimension)
     index.add(chunk_embeddings)
     return index, chunk_embeddings
-###############################################################################
 # 4. Retrieval Function
-###############################################################################
 def retrieve_chunks(query, index, chunks, top_k=3):
-    """
-    Embeds 'query' and retrieves the top_k most relevant chunks from 'index'.
-    """
     query_embedding = embedding_model.encode([query], show_progress_bar=False)
     query_embedding = np.array(query_embedding, dtype='float32')
     distances, indices = index.search(query_embedding, top_k)
     return [chunks[i] for i in indices[0]]
-###############################################################################
 # 5. Gemini LLM Integration
-###############################################################################
 def gemini_generate(prompt):
-    """
-    Calls Google's Gemini API with the environment variable GEMINI_API_KEY.
-    """
     gemini_api_key = os.environ.get("GEMINI_API_KEY", "")
     if not gemini_api_key:
         return "Error: No GEMINI_API_KEY found in environment variables."
@@ -87,7 +68,8 @@ def gemini_generate(prompt):
         "v1beta/models/gemini-1.5-flash:generateContent"
         f"?key={gemini_api_key}"
     )
-    payload = {
         "contents": [
             {
                 "parts": [
@@ -97,32 +79,27 @@ def gemini_generate(prompt):
         ]
     }
     headers = {"Content-Type": "application/json"}
     try:
-        response = requests.post(url, headers=headers, json=payload)
-        response.raise_for_status()
-        r_data = response.json()
-        # Extract the text from the 'candidates' structure:
         return r_data["candidates"][0]["content"]["parts"][0]["text"]
-    except requests.exceptions.RequestException as e:
-        return f"Error calling Gemini API: {e}"
-    except KeyError:
-        return f"Parsing error or unexpected response format: {response.text}"
-###############################################################################
 # 6. RAG QA Function
-###############################################################################
 def answer_question_with_RAG(user_question, index, chunks):
-    """
-    Retrieves relevant chunks, builds an augmented prompt, and calls gemini_generate().
-    """
     relevant_chunks = retrieve_chunks(user_question, index, chunks, top_k=3)
     context = "\n\n".join(relevant_chunks)
     prompt = f"""
     You are an AI assistant that knows the details from the uploaded research paper.
     Answer the user's question accurately using the context below.
-    If something is not in the context, say 'I don't know'.
     Context:
     {context}
@@ -133,62 +110,122 @@ def answer_question_with_RAG(user_question, index, chunks):
     """
     return gemini_generate(prompt)
-###############################################################################
-# Streamlit Application
-###############################################################################
-def main():
-    # Basic page config (optional):
-    st.set_page_config(
-        page_title="AI-Powered Personal Research Assistant",
-        layout="centered"
     )
-    # Title and Subheader
-    st.title("AI-Powered Personal Research Assistant")
-    st.write("Welcome! How may I help you?")
-    # Store the FAISS index + chunks in session_state to persist across reruns
-    if "faiss_index" not in st.session_state:
-        st.session_state.faiss_index = None
-    if "chunks" not in st.session_state:
-        st.session_state.chunks = None
-    # Step 1: Upload and Process PDF
-    uploaded_pdf = st.file_uploader("Upload your research paper (PDF)", type=["pdf"])
-    if st.button("Process PDF"):
-        if uploaded_pdf is None:
-            st.warning("Please upload a PDF file first.")
-        else:
-            # Read and chunk
-            raw_text = extract_pdf_text(uploaded_pdf)
-            if not raw_text.strip():
-                st.error("No text found in PDF.")
-                return
-            chunks = chunk_text(raw_text, chunk_size=300, overlap=50)
-            if not chunks:
-                st.error("No valid text to chunk.")
-                return
-            # Build index
-            faiss_index, _ = build_faiss_index(chunks)
-            st.session_state.faiss_index = faiss_index
-            st.session_state.chunks = chunks
-            st.success("PDF processed successfully!")
-    # Step 2: Ask a Question
-    user_question = st.text_input("Ask a question about your research paper:")
-    if st.button("Get Answer"):
-        if not st.session_state.faiss_index or not st.session_state.chunks:
-            st.warning("Please upload and process a PDF first.")
-        elif not user_question.strip():
-            st.warning("Please enter a valid question.")
-        else:
-            answer = answer_question_with_RAG(
-                user_question,
-                st.session_state.faiss_index,
-                st.session_state.chunks
-            )
-            st.write("### Answer:")
-            st.write(answer)
-if __name__ == "__main__":
-    main()

 import os
 import faiss
+import gradio as gr
 import numpy as np
 import requests
 from pypdf import PdfReader
 from sentence_transformers import SentenceTransformer
+################################################################################
 # 1. PDF Parsing and Chunking
+################################################################################
 def extract_pdf_text(pdf_file) -> str:
     reader = PdfReader(pdf_file)
     all_text = []
     for page in reader.pages:
     return "\n".join(all_text)
 def chunk_text(text, chunk_size=300, overlap=50):
     words = text.split()
     chunks = []
     start = 0
         start += (chunk_size - overlap)
     return chunks
+################################################################################
 # 2. Embedding Model
+################################################################################
 embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
+################################################################################
 # 3. Build FAISS Index
+################################################################################
 def build_faiss_index(chunks):
     """Embed ``chunks`` and store the vectors in a flat L2 FAISS index.

     Args:
         chunks: Non-empty sequence of text chunks to embed with the
             module-level ``embedding_model``.

     Returns:
         A ``(index, chunk_embeddings)`` tuple: the populated
         ``faiss.IndexFlatL2`` and the float32 embedding matrix that was
         added to it (one row per chunk, in the same order as ``chunks``).

     Raises:
         ValueError: If ``chunks`` is empty, since there is nothing to
             embed and no embedding dimension to size the index with.
     """
     if not chunks:
         raise ValueError("Cannot build a FAISS index from an empty list of chunks.")
     chunk_embeddings = embedding_model.encode(chunks, show_progress_bar=False)
     # FAISS expects a C-contiguous float32 matrix.
     chunk_embeddings = np.ascontiguousarray(chunk_embeddings, dtype='float32')
     dimension = chunk_embeddings.shape[1]
     index = faiss.IndexFlatL2(dimension)
     index.add(chunk_embeddings)
     return index, chunk_embeddings
+################################################################################
 # 4. Retrieval Function
+################################################################################
 def retrieve_chunks(query, index, chunks, top_k=3):
     """Return the chunks whose embeddings are nearest to ``query``.

     The query is embedded with the module-level ``embedding_model`` and
     searched against ``index``; the returned row ids are used as
     positions in ``chunks``.

     Args:
         query: Question text to embed.
         index: Index exposing ``search(vectors, k)`` (expected to be the
             FAISS index built from ``chunks``).
         chunks: Sequence of text chunks, in the order they were indexed.
         top_k: Maximum number of chunks to return.

     Returns:
         Up to ``top_k`` chunks in the order the index returned them, or
         an empty list when ``chunks`` is empty or ``top_k`` is not
         positive.
     """
     if not chunks or top_k <= 0:
         return []
     # Requesting more neighbours than there are vectors makes FAISS pad
     # the result with -1, which would silently select the last chunk.
     k = min(top_k, len(chunks))
     query_embedding = embedding_model.encode([query], show_progress_bar=False)
     query_embedding = np.array(query_embedding, dtype='float32')
     _, indices = index.search(query_embedding, k)
     # Drop any padding or out-of-range ids instead of indexing with them.
     return [chunks[i] for i in indices[0] if 0 <= i < len(chunks)]
+################################################################################
 # 5. Gemini LLM Integration
+################################################################################
 def gemini_generate(prompt):
     gemini_api_key = os.environ.get("GEMINI_API_KEY", "")
     if not gemini_api_key:
         return "Error: No GEMINI_API_KEY found in environment variables."
         "v1beta/models/gemini-1.5-flash:generateContent"
         f"?key={gemini_api_key}"
     )
+    data = {
         "contents": [
             {
                 "parts": [
         ]
     }
     headers = {"Content-Type": "application/json"}
+    response = requests.post(url, headers=headers, json=data)
+    if response.status_code != 200:
+        return f"Error {response.status_code}: {response.text}"
+    r_data = response.json()
     try:
         return r_data["candidates"][0]["content"]["parts"][0]["text"]
+    except Exception:
+        return f"Parsing error or unexpected response structure: {r_data}"
+################################################################################
 # 6. RAG QA Function
+################################################################################
 def answer_question_with_RAG(user_question, index, chunks):
     relevant_chunks = retrieve_chunks(user_question, index, chunks, top_k=3)
     context = "\n\n".join(relevant_chunks)
     prompt = f"""
     You are an AI assistant that knows the details from the uploaded research paper.
     Answer the user's question accurately using the context below.
+    If something is not in the context, say you don't know.
     Context:
     {context}
     """
     return gemini_generate(prompt)
+################################################################################
+# 7. Gradio Interface
+################################################################################
+def process_pdf(pdf_file):
+    if pdf_file is None:
+        return None, "Please upload a PDF file."
+    text = extract_pdf_text(pdf_file.name)
+    if not text:
+        return None, "No text found in PDF."
+    chunks = chunk_text(text, chunk_size=300, overlap=50)
+    if not chunks:
+        return None, "No valid text to chunk."
+    faiss_index, _ = build_faiss_index(chunks)
+    return (faiss_index, chunks), "PDF processed successfully!"
+def chat_with_paper(query, state):
+    if not state:
+        return "Please upload and process a PDF first."
+    faiss_index, doc_chunks = state
+    if not query or not query.strip():
+        return "Please enter a valid question."
+    return answer_question_with_RAG(query, faiss_index, doc_chunks)
+demo_theme = gr.themes.Soft(primary_hue="slate")
+css_code = """
+body {
+    background-color: #E6F7FF !important; /* Lightest blue */
+    margin: 0;
+    padding: 0;
+}
+.block > .inside {
+    margin: auto !important;
+    max-width: 900px !important;
+    border: 4px solid black !important;
+    border-radius: 10px !important;
+    background-color: #FFFFFF !important;
+    padding: 20px !important;
+}
+#icon-container {
+    text-align: center !important;
+    margin-top: 1rem !important;
+    margin-bottom: 1rem !important;
+}
+#app-title {
+    text-align: center !important;
+    font-size: 3rem !important;
+    font-weight: 900 !important;
+    margin-bottom: 0.5rem !important;
+    margin-top: 0.5rem !important;
+}
+#app-welcome {
+    text-align: center !important;
+    font-size: 1.5rem !important;
+    color: #444 !important;
+    margin-bottom: 25px !important;
+    font-weight: 700 !important;
+}
+button {
+    background-color: #3CB371 !important;
+    color: #ffffff !important;
+    border: none !important;
+    font-weight: 600 !important;
+    cursor: pointer;
+}
+button:hover {
+    background-color: #2E8B57 !important;
+}
+textarea, input[type="text"] {
+    text-align: center !important;
+}
+"""
+with gr.Blocks(theme=demo_theme, css=css_code) as demo:
+    gr.Markdown("""
+    <div id="icon-container">
+        <img src="https://i.ibb.co/3Wp3yBZ/ai-icon.png" alt="AI icon" style="width:100px;">
+    </div>
+    """)
+    gr.Markdown("<div id='app-title'>AI-Powered Personal Research Assistant</div>")
+    gr.Markdown("<div id='app-welcome'>Welcome! How may I help you?</div>")
+    state = gr.State()
+    with gr.Row():
+        pdf_input = gr.File(label="Upload your research paper (PDF)", file_types=[".pdf"])
+        process_button = gr.Button("Process PDF")
+    status_output = gr.Textbox(label="Status", interactive=False)
+    process_button.click(
+        fn=process_pdf,
+        inputs=pdf_input,
+        outputs=[state, status_output]
+    )
+    with gr.Row():
+        user_query = gr.Textbox(label="Ask a question about your research paper:")
+        ask_button = gr.Button("Get Answer")
+    answer_output = gr.Textbox(label="Answer")
+    ask_button.click(
+        fn=chat_with_paper,
+        inputs=[user_query, state],
+        outputs=answer_output
     )
+demo.launch()