import os

import faiss
import gradio as gr
import numpy as np
import requests
from pypdf import PdfReader
from sentence_transformers import SentenceTransformer

################################################################################
# 1. PDF Parsing and Chunking
################################################################################


def extract_pdf_text(pdf_file) -> str:
    """Return the text of every page of a PDF, joined with newlines.

    Args:
        pdf_file: Passed straight to ``PdfReader``.

    Returns:
        The stripped text of each page joined by ``"\\n"``. A page for which
        ``extract_text()`` returns nothing contributes an empty string, so a
        PDF without extractable text yields a string made only of newlines.
    """
    reader = PdfReader(pdf_file)
    return "\n".join((page.extract_text() or "").strip() for page in reader.pages)


def chunk_text(text, chunk_size=300, overlap=50):
    """Split *text* into overlapping chunks of whitespace-separated words.

    Args:
        text: The text to split; words are whatever ``str.split()`` yields.
        chunk_size: Maximum number of words per chunk.
        overlap: Number of words each chunk shares with the previous one.
            Must be smaller than ``chunk_size``.

    Returns:
        A list of chunk strings (words re-joined with single spaces). The list
        is empty when *text* contains no words.

    Raises:
        ValueError: If ``overlap >= chunk_size``; the window would never
            advance and the loop would not terminate.
    """
    step = chunk_size - overlap
    if step <= 0:
        raise ValueError(
            f"overlap ({overlap}) must be smaller than chunk_size ({chunk_size})"
        )

    words = text.split()
    chunks = []
    for start in range(0, len(words), step):
        end = start + chunk_size
        chunks.append(" ".join(words[start:end]))
        # Once a window reaches the last word, any further window would only
        # repeat words already contained in this chunk, so stop here.
        if end >= len(words):
            break
    return chunks


################################################################################
# 2. Embedding Model
################################################################################

# Loaded once at import time and shared by indexing and retrieval.
embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

################################################################################
# 3. Build FAISS Index
################################################################################


def build_faiss_index(chunks):
    """Embed *chunks* and store the vectors in a flat L2 FAISS index.

    Args:
        chunks: List of text chunks to embed with ``embedding_model``.

    Returns:
        A ``(index, chunk_embeddings)`` tuple: the populated
        ``faiss.IndexFlatL2`` and the float32 embedding array that was added
        to it. Row ``i`` of the array corresponds to ``chunks[i]``.
    """
    chunk_embeddings = embedding_model.encode(chunks, show_progress_bar=False)
    # The embeddings are converted to float32 before being added to the index.
    chunk_embeddings = np.array(chunk_embeddings, dtype="float32")
    dimension = chunk_embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)
    index.add(chunk_embeddings)
    return index, chunk_embeddings


################################################################################
# 4.
# Retrieval Function
################################################################################


def retrieve_chunks(query, index, chunks, top_k=3):
    """Return the chunks whose embeddings are nearest to *query*.

    Args:
        query: The user's question.
        index: FAISS index built from *chunks* (see ``build_faiss_index``).
        chunks: The chunk strings, in the same order they were indexed.
        top_k: Maximum number of chunks to return.

    Returns:
        Up to ``top_k`` chunk strings in the order FAISS returned them.
        Fewer are returned when the index holds fewer vectors; an empty list
        is returned when there is nothing to search or ``top_k <= 0``.
    """
    # FAISS pads the result with -1 when asked for more neighbours than it
    # holds; used as a list index, -1 would silently return the last chunk.
    k = min(top_k, index.ntotal, len(chunks))
    if k <= 0:
        return []

    query_embedding = embedding_model.encode([query], show_progress_bar=False)
    query_embedding = np.array(query_embedding, dtype="float32")
    _distances, indices = index.search(query_embedding, k)
    return [chunks[i] for i in indices[0] if 0 <= i < len(chunks)]


################################################################################
# 5. Gemini LLM Integration (Updated for "candidates" response)
################################################################################


def gemini_generate(prompt, timeout=60):
    """Send *prompt* to the Gemini ``generateContent`` endpoint.

    Args:
        prompt: The full prompt text.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The generated text on success. On any failure (missing API key,
        network error, non-200 status, unexpected payload) a human-readable
        error string is returned instead of raising, so the UI can show it.
    """
    gemini_api_key = os.environ.get("GEMINI_API_KEY", "")
    if not gemini_api_key:
        return "Error: No GEMINI_API_KEY found in environment variables."

    url = (
        "https://generativelanguage.googleapis.com/v1beta/models/"
        "gemini-1.5-flash:generateContent"
    )
    # The key travels in a header rather than the query string so it does not
    # end up in URLs echoed by logs or exception messages.
    headers = {
        "Content-Type": "application/json",
        "x-goog-api-key": gemini_api_key,
    }
    data = {"contents": [{"parts": [{"text": prompt}]}]}

    try:
        response = requests.post(url, headers=headers, json=data, timeout=timeout)
    except requests.RequestException as exc:
        return f"Error: request to Gemini failed: {exc}"

    if response.status_code != 200:
        return f"Error {response.status_code}: {response.text}"

    try:
        r_data = response.json()
    except ValueError:
        return f"Parsing error or unexpected response structure: {response.text}"

    try:
        return r_data["candidates"][0]["content"]["parts"][0]["text"]
    except (KeyError, IndexError, TypeError):
        return f"Parsing error or unexpected response structure: {r_data}"


################################################################################
# 6. RAG QA Function
################################################################################


def answer_question_with_RAG(user_question, index, chunks):
    """Answer *user_question* from the most relevant chunks of the document.

    Args:
        user_question: The question to answer.
        index: FAISS index built from *chunks*.
        chunks: The chunk strings the index was built from.

    Returns:
        Whatever ``gemini_generate`` returns for the assembled prompt (the
        answer text, or an error string).
    """
    relevant_chunks = retrieve_chunks(user_question, index, chunks, top_k=3)
    context = "\n\n".join(relevant_chunks)
    # Separators are spelled out so the instructions, the retrieved context
    # and the question are unambiguously delimited in the prompt.
    prompt = (
        "You are an AI assistant that knows the details from the uploaded research paper.\n"
        "Answer the user's question accurately using the context below.\n"
        "If something is not in the context, say you don't know.\n\n"
        f"Context:\n{context}\n\n"
        f"User's question:\n{user_question}\n\n"
        "Answer:\n"
    )
    return gemini_generate(prompt)


################################################################################
# 7. Gradio Interface (Enhanced Styling)
################################################################################


def process_pdf(pdf_file):
    """Extract, chunk and index an uploaded PDF.

    Args:
        pdf_file: The uploaded file, or ``None`` when nothing was uploaded.
            Either a path string or an object exposing the path as ``.name``.

    Returns:
        A ``(state, message)`` tuple. ``state`` is ``(faiss_index, chunks)``
        on success and ``None`` on failure; ``message`` is the status text.
    """
    if pdf_file is None:
        return None, "Please upload a PDF file."

    # NOTE(review): depending on the Gradio version/config the upload arrives
    # as a plain path or as a file wrapper with ``.name`` - accept both.
    pdf_path = getattr(pdf_file, "name", pdf_file)
    text = extract_pdf_text(pdf_path)
    # Pages without text still contribute newlines, so test the stripped text.
    if not text.strip():
        return None, "No text found in PDF."

    chunks = chunk_text(text, chunk_size=300, overlap=50)
    if not chunks:
        return None, "No valid text to chunk."

    faiss_index, _ = build_faiss_index(chunks)
    return (faiss_index, chunks), "PDF processed successfully!"


def chat_with_paper(query, state):
    """Answer *query* against the document stored in *state*.

    Args:
        query: The user's question.
        state: The ``(faiss_index, chunks)`` tuple produced by
            ``process_pdf``, or a falsy value when no PDF was processed.

    Returns:
        The answer text, or a prompt asking the user to upload a PDF or to
        enter a question.
    """
    if not state:
        return "Please upload and process a PDF first."
    faiss_index, doc_chunks = state
    if not query or not query.strip():
        return "Please enter a valid question."
    return answer_question_with_RAG(query, faiss_index, doc_chunks)


demo_theme = gr.themes.Soft(primary_hue="slate")

# Custom CSS:
# 1. Lightest blue background
# 2. Green buttons
# 3. Thick black border, centered content
# 4. Large, bold, center-aligned title
# 5.
Representative icon at top, bigger font for welcome text css_code = """ body { background-color: #E6F7FF !important; /* Lightest blue */ margin: 0; padding: 0; } /* Center the entire Gradio container and give a thick black border */ .block > .inside { margin: auto !important; max-width: 900px !important; /* You can increase/decrease the max-width for your preference */ border: 4px solid black !important; /* Thick black border */ border-radius: 10px !important; background-color: #FFFFFF !important; /* White container for clarity */ padding: 20px !important; } /* Title heading: bigger, bolder, centered */ #app-title { text-align: center !important; font-size: 3rem !important; font-weight: 900 !important; margin-bottom: 0.5rem !important; margin-top: 0.5rem !important; } /* Welcome text: slightly smaller, but still bold, centered */ #app-welcome { text-align: center !important; font-size: 1.5rem !important; color: #444 !important; margin-bottom: 25px !important; font-weight: 700 !important; } /* Buttons: green background, white text */ button { background-color: #3CB371 !important; /* Medium sea green */ color: #ffffff !important; border: none !important; font-weight: 600 !important; cursor: pointer; } /* Button hover effect: darker green */ button:hover { background-color: #2E8B57 !important; } /* Optional: center the text in textboxes, if you like */ textarea, input[type="text"] { text-align: center !important; } /* Icon container styling */ #icon-container { text-align: center !important; margin-top: 1rem !important; margin-bottom: 1rem !important; } """ with gr.Blocks(theme=demo_theme, css=css_code) as demo: # Representative icon/image at the top # Replace the 'src' with any other icon URL you prefer gr.Markdown("""