adilriaz264 committed on
Commit
2b3d474
·
verified ·
1 Parent(s): b0271e0

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +111 -0
app.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import streamlit as st
3
+ from PyPDF2 import PdfReader
4
+ from sentence_transformers import SentenceTransformer
5
+ import faiss
6
+ import numpy as np
7
+ import matplotlib.pyplot as plt
8
+ from groq import Groq
9
+
10
# SECURITY: never hard-code API keys in source — the previous key was
# committed to version control and must be treated as leaked (revoke it).
# Read the key from the environment instead; an empty default keeps the
# module importable and lets Groq report a clear auth error at call time.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
client = Groq(api_key=GROQ_API_KEY)
12
+
13
# Initialize embedding model
# all-MiniLM-L6-v2 is an open-source SentenceTransformer model; its output
# size must match `dimension` below (384 per the comment on that line).
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')  # Open-source model

# Initialize FAISS index
# IndexFlatL2 performs exact (brute-force) L2-distance search — no training
# step required before adding vectors.
dimension = 384  # Embedding size of the model
index = faiss.IndexFlatL2(dimension)
19
+
20
+ # Helper Functions
21
def extract_text_from_pdfs(files):
    """Extract and concatenate the text of every page in the given PDFs.

    Args:
        files: iterable of file-like objects readable by ``PyPDF2.PdfReader``
            (e.g. Streamlit ``UploadedFile`` instances).

    Returns:
        str: all extracted page text joined together; ``""`` for no files.
    """
    pages = []
    for file in files:
        reader = PdfReader(file)
        for page in reader.pages:
            # extract_text() can return None (e.g. image-only pages);
            # skip those instead of raising TypeError on concatenation.
            text = page.extract_text()
            if text:
                pages.append(text)
    # join() avoids the quadratic cost of repeated `+=` on a growing string.
    return "".join(pages)
29
+
30
def create_chunks(text, chunk_size=500):
    """Split text into chunks of a specified size.

    Tokenizes on whitespace and groups the words into chunks of at most
    ``chunk_size`` words each; the final chunk may be shorter.
    """
    words = text.split()
    chunks = []
    for start in range(0, len(words), chunk_size):
        chunks.append(' '.join(words[start:start + chunk_size]))
    return chunks
34
+
35
def generate_embeddings(chunks):
    """Generate embeddings for the given chunks.

    Delegates to the module-level SentenceTransformer and returns a numpy
    array (one row per chunk); row width is expected to match the 384-dim
    FAISS index configured above.
    """
    return embedding_model.encode(chunks, convert_to_numpy=True)
38
+
39
def query_groq(prompt):
    """Send *prompt* to the Groq chat API and return the model's reply text."""
    messages = [{"role": "user", "content": prompt}]
    completion = client.chat.completions.create(
        messages=messages,
        model="llama3-8b-8192",
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
46
+
47
def visualize_comparative_insights(methods, results):
    """Render a scatter plot of methods vs. results inside the Streamlit app.

    Args:
        methods: x-axis values, one per method.
        results: y-axis values, same length as ``methods``.
    """
    # Build an explicit Figure rather than drawing on pyplot's implicit
    # global figure: `st.pyplot(plt)` (passing the module) is deprecated in
    # Streamlit, and the never-closed global figure accumulates memory
    # across app reruns.
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.scatter(methods, results, c='blue', alpha=0.7)
    ax.set_title("Methods vs. Results")
    ax.set_xlabel("Methods")
    ax.set_ylabel("Results")
    st.pyplot(fig)
    plt.close(fig)  # release the figure so repeated reruns don't leak
55
+
56
# Streamlit App
# Flat script re-executed top-to-bottom on every Streamlit interaction
# (rerun); all heavy objects (model, FAISS index) live at module level.
st.title("Research Paper Analyzer")
st.write("Upload research papers, ask questions, and gain AI-driven insights!")

# Step 1: PDF Upload
uploaded_files = st.file_uploader("Upload your PDF files", type=["pdf"], accept_multiple_files=True)
if uploaded_files:
    with st.spinner("Processing the PDFs..."):
        # Extract text
        pdf_text = extract_text_from_pdfs(uploaded_files)
        st.success("PDFs processed successfully!")

    # Step 2: Chunking — split the combined text into ~500-word pieces.
    chunks = create_chunks(pdf_text)
    st.info(f"Documents split into {len(chunks)} chunks.")

    # Step 3: Embedding Creation
    # NOTE(review): `index` is a module-level global that is never reset, so
    # each rerun with files uploaded appends the same embeddings again —
    # verify whether duplicates in search results are acceptable.
    embeddings = generate_embeddings(chunks)
    index.add(np.array(embeddings))
    st.success("Embeddings stored in FAISS database.")

    # Step 4: Query — classic RAG: embed the question, retrieve the 5
    # nearest chunks, and ask the LLM to answer from that context.
    user_query = st.text_input("Ask a question:")
    if user_query:
        with st.spinner("Searching and generating a response..."):
            # Embed user query
            query_embedding = embedding_model.encode([user_query], convert_to_numpy=True)

            # Search in FAISS
            distances, indices = index.search(np.array(query_embedding), k=5)
            relevant_chunks = [chunks[i] for i in indices[0]]

            # Combine retrieved chunks as context
            context = " ".join(relevant_chunks)

            # Query Groq model
            prompt = f"Context: {context}\n\nQuestion: {user_query}\n\nAnswer:"
            answer = query_groq(prompt)

            # Display response
            st.write("### Answer:")
            st.write(answer)

    # Step 5: Comparative Insights
    # NOTE(review): placeholder data below — the plot does not yet reflect
    # the uploaded papers.
    if st.button("Generate Comparative Insights"):
        # Example data for visualization
        methods = [1, 2, 3, 4, 5]  # Replace with actual methods data
        results = [3.2, 4.1, 5.6, 4.8, 6.0]  # Replace with actual results data
        visualize_comparative_insights(methods, results)

    # Step 6: Bibliography Suggestions
    # NOTE(review): placeholder list — not derived from the uploaded papers.
    if st.button("Suggest Related Papers"):
        related_papers = ["Paper A", "Paper B", "Paper C"]  # Replace with actual suggestions
        st.write("### Suggested Papers:")
        for paper in related_papers:
            st.write(f"- {paper}")