hamzaherry committed on
Commit 864d264 · verified · 1 Parent(s): 1580fc0

Update app.py

Files changed (1): app.py +71 -21
app.py CHANGED
@@ -6,8 +6,8 @@ import faiss
import matplotlib.pyplot as plt
import numpy as np
from groq import Groq
+ import faiss

- # Groq API Key
GROQ_API_KEY = "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"
client = Groq(api_key=GROQ_API_KEY)

@@ -21,7 +21,16 @@ faiss_index = faiss.IndexFlatL2(embedding_dim)
# Store Metadata
metadata_store = []

- # Function Definitions
+ # Function to identify unanswered questions based on comparative analysis of multiple papers
+ def identify_research_gaps(chunks):
+     unanswered_questions = []
+     # Simulate a simple search for keywords related to unanswered questions
+     for chunk in chunks:
+         if "future research" in chunk or "unanswered questions" in chunk:
+             unanswered_questions.append(chunk)
+     return "\n".join(unanswered_questions) if unanswered_questions else "No specific unanswered questions found."
+
+ # Function to extract text from PDFs
def extract_text_from_pdf(pdf_file):
    pdf_reader = PdfReader(pdf_file)
    text = ""
@@ -29,17 +38,21 @@ def extract_text_from_pdf(pdf_file):
        text += page.extract_text()
    return text

+ # Function to chunk text
def chunk_text(text, chunk_size=500):
    words = text.split()
    return [' '.join(words[i:i+chunk_size]) for i in range(0, len(words), chunk_size)]

+ # Function to generate embeddings
def generate_embeddings(chunks):
    return embedding_model.encode(chunks)

+ # Store embeddings in FAISS index
def store_embeddings(embeddings, metadata):
    faiss_index.add(np.array(embeddings))
    metadata_store.extend(metadata)

+ # Retrieve relevant chunks based on query
def retrieve_relevant_chunks(query, k=5):
    query_embedding = embedding_model.encode([query])
    distances, indices = faiss_index.search(query_embedding, k)
@@ -48,6 +61,7 @@ def retrieve_relevant_chunks(query, k=5):
    ]
    return valid_results

+ # Call Groq API to get answers and research gap analysis
def ask_groq_api(question, context):
    chat_completion = client.chat.completions.create(
        messages=[{"role": "user", "content": f"{context}\n\n{question}"}],
@@ -55,23 +69,7 @@ def ask_groq_api(question, context):
    )
    return chat_completion.choices[0].message.content

- def analyze_research_gaps(chunks):
-     gaps = []
-     for i, chunk_1 in enumerate(chunks):
-         for j, chunk_2 in enumerate(chunks):
-             if i != j:
-                 if chunk_1[:100] != chunk_2[:100]:  # Example heuristic
-                     gaps.append(f"Potential inconsistency between chunk {i} and chunk {j}.")
-     return "\n".join(gaps) if gaps else "No major inconsistencies found."
-
- def identify_research_gaps(chunks):
-     unanswered_questions = []
-     for chunk in chunks:
-         if "future research" in chunk or "unanswered questions" in chunk:
-             unanswered_questions.append(chunk)
-     return "\n".join(unanswered_questions) if unanswered_questions else "No specific unanswered questions found."
-
- # Main Streamlit App Logic
+ # Streamlit UI setup
st.title("RAG-Based Research Paper Analyzer")

uploaded_files = st.file_uploader("Upload PDF Files", accept_multiple_files=True, type="pdf")
@@ -90,6 +88,58 @@ if uploaded_files:

    st.success("Files uploaded and processed successfully!")

-     if st.button("Identify Research Gaps", help="Find unanswered questions or areas where research is lacking"):
+     # Button to view topic summaries with an emoji
+     if st.button("View Topic Summaries", help="Click to view a brief summary of the uploaded papers", icon="📚"):
+         for chunk in all_chunks[:3]:
+             st.write(chunk)
+
+     # User input for query without the icon
+     user_question = st.text_input("Ask a question about the uploaded papers:", help="Ask about specific research details")
+
+     if user_question:
+         relevant_chunks = retrieve_relevant_chunks(user_question)
+         if relevant_chunks:
+             context = "\n\n".join([chunk['chunk'] for chunk, _ in relevant_chunks])
+             answer = ask_groq_api(user_question, context)
+             st.write("**Answer:**", answer)
+
+             # Implement Research Gap Identification based on inconsistencies between papers
+             st.subheader("Research Gap Analysis:", icon="⚠️")
+             # We will analyze the chunks and context to identify research gaps
+             research_gap = analyze_research_gaps(all_chunks)
+             st.write(f"**Research Gaps Identified:** {research_gap}")
+         else:
+             st.write("No relevant sections found for your question.")
+
+     # Adding an emoji for research gap feature
+     if st.button("Identify Research Gaps", help="Find unanswered questions or areas where research is lacking", icon="⚠️"):
+         st.write("**Research Gap Analysis:**")
+         # Implementing research gap analysis based on comparing papers
        research_gap_analysis = identify_research_gaps(all_chunks)
-         st.write(f"**Research Gaps Identified:** {research_gap_analysis}")
+         st.write(research_gap_analysis)
+
+     # Button to generate scatter plot with a chart emoji
+     if st.button("Generate Scatter Plot", icon="📊"):
+         st.write("Generating scatter plot for methods vs. results...")
+         # Example scatter plot (replace with real data)
+         x = np.random.rand(10)
+         y = np.random.rand(10)
+         plt.scatter(x, y)
+         plt.xlabel("Methods")
+         plt.ylabel("Results")
+         st.pyplot(plt)
+
+     # Text area for annotations without the icon
+     st.text_area("Annotate Your Insights:", height=100, key="annotations", help="Add your thoughts or comments here")
+
+ # Function to analyze and identify research gaps by comparing chunks from different papers
+ def analyze_research_gaps(chunks):
+     # Here we would compare text from different papers to identify discrepancies
+     gaps = []
+     for i, chunk_1 in enumerate(chunks):
+         for j, chunk_2 in enumerate(chunks):
+             if i != j:
+                 # Simple heuristic to compare chunks for inconsistencies or gaps
+                 if chunk_1[:100] != chunk_2[:100]:  # Checking first 100 characters for difference
+                     gaps.append(f"Potential inconsistency between chunk {i} and chunk {j}.")
+     return "\n".join(gaps) if gaps else "No major inconsistencies found."
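A few review notes on the diff. First, the script hardcodes a Groq API key in source, and this commit publishes it in a public diff; a key exposed this way should be revoked, and the script should read it from the environment instead. A minimal sketch, assuming the key is exported as a `GROQ_API_KEY` environment variable (Streamlit's `st.secrets` would work equally well):

```python
import os

from groq import Groq

# Assumes the key is exported as GROQ_API_KEY; never commit the literal
# key to the repository — one committed publicly should be revoked.
api_key = os.environ.get("GROQ_API_KEY")
if not api_key:
    raise RuntimeError("Set the GROQ_API_KEY environment variable.")

client = Groq(api_key=api_key)
```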
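Second, the new file calls `analyze_research_gaps` in the Q&A branch but only defines it at the very bottom; Streamlit executes app.py top to bottom on every rerun, so asking a question would raise `NameError` before the definition is reached. Separately, `st.subheader` takes no `icon` keyword (unlike `st.button` in recent Streamlit releases), so that call would raise `TypeError`. A sketch of both fixes, keeping the commit's own heuristic:

```python
import streamlit as st

# Hoist the definition above the UI code: Streamlit runs the script top
# to bottom, so the function must exist before the Q&A branch calls it.
def analyze_research_gaps(chunks):
    gaps = []
    for i, chunk_1 in enumerate(chunks):
        for j, chunk_2 in enumerate(chunks):
            # Same first-100-characters heuristic as the commit.
            if i != j and chunk_1[:100] != chunk_2[:100]:
                gaps.append(f"Potential inconsistency between chunk {i} and chunk {j}.")
    return "\n".join(gaps) if gaps else "No major inconsistencies found."

# st.subheader has no icon keyword; fold the emoji into the text instead.
st.subheader("⚠️ Research Gap Analysis:")
st.write(analyze_research_gaps(["example chunk one", "example chunk two"]))
```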
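Third, `retrieve_relevant_chunks` always asks FAISS for `k=5` neighbours. With fewer than five chunks indexed, `IndexFlatL2` pads the result with index `-1`, and the validity filter elided from this hunk may not catch that (a negative index still reads `metadata_store` from the end). A defensive sketch reusing the script's own `embedding_model`, `faiss_index`, and `metadata_store` globals:

```python
import numpy as np

def retrieve_relevant_chunks(query, k=5):
    # Never request more neighbours than the index actually holds.
    k = min(k, faiss_index.ntotal)
    if k == 0:
        return []
    query_embedding = np.asarray(embedding_model.encode([query]), dtype="float32")
    distances, indices = faiss_index.search(query_embedding, k)
    # Drop the -1 padding FAISS uses for missing neighbours; a bare
    # metadata_store[i] would silently wrap around on -1.
    return [
        (metadata_store[i], float(dist))
        for i, dist in zip(indices[0], distances[0])
        if i != -1
    ]
```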
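Fourth, the scatter-plot handler draws on matplotlib's implicit global figure and passes the `plt` module itself to `st.pyplot`; Streamlit has deprecated that global-figure path, and the global state leaks across reruns. A sketch with an explicit figure, keeping the commit's placeholder random data:

```python
import matplotlib.pyplot as plt
import numpy as np
import streamlit as st

if st.button("Generate Scatter Plot", icon="📊"):
    # Build an explicit figure; st.pyplot on the global pyplot state is
    # deprecated and leaks between Streamlit reruns.
    fig, ax = plt.subplots()
    x = np.random.rand(10)  # placeholder data, as in the commit
    y = np.random.rand(10)
    ax.scatter(x, y)
    ax.set_xlabel("Methods")
    ax.set_ylabel("Results")
    st.pyplot(fig)
```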
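Finally, `analyze_research_gaps` compares the first 100 characters of every ordered pair of chunks, so virtually any two distinct chunks get reported as a "potential inconsistency" — O(n²) lines of noise. Since the app already loads a sentence-transformers model, one alternative (not the author's method) is to flag only pairs whose embeddings are nearly orthogonal; the 0.2 cosine-similarity threshold below is an arbitrary assumption to tune:

```python
import numpy as np

def analyze_research_gaps(chunks, embeddings, threshold=0.2):
    # Flag pairs of chunks whose embeddings barely overlap, i.e. that
    # discuss very different things; threshold 0.2 is a guess to tune.
    emb = np.asarray(embeddings, dtype="float32")
    emb /= np.linalg.norm(emb, axis=1, keepdims=True)
    sims = emb @ emb.T  # cosine similarity matrix
    gaps = [
        f"Chunks {i} and {j} barely overlap (similarity {sims[i, j]:.2f})."
        for i in range(len(chunks))
        for j in range(i + 1, len(chunks))
        if sims[i, j] < threshold
    ]
    return "\n".join(gaps) if gaps else "No major inconsistencies found."
```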