hamzaherry committed on
Commit
1580fc0
·
verified ·
1 Parent(s): ee979d1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -71
app.py CHANGED
@@ -6,8 +6,8 @@ import faiss
6
  import matplotlib.pyplot as plt
7
  import numpy as np
8
  from groq import Groq
9
- import faiss
10
 
 
11
# SECURITY(review): this API key is hardcoded and has been committed to a
# public repository — it must be considered compromised and rotated.
# Load it from the environment (e.g. os.environ["GROQ_API_KEY"]) or a
# secrets manager instead of embedding it in source.
GROQ_API_KEY = "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"
# Groq client shared by all completion calls in this module.
client = Groq(api_key=GROQ_API_KEY)
13
 
@@ -21,7 +21,7 @@ faiss_index = faiss.IndexFlatL2(embedding_dim)
21
  # Store Metadata
22
  metadata_store = []
23
 
24
- # Function to extract text from PDFs
25
  def extract_text_from_pdf(pdf_file):
26
  pdf_reader = PdfReader(pdf_file)
27
  text = ""
@@ -29,21 +29,17 @@ def extract_text_from_pdf(pdf_file):
29
  text += page.extract_text()
30
  return text
31
 
32
- # Function to chunk text
33
def chunk_text(text, chunk_size=500):
    """Split *text* into whitespace-delimited chunks of at most *chunk_size* words.

    Returns a list of strings; an empty/whitespace-only input yields [].
    """
    words = text.split()
    chunks = []
    for start in range(0, len(words), chunk_size):
        chunks.append(' '.join(words[start:start + chunk_size]))
    return chunks
36
 
37
- # Function to generate embeddings
38
def generate_embeddings(chunks):
    """Encode text chunks into dense vectors via the module-level embedding model."""
    vectors = embedding_model.encode(chunks)
    return vectors
40
 
41
- # Store embeddings in FAISS index
42
def store_embeddings(embeddings, metadata):
    """Add vectors to the module-level FAISS index and record their metadata.

    The i-th metadata entry corresponds to the i-th added vector, so
    metadata_store stays aligned with the index's internal ids.
    """
    vectors = np.array(embeddings)
    faiss_index.add(vectors)
    metadata_store.extend(metadata)
45
 
46
- # Retrieve relevant chunks based on query
47
  def retrieve_relevant_chunks(query, k=5):
48
  query_embedding = embedding_model.encode([query])
49
  distances, indices = faiss_index.search(query_embedding, k)
@@ -52,7 +48,6 @@ def retrieve_relevant_chunks(query, k=5):
52
  ]
53
  return valid_results
54
 
55
- # Call Groq API to get answers and research gap analysis
56
  def ask_groq_api(question, context):
57
  chat_completion = client.chat.completions.create(
58
  messages=[{"role": "user", "content": f"{context}\n\n{question}"}],
@@ -60,7 +55,23 @@ def ask_groq_api(question, context):
60
  )
61
  return chat_completion.choices[0].message.content
62
 
63
- # Streamlit UI setup
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  st.title("RAG-Based Research Paper Analyzer")
65
 
66
  uploaded_files = st.file_uploader("Upload PDF Files", accept_multiple_files=True, type="pdf")
@@ -79,67 +90,6 @@ if uploaded_files:
79
 
80
  st.success("Files uploaded and processed successfully!")
81
 
82
- # Button to view topic summaries with an emoji
83
- if st.button("View Topic Summaries", help="Click to view a brief summary of the uploaded papers", icon="📚"):
84
- for chunk in all_chunks[:3]:
85
- st.write(chunk)
86
-
87
- # User input for query without the icon
88
- user_question = st.text_input("Ask a question about the uploaded papers:", help="Ask about specific research details")
89
-
90
- if user_question:
91
- relevant_chunks = retrieve_relevant_chunks(user_question)
92
- if relevant_chunks:
93
- context = "\n\n".join([chunk['chunk'] for chunk, _ in relevant_chunks])
94
- answer = ask_groq_api(user_question, context)
95
- st.write("**Answer:**", answer)
96
-
97
- # Implement Research Gap Identification based on inconsistencies between papers
98
- st.subheader("Research Gap Analysis:", icon="⚠️")
99
- # We will analyze the chunks and context to identify research gaps
100
- research_gap = analyze_research_gaps(all_chunks)
101
- st.write(f"**Research Gaps Identified:** {research_gap}")
102
- else:
103
- st.write("No relevant sections found for your question.")
104
-
105
- # Adding an emoji for research gap feature
106
- if st.button("Identify Research Gaps", help="Find unanswered questions or areas where research is lacking", icon="⚠️"):
107
- st.write("**Research Gap Analysis:**")
108
- # Implementing research gap analysis based on comparing papers
109
  research_gap_analysis = identify_research_gaps(all_chunks)
110
- st.write(research_gap_analysis)
111
-
112
- # Button to generate scatter plot with a chart emoji
113
- if st.button("Generate Scatter Plot", icon="📊"):
114
- st.write("Generating scatter plot for methods vs. results...")
115
- # Example scatter plot (replace with real data)
116
- x = np.random.rand(10)
117
- y = np.random.rand(10)
118
- plt.scatter(x, y)
119
- plt.xlabel("Methods")
120
- plt.ylabel("Results")
121
- st.pyplot(plt)
122
-
123
- # Text area for annotations without the icon
124
- st.text_area("Annotate Your Insights:", height=100, key="annotations", help="Add your thoughts or comments here")
125
-
126
- # Function to analyze and identify research gaps by comparing chunks from different papers
127
def analyze_research_gaps(chunks):
    """Compare chunk prefixes pairwise and report potential inconsistencies.

    Heuristic: two chunks whose first 100 characters differ are flagged as a
    potential inconsistency. Each unordered pair is reported once.

    Fix: the original iterated all ordered pairs (i, j) and (j, i), emitting
    every finding twice; we now only visit j > i.
    """
    gaps = []
    for i, chunk_1 in enumerate(chunks):
        for j in range(i + 1, len(chunks)):
            # Prefix comparison only — cheap proxy for "these papers differ".
            if chunk_1[:100] != chunks[j][:100]:
                gaps.append(f"Potential inconsistency between chunk {i} and chunk {j}.")
    return "\n".join(gaps) if gaps else "No major inconsistencies found."
137
-
138
- # Function to identify unanswered questions based on comparative analysis of multiple papers
139
def identify_research_gaps(chunks):
    """Collect chunks that explicitly mention open or future research questions.

    A chunk qualifies if it contains "future research" or "unanswered
    questions"; matches are joined with newlines, otherwise a fallback
    message is returned.
    """
    markers = ("future research", "unanswered questions")
    hits = [chunk for chunk in chunks if any(marker in chunk for marker in markers)]
    if hits:
        return "\n".join(hits)
    return "No specific unanswered questions found."
 
6
  import matplotlib.pyplot as plt
7
  import numpy as np
8
  from groq import Groq
 
9
 
10
# Groq API Key
# SECURITY(review): this API key is hardcoded and has been committed to a
# public repository — it must be considered compromised and rotated.
# Load it from the environment (e.g. os.environ["GROQ_API_KEY"]) or a
# secrets manager instead of embedding it in source.
GROQ_API_KEY = "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"
# Groq client shared by all completion calls in this module.
client = Groq(api_key=GROQ_API_KEY)
13
 
 
21
  # Store Metadata
22
  metadata_store = []
23
 
24
+ # Function Definitions
25
  def extract_text_from_pdf(pdf_file):
26
  pdf_reader = PdfReader(pdf_file)
27
  text = ""
 
29
  text += page.extract_text()
30
  return text
31
 
 
32
def chunk_text(text, chunk_size=500):
    """Break *text* into word-based chunks no longer than *chunk_size* words each."""
    words = text.split()
    return [
        ' '.join(words[idx:idx + chunk_size])
        for idx in range(0, len(words), chunk_size)
    ]
35
 
 
36
def generate_embeddings(chunks):
    """Return dense embedding vectors for *chunks* using the shared model."""
    encoded = embedding_model.encode(chunks)
    return encoded
38
 
 
39
def store_embeddings(embeddings, metadata):
    """Push new vectors into the FAISS index and keep metadata_store in sync."""
    as_array = np.array(embeddings)
    faiss_index.add(as_array)
    metadata_store.extend(metadata)
42
 
 
43
  def retrieve_relevant_chunks(query, k=5):
44
  query_embedding = embedding_model.encode([query])
45
  distances, indices = faiss_index.search(query_embedding, k)
 
48
  ]
49
  return valid_results
50
 
 
51
  def ask_groq_api(question, context):
52
  chat_completion = client.chat.completions.create(
53
  messages=[{"role": "user", "content": f"{context}\n\n{question}"}],
 
55
  )
56
  return chat_completion.choices[0].message.content
57
 
58
def analyze_research_gaps(chunks):
    """Compare chunk prefixes pairwise and report potential inconsistencies.

    Heuristic: two chunks whose first 100 characters differ are flagged as a
    potential inconsistency. Each unordered pair is reported once.

    Fix: the original iterated all ordered pairs (i, j) and (j, i), emitting
    every finding twice; we now only visit j > i.
    """
    gaps = []
    for i, chunk_1 in enumerate(chunks):
        for j in range(i + 1, len(chunks)):
            # Prefix comparison only — cheap proxy for "these papers differ".
            if chunk_1[:100] != chunks[j][:100]:
                gaps.append(f"Potential inconsistency between chunk {i} and chunk {j}.")
    return "\n".join(gaps) if gaps else "No major inconsistencies found."
66
+
67
def identify_research_gaps(chunks):
    """Return newline-joined chunks mentioning future/unanswered research.

    Falls back to a fixed message when no chunk matches either keyword.
    """
    found = [c for c in chunks if "future research" in c or "unanswered questions" in c]
    return "\n".join(found) if found else "No specific unanswered questions found."
73
+
74
+ # Main Streamlit App Logic
75
  st.title("RAG-Based Research Paper Analyzer")
76
 
77
  uploaded_files = st.file_uploader("Upload PDF Files", accept_multiple_files=True, type="pdf")
 
90
 
91
  st.success("Files uploaded and processed successfully!")
92
 
93
+ if st.button("Identify Research Gaps", help="Find unanswered questions or areas where research is lacking"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  research_gap_analysis = identify_research_gaps(all_chunks)
95
+ st.write(f"**Research Gaps Identified:** {research_gap_analysis}")