hamzaherry committed on
Commit
9e7ddd4
·
verified ·
1 Parent(s): 5ee6df9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -122
app.py CHANGED
@@ -1,127 +1,34 @@
1
- import os
2
- import streamlit as st
3
- from PyPDF2 import PdfReader
4
- from sentence_transformers import SentenceTransformer
5
- import faiss
6
- import matplotlib.pyplot as plt
7
- import numpy as np
8
- from groq import Groq
9
-
10
# Read the Groq API key from the environment instead of hard-coding it:
# committing a live secret to source control leaks it to anyone with
# repository access (the previous literal key must be revoked).
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
client = Groq(api_key=GROQ_API_KEY)

# Initialize Embedding Model
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

# Initialize FAISS Index
embedding_dim = 384  # Dimensionality of 'all-MiniLM-L6-v2'
faiss_index = faiss.IndexFlatL2(embedding_dim)

# Store Metadata: one dict per indexed chunk, row-aligned with faiss_index
metadata_store = []
22
-
23
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page in *pdf_file*.

    pdf_file: a file-like object (e.g. a Streamlit UploadedFile) that
    PyPDF2's PdfReader can open.
    """
    pdf_reader = PdfReader(pdf_file)
    text = ""
    for page in pdf_reader.pages:
        # extract_text() returns None for pages with no extractable text
        # (e.g. scanned images); guard so the concatenation never raises
        # "can only concatenate str ... to str".
        text += page.extract_text() or ""
    return text
29
-
30
def chunk_text(text, chunk_size=500):
    """Split *text* on whitespace and return a list of chunks, each
    containing at most *chunk_size* words joined by single spaces."""
    words = text.split()
    chunks = []
    for start in range(0, len(words), chunk_size):
        chunks.append(' '.join(words[start:start + chunk_size]))
    return chunks
33
-
34
def generate_embeddings(chunks):
    """Encode the text *chunks* with the shared SentenceTransformer
    model and return the resulting embedding vectors."""
    vectors = embedding_model.encode(chunks)
    return vectors
36
-
37
def store_embeddings(embeddings, metadata):
    """Add *embeddings* to the module-level FAISS index and record the
    matching *metadata* entries so index rows stay aligned with them."""
    vectors = np.array(embeddings)
    faiss_index.add(vectors)
    for entry in metadata:
        metadata_store.append(entry)
40
-
41
def retrieve_relevant_chunks(query, k=5):
    """Return up to *k* (metadata, distance) pairs most similar to *query*.

    Embeds the query with the shared model and runs an L2 search on the
    module-level FAISS index; each result pairs a metadata dict with its
    distance score.
    """
    query_embedding = embedding_model.encode([query])
    distances, indices = faiss_index.search(query_embedding, k)

    # FAISS pads the result with -1 when fewer than k vectors are indexed.
    # A bare `i < len(metadata_store)` test lets -1 pass and wrap around to
    # the LAST metadata entry, so reject negative ids explicitly too.
    valid_results = [
        (metadata_store[i], distances[0][j])
        for j, i in enumerate(indices[0])
        if 0 <= i < len(metadata_store)
    ]
    return valid_results
51
-
52
def identify_research_gaps():
    """Scan every unordered pair of stored chunks and report potential
    contradictions via a crude keyword heuristic ("not" vs "is")."""
    gap_summary = []
    total = len(metadata_store)
    # Start the inner index at i + 1 so each pair is visited exactly once.
    for i in range(total):
        first = metadata_store[i]
        for j in range(i + 1, total):
            second = metadata_store[j]
            if "not" in first["chunk"] and "is" in second["chunk"]:
                gap_summary.append(f"Potential contradiction between chunks {i} and {j}")
    return gap_summary
61
-
62
def ask_groq_api(question, context):
    """Send *context* followed by *question* to the Groq chat API
    (llama3-8b-8192) and return the model's reply text."""
    prompt = f"{context}\n\n{question}"
    response = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model="llama3-8b-8192",
    )
    return response.choices[0].message.content
68
-
69
- # Streamlit App
70
# ----- Streamlit App -----
st.set_page_config(page_title="Research Analyzer", page_icon="πŸ“š")

st.title("πŸ“„ Research Paper Analyzer with Gap Identification")
st.write("Easily analyze and identify gaps in your research papers. 🧠")

uploaded_files = st.file_uploader("πŸ“€ Upload PDF Files", accept_multiple_files=True, type="pdf")

if uploaded_files:
    st.success("πŸ“‚ Files uploaded successfully! Processing...")

    all_chunks = []
    all_metadata = []

    # Index every uploaded PDF: extract text, chunk it, embed the chunks,
    # and store embeddings plus per-chunk metadata for later retrieval.
    for uploaded_file in uploaded_files:
        text = extract_text_from_pdf(uploaded_file)
        chunks = chunk_text(text)
        embeddings = generate_embeddings(chunks)
        metadata = [{"chunk": chunk, "file_name": uploaded_file.name} for chunk in chunks]
        store_embeddings(embeddings, metadata)
        all_chunks.extend(chunks)
        all_metadata.extend(metadata)

    st.success("βœ… Files processed and embeddings created!")

    if st.button("πŸ“š View Topic Summaries"):
        # Show the first few chunks as a quick preview of the content.
        for chunk in all_chunks[:3]:
            st.write("πŸ”Ή", chunk)

    user_question = st.text_input("❓ Ask a question about the uploaded papers:")
    if user_question:
        relevant_chunks = retrieve_relevant_chunks(user_question)
        if relevant_chunks:
            # Feed the retrieved chunks to the LLM as grounding context.
            context = "\n\n".join([chunk['chunk'] for chunk, _ in relevant_chunks])
            answer = ask_groq_api(user_question, context)
            st.write("**Answer:**", answer)
        else:
            st.write("⚠️ No relevant sections found for your question.")

    if st.button("πŸ” Identify Research Gaps"):
        research_gaps = identify_research_gaps()
        if research_gaps:
            st.write("### πŸ› οΈ Research Gaps Identified:")
            for gap in research_gaps:
                st.write(f"πŸ”Έ {gap}")
        else:
            st.write("βœ… No significant research gaps identified.")

    if st.button("πŸ“Š Generate Scatter Plot"):
        st.write("πŸ“ˆ Generating scatter plot for methods vs. results...")
        # Example scatter plot (replace with real data).
        # Build an explicit Figure instead of passing the pyplot module:
        # st.pyplot(plt) is deprecated and relies on matplotlib's global
        # figure state, which leaks across Streamlit reruns.
        x = np.random.rand(10)
        y = np.random.rand(10)
        fig, ax = plt.subplots()
        ax.scatter(x, y)
        ax.set_xlabel("Methods")
        ax.set_ylabel("Results")
        st.pyplot(fig)

    st.text_area("πŸ“ Annotate Your Insights:", height=100, key="annotations")
 
1
def identify_major_research_gaps(records=None):
    """Compare stored chunks pairwise and summarize high-level divergences.

    records: optional list of {"chunk": str, ...} dicts to analyze;
    defaults to the module-level metadata_store so the existing
    no-argument call site keeps working unchanged.

    Returns a human-readable summary string (never a list), suitable for
    direct display with st.write.
    """
    if records is None:
        records = metadata_store

    # Extract high-level contradictions or differences.
    key_differences = []

    # Visit each unordered pair exactly once (j > i) instead of sweeping
    # the full n*n grid and skipping half of it with `if i >= j: continue`.
    for i in range(len(records)):
        chunk_1 = records[i]
        for j in range(i + 1, len(records)):
            chunk_2 = records[j]
            # Example heuristics to identify key differences.
            if "not" in chunk_1["chunk"] and "is" in chunk_2["chunk"]:
                key_differences.append(
                    "Potential inconsistency in methodologies or interpretations between sections of the document."
                )
            elif "should" in chunk_1["chunk"] and "must" in chunk_2["chunk"]:
                key_differences.append(
                    "Divergence in recommendations or conclusions observed between sections."
                )

    # Summarize key points for display.
    if key_differences:
        # dict.fromkeys de-duplicates while preserving first-seen order;
        # set() would make the bullet order vary from run to run because
        # string hashing is randomized.
        unique_differences = dict.fromkeys(key_differences)
        return (
            "The analysis identified several key areas of divergence in the reviewed research papers:\n\n"
            + "\n".join(f"β€’ {difference}" for difference in unique_differences)
            + "\n\n"
            "These findings indicate areas where further investigation or clarification may be beneficial."
        )
    return "No significant research gaps or inconsistencies were identified across the reviewed papers."
29
+
30
+ # Replace logic inside the button event
31
# Replace logic inside the button event
if st.button("πŸ” Identify Research Gaps"):
    gap_report = identify_major_research_gaps()
    st.write("### πŸ› οΈ Research Gaps Identified:")
    st.write(gap_report)