Upload 4 files
Browse files- agents.py +43 -0
- app.py +118 -0
- data_loader.py +70 -0
- requirements.txt +9 -0
agents.py
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from autogen import AssistantAgent
|
3 |
+
from dotenv import load_dotenv
|
4 |
+
|
5 |
+
# Load environment variables
|
6 |
+
load_dotenv()
|
7 |
+
|
8 |
+
class ResearchAgents:
    """Bundle of the two LLM agents used by the research assistant.

    One agent summarizes a paper, the other lists advantages and
    disadvantages based on a summary. Both share the same Groq-backed
    LLM configuration.
    """

    def __init__(self, api_key):
        """Create the summarizer and the advantages/disadvantages agents.

        Args:
            api_key: Groq API key placed into the shared ``llm_config``.
        """
        self.groq_api_key = api_key
        self.llm_config = {
            'config_list': [
                {
                    'model': 'llama-3.3-70b-versatile',
                    'api_key': self.groq_api_key,
                    'api_type': "groq",
                }
            ]
        }

        # Summarizer Agent - Summarizes research papers
        self.summarizer_agent = AssistantAgent(
            name="summarizer_agent",
            system_message="Summarize the retrieved research papers and present concise summaries to the user, JUST GIVE THE RELEVANT SUMMARIES OF THE RESEARCH PAPER AND NOT YOUR THOUGHT PROCESS.",
            llm_config=self.llm_config,
            human_input_mode="NEVER",
            code_execution_config=False
        )

        # Advantages and Disadvantages Agent - Analyzes pros and cons
        self.advantages_disadvantages_agent = AssistantAgent(
            name="advantages_disadvantages_agent",
            system_message="Analyze the summaries of the research papers and provide a list of advantages and disadvantages for each paper in a pointwise format. JUST GIVE THE ADVANTAGES AND DISADVANTAGES, NOT YOUR THOUGHT PROCESS",
            llm_config=self.llm_config,
            human_input_mode="NEVER",
            code_execution_config=False
        )

    @staticmethod
    def _reply_text(reply, fallback):
        """Normalize an agent reply to plain text.

        ``generate_reply`` may hand back a dict, a plain string or ``None``;
        all three are handled here so both public methods behave the same.

        Args:
            reply: Value returned by ``generate_reply``.
            fallback: Text returned when the reply carries no content.

        Returns:
            str: The reply content, or ``fallback`` when it is missing/empty.
        """
        if reply is None:
            return fallback
        if isinstance(reply, dict):
            content = reply.get("content")
            return str(content) if content else fallback
        return str(reply) or fallback

    def summarize_paper(self, paper_summary):
        """Generate a summary of the research paper.

        Args:
            paper_summary: Text of the paper (e.g. its abstract) to summarize.

        Returns:
            str: The summary, or ``"Summarization failed!"`` if the agent
            produced no content.
        """
        summary_response = self.summarizer_agent.generate_reply(
            messages=[{"role": "user", "content": f"Summarize this paper: {paper_summary}"}]
        )
        return self._reply_text(summary_response, "Summarization failed!")

    def analyze_advantages_disadvantages(self, summary):
        """Generate advantages and disadvantages of the research paper.

        Args:
            summary: Summary text the analysis is based on.

        Returns:
            str: The analysis, or
            ``"Advantages and disadvantages analysis failed!"`` if the agent
            produced no content.
        """
        adv_dis_response = self.advantages_disadvantages_agent.generate_reply(
            messages=[{"role": "user", "content": f"Provide advantages and disadvantages for this paper: {summary}"}]
        )
        # The reply is not guaranteed to be a dict, so it must not be
        # accessed with .get() directly.
        return self._reply_text(
            adv_dis_response, "Advantages and disadvantages analysis failed!"
        )
|
app.py
ADDED
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import os
|
3 |
+
from dotenv import load_dotenv
|
4 |
+
from agents import ResearchAgents
|
5 |
+
from data_loader import DataLoader
|
6 |
+
|
7 |
+
# Load environment variables (python-dotenv) before GROQ_API_KEY is read below.
load_dotenv()

# Page configuration; placed ahead of every other st.* call in this script.
st.set_page_config(
    page_title="Autogen Agent",
    page_icon="⚡",
    initial_sidebar_state="expanded"
)

# --- Added Custom CSS for a Professional Look ---
# NOTE(review): the .css-18e3th9 / .css-1d391kg selectors look like generated
# class names - confirm they still match the Streamlit version in use.
custom_css = """
<style>
body {
background-color: #f5f5f5;
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
}
.css-18e3th9, .css-1d391kg { /* Streamlit title and header adjustments */
color: #333333;
}
.stButton>button {
background-color: #4a90e2;
color: #ffffff;
border-radius: 5px;
border: none;
}
.sidebar .sidebar-content {
background-color: #ffffff;
}
.stMarkdown, .css-1d391kg {
color: #555555;
}
</style>
"""
# unsafe_allow_html is passed so the <style> block is emitted as HTML.
st.markdown(custom_css, unsafe_allow_html=True)

# Streamlit UI Title
st.title("📚 Virtual Research Assistant")

# Number of papers requested from ArXiv (passed as `limit` further down).
num_results = 5
# Data sources the user wants to query; ArXiv is preselected.
source_choice = st.sidebar.multiselect("Select Data Sources", options=["ArXiv", "Google Scholar"], default=["ArXiv"])
# Sidebar with features and footer
with st.sidebar:
    st.divider()
    st.markdown("<h3 style='text-align: center; color: #333;'>Key Features</h3>", unsafe_allow_html=True)
    st.markdown("""
<ul style='list-style: none; padding: 0;'>
<li style='margin-bottom: 8px;'>🔍 <strong>Multi-Source Research Retrieval</strong></li>
<li style='margin-bottom: 8px;'>🤖 <strong>Integrated Chatbot Interaction</strong></li>
<li style='margin-bottom: 8px;'>✨ <strong>Advanced Summarization</strong></li>
<li style='margin-bottom: 8px;'>🔄 <strong>Automatic Query Expansion & Refinement</strong></li>
<li style='margin-bottom: 8px;'>📊 <strong>Visual Data Presentation</strong></li>
</ul>
""", unsafe_allow_html=True)
    st.divider()
    st.markdown("<p style='text-align: center;'><em>Built with Groq | Autogen</em></p>", unsafe_allow_html=True)

# Retrieve the API key from environment variables
groq_api_key = os.getenv("GROQ_API_KEY")

# Check if API key is set, else stop execution
if not groq_api_key:
    st.error("GROQ_API_KEY is missing. Please set it in your environment variables.")
    st.stop()

# Initialize AI Agents for summarization and analysis
# (this runs each time the script body executes)
agents = ResearchAgents(groq_api_key)

# Initialize DataLoader for fetching research papers
data_loader = DataLoader()
|
76 |
+
|
77 |
+
# Use chat_input instead of text_input for entering the research topic.
query = st.chat_input("Enter a research topic:")

# Trigger the search automatically if a query is provided.
if query and not source_choice:
    # Nothing can be fetched without a source; say so explicitly instead of
    # reporting a misleading fetch failure.
    st.warning("Please select at least one data source.")
elif query:
    with st.spinner("Fetching research papers..."):  # Show a loading spinner

        all_papers = []
        # Fetch from selected sources based on sidebar choices
        if "ArXiv" in source_choice:
            arxiv_papers = data_loader.fetch_arxiv_papers(query, limit=num_results)
            all_papers.extend(arxiv_papers)
        if "Google Scholar" in source_choice:
            google_scholar_papers = data_loader.fetch_google_scholar_papers(query)
            all_papers.extend(google_scholar_papers)

        # If no papers are found, display an error message
        if not all_papers:
            st.error("Failed to fetch papers. Try again!")
        else:
            processed_papers = []

            # Process each paper: generate summary and analyze advantages/disadvantages
            for paper in all_papers:
                summary = agents.summarize_paper(paper['summary'])  # Generate summary
                adv_dis = agents.analyze_advantages_disadvantages(summary)  # Analyze pros/cons

                processed_papers.append({
                    # Titles can span several lines; collapse whitespace so the
                    # markdown heading below stays on a single line.
                    "title": " ".join(str(paper["title"]).split()),
                    "link": paper["link"],
                    "summary": summary,
                    "advantages_disadvantages": adv_dis,
                })

            # Display the processed research papers
            st.subheader("Top Research Papers:")
            for i, paper in enumerate(processed_papers, 1):
                st.markdown(f"### {i}. {paper['title']}")  # Paper title
                link = str(paper['link'])
                if link.startswith(("http://", "https://")):
                    st.markdown(f"🔗 [Read Paper]({link})")  # Paper link
                else:
                    # The loader uses a placeholder text when no URL is known;
                    # show it as plain text rather than a broken hyperlink.
                    st.markdown("🔗 No link available")
                st.write(f"**Summary:** {paper['summary']}")  # Paper summary
                st.write(f"{paper['advantages_disadvantages']}")  # Pros/cons analysis
                st.markdown("---")  # Separator between papers
|
data_loader.py
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
import xml.etree.ElementTree as ET
|
3 |
+
from scholarly import scholarly
|
4 |
+
|
5 |
+
class DataLoader:
    """Fetches research-paper metadata from ArXiv and Google Scholar.

    Every fetch method returns a list of dicts with the keys ``title``,
    ``summary`` and ``link``.
    """

    ARXIV_API_URL = "http://export.arxiv.org/api/query"
    ATOM_NS = "{http://www.w3.org/2005/Atom}"
    DEFAULT_LIMIT = 5
    REQUEST_TIMEOUT = 30  # seconds; avoids hanging forever on a stalled request

    def __init__(self, search_agent=None):
        """Create a loader.

        Args:
            search_agent: Optional agent exposing ``generate_reply(messages=...)``
                used to suggest related topics when ArXiv returns too few
                papers. Without it, no query expansion takes place.
        """
        print("DataLoader Init")
        # Always define the attribute: fetch_arxiv_papers reads it whenever
        # the first search comes back short.
        self.search_agent = search_agent

    @classmethod
    def _element_text(cls, entry, tag, default):
        """Return the whitespace-normalized text of an Atom child element."""
        node = entry.find(f"{cls.ATOM_NS}{tag}")
        raw = node.text if node is not None else None
        # Collapse line breaks and runs of blanks inside the element text.
        cleaned = " ".join(raw.split()) if raw else ""
        return cleaned or default

    @classmethod
    def _parse_arxiv_feed(cls, xml_text):
        """Convert an ArXiv Atom feed into a list of paper dicts.

        Returns an empty list when the payload is not well-formed XML.
        """
        try:
            root = ET.fromstring(xml_text)
        except ET.ParseError:
            return []
        return [
            {
                "title": cls._element_text(entry, "title", "No title available"),
                "summary": cls._element_text(entry, "summary", "No summary available"),
                "link": cls._element_text(entry, "id", "No link available"),
            }
            for entry in root.findall(f"{cls.ATOM_NS}entry")
        ]

    def _search_arxiv(self, query, max_results):
        """Query the ArXiv API; return [] on any network/HTTP/parse failure."""
        # Passing the query via `params` lets requests URL-encode it, so
        # characters such as '&' or '#' cannot corrupt the query string.
        params = {
            "search_query": f"all:{query}",
            "start": 0,
            "max_results": max_results,
        }
        try:
            response = requests.get(
                self.ARXIV_API_URL, params=params, timeout=self.REQUEST_TIMEOUT
            )
        except requests.RequestException:
            return []
        if response.status_code != 200:
            return []
        return self._parse_arxiv_feed(response.text)

    def _suggest_related_topics(self, query):
        """Ask the optional search agent for related topics, one per line."""
        agent = getattr(self, "search_agent", None)
        if not agent:
            return []
        reply = agent.generate_reply(
            messages=[{"role": "user", "content": f"Suggest 3 related research topics for '{query}'"}]
        )
        # The reply may be a dict, a plain string or None.
        if isinstance(reply, dict):
            text = reply.get("content") or ""
        elif reply is None:
            text = ""
        else:
            text = str(reply)
        return [line.strip() for line in text.split("\n") if line.strip()]

    def fetch_arxiv_papers(self, query, limit=None):
        """
        Fetches the top research papers from ArXiv based on the user query.
        If fewer papers than requested are found and a search agent is
        configured, expands the search using related topics.

        Args:
            query: Search text.
            limit: Maximum number of papers to return; defaults to 5.

        Returns:
            list: A list of dictionaries containing paper details (title, summary, link).
        """
        wanted = self.DEFAULT_LIMIT if limit is None else limit
        if wanted <= 0:
            return []

        papers = self._search_arxiv(query, wanted)

        if len(papers) < wanted:
            # Track what we already have so expansion does not add duplicates.
            seen = {(p["link"], p["title"]) for p in papers}
            for topic in self._suggest_related_topics(query):
                if len(papers) >= wanted:
                    break
                for candidate in self._search_arxiv(topic, wanted):
                    key = (candidate["link"], candidate["title"])
                    if key not in seen:
                        seen.add(key)
                        papers.append(candidate)

        return papers[:wanted]

    def fetch_google_scholar_papers(self, query, limit=None):
        """
        Fetches the top research papers from Google Scholar.

        Args:
            query: Search text.
            limit: Maximum number of papers to return; defaults to 5.

        Returns:
            list: A list of dictionaries containing paper details (title, summary, link)
        """
        from itertools import islice

        wanted = self.DEFAULT_LIMIT if limit is None else limit
        if wanted <= 0:
            return []

        papers = []
        # islice stops pulling from the result iterator once enough items
        # have been taken.
        for paper in islice(scholarly.search_pubs(query), wanted):
            bib = paper.get("bib", {})
            papers.append({
                "title": bib.get("title", "No title available"),
                "summary": bib.get("abstract", "No summary available"),
                "link": paper.get("pub_url", "No link available"),
            })
        return papers
|
requirements.txt
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
langchain-community # Extends LangChain with community-built tools
|
2 |
+
langchain-core # Core components for building LLM apps
|
3 |
+
streamlit # Creates user-friendly web interfaces
|
4 |
+
langchain # Enables LLM-based workflows and integrations
|
5 |
+
python-dotenv # Manages environment variables securely
|
6 |
+
langchain_groq # Integrates GROQ features with LangChain
|
7 |
+
transformers # Provides the tokenizers used to compute token IDs (get_token_ids)
|
8 |
+
scholarly
|
9 |
+
autogen
|