SURESHBEEKHANI committed on
Commit
b4c04de
·
verified ·
1 Parent(s): 5baa157

Upload 4 files

Browse files
Files changed (4) hide show
  1. agents.py +43 -0
  2. app.py +118 -0
  3. data_loader.py +70 -0
  4. requirements.txt +9 -0
agents.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from autogen import AssistantAgent
3
+ from dotenv import load_dotenv
4
+
5
+ # Load environment variables
6
+ load_dotenv()
7
+
8
class ResearchAgents:
    """Builds the Groq-backed Autogen assistant agents used by the app.

    Exposes two assistants: one that summarizes research papers and one
    that lists each paper's advantages and disadvantages.
    """

    def __init__(self, api_key):
        """Create both assistant agents against the Groq llama-3.3 model.

        Args:
            api_key: Groq API key shared by both agents.
        """
        self.groq_api_key = api_key
        self.llm_config = {'config_list': [{'model': 'llama-3.3-70b-versatile', 'api_key': self.groq_api_key, 'api_type': "groq"}]}

        # Summarizer Agent - Summarizes research papers
        self.summarizer_agent = AssistantAgent(
            name="summarizer_agent",
            system_message="Summarize the retrieved research papers and present concise summaries to the user, JUST GIVE THE RELEVANT SUMMARIES OF THE RESEARCH PAPER AND NOT YOUR THOUGHT PROCESS.",
            llm_config=self.llm_config,
            human_input_mode="NEVER",
            code_execution_config=False
        )

        # Advantages and Disadvantages Agent - Analyzes pros and cons
        self.advantages_disadvantages_agent = AssistantAgent(
            name="advantages_disadvantages_agent",
            system_message="Analyze the summaries of the research papers and provide a list of advantages and disadvantages for each paper in a pointwise format. JUST GIVE THE ADVANTAGES AND DISADVANTAGES, NOT YOUR THOUGHT PROCESS",
            llm_config=self.llm_config,
            human_input_mode="NEVER",
            code_execution_config=False
        )

    @staticmethod
    def _reply_content(response, fallback):
        """Extract the text of a generate_reply() result.

        generate_reply() may return either a message dict or a plain string.
        The original code handled both shapes only in summarize_paper;
        analyze_advantages_disadvantages called .get() unconditionally and
        would raise AttributeError on a string reply. This helper makes both
        methods handle the two shapes consistently.
        """
        if isinstance(response, dict):
            return response.get("content", fallback)
        return str(response)

    def summarize_paper(self, paper_summary):
        """Generates a summary of the research paper."""
        summary_response = self.summarizer_agent.generate_reply(
            messages=[{"role": "user", "content": f"Summarize this paper: {paper_summary}"}]
        )
        return self._reply_content(summary_response, "Summarization failed!")

    def analyze_advantages_disadvantages(self, summary):
        """Generates advantages and disadvantages of the research paper."""
        adv_dis_response = self.advantages_disadvantages_agent.generate_reply(
            messages=[{"role": "user", "content": f"Provide advantages and disadvantages for this paper: {summary}"}]
        )
        return self._reply_content(adv_dis_response, "Advantages and disadvantages analysis failed!")
app.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import streamlit as st
import os
from dotenv import load_dotenv
from agents import ResearchAgents
from data_loader import DataLoader

load_dotenv()

# st.set_page_config() must be the very first Streamlit call in the script.
st.set_page_config(
    page_title="Autogen Agent",
    page_icon="⚡",
    initial_sidebar_state="expanded",
)

# --- Custom CSS for a professional look ---
custom_css = """
<style>
body {
    background-color: #f5f5f5;
    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
}
.css-18e3th9, .css-1d391kg { /* Streamlit title and header adjustments */
    color: #333333;
}
.stButton>button {
    background-color: #4a90e2;
    color: #ffffff;
    border-radius: 5px;
    border: none;
}
.sidebar .sidebar-content {
    background-color: #ffffff;
}
.stMarkdown, .css-1d391kg {
    color: #555555;
}
</style>
"""
st.markdown(custom_css, unsafe_allow_html=True)

# Page title
st.title("📚 Virtual Research Assistant")

num_results = 5
source_choice = st.sidebar.multiselect("Select Data Sources", options=["ArXiv", "Google Scholar"], default=["ArXiv"])

# Sidebar: feature list and footer.
with st.sidebar:
    st.divider()
    st.markdown("<h3 style='text-align: center; color: #333;'>Key Features</h3>", unsafe_allow_html=True)
    st.markdown("""
<ul style='list-style: none; padding: 0;'>
<li style='margin-bottom: 8px;'>🔍 <strong>Multi-Source Research Retrieval</strong></li>
<li style='margin-bottom: 8px;'>🤖 <strong>Integrated Chatbot Interaction</strong></li>
<li style='margin-bottom: 8px;'>✨ <strong>Advanced Summarization</strong></li>
<li style='margin-bottom: 8px;'>🔄 <strong>Automatic Query Expansion & Refinement</strong></li>
<li style='margin-bottom: 8px;'>📊 <strong>Visual Data Presentation</strong></li>
</ul>
""", unsafe_allow_html=True)
    st.divider()
    st.markdown("<p style='text-align: center;'><em>Built with Groq | Autogen</em></p>", unsafe_allow_html=True)

# Abort early if the Groq API key is not configured.
groq_api_key = os.getenv("GROQ_API_KEY")
if not groq_api_key:
    st.error("GROQ_API_KEY is missing. Please set it in your environment variables.")
    st.stop()

# Agents for summarization/analysis and the paper fetcher.
agents = ResearchAgents(groq_api_key)
data_loader = DataLoader()

# chat_input (rather than text_input) collects the research topic.
query = st.chat_input("Enter a research topic:")

# A submitted query triggers the search automatically.
if query:
    with st.spinner("Fetching research papers..."):  # loading indicator
        # Gather papers from every source the user ticked in the sidebar.
        collected = []
        if "ArXiv" in source_choice:
            collected.extend(data_loader.fetch_arxiv_papers(query, limit=num_results))
        if "Google Scholar" in source_choice:
            collected.extend(data_loader.fetch_google_scholar_papers(query))

        if not collected:
            # Nothing came back from any source.
            st.error("Failed to fetch papers. Try again!")
        else:
            def _process(raw):
                """Summarize one paper and attach its pros/cons analysis."""
                summary_text = agents.summarize_paper(raw['summary'])
                return {
                    "title": raw["title"],
                    "link": raw["link"],
                    "summary": summary_text,
                    "advantages_disadvantages": agents.analyze_advantages_disadvantages(summary_text),
                }

            processed_papers = [_process(p) for p in collected]

            # Render the processed papers.
            st.subheader("Top Research Papers:")
            for idx, entry in enumerate(processed_papers, 1):
                st.markdown(f"### {idx}. {entry['title']}")       # title
                st.markdown(f"🔗 [Read Paper]({entry['link']})")  # link
                st.write(f"**Summary:** {entry['summary']}")      # summary
                st.write(f"{entry['advantages_disadvantages']}")  # pros/cons
                st.markdown("---")                                # separator
data_loader.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import xml.etree.ElementTree as ET
3
+ from scholarly import scholarly
4
+
5
class DataLoader:
    """Fetches research papers from ArXiv and Google Scholar."""

    def __init__(self):
        # NOTE(review): debug print kept from the original; consider logging.
        print("DataLoader Init")

    def fetch_arxiv_papers(self, query, limit=None):
        """
        Fetches top 5 research papers from ArXiv based on the user query.
        If <5 papers are found and a `search_agent` attribute has been set on
        this instance, the search is expanded with agent-suggested topics.

        Args:
            query: Search string sent to the ArXiv API.
            limit: Optional cap on the number of papers returned.

        Returns:
            list: A list of dictionaries containing paper details (title, summary, link).
        """
        ATOM = "{http://www.w3.org/2005/Atom}"

        def search_arxiv(q):
            """Helper function to query the ArXiv Atom API (up to 5 entries)."""
            url = f"http://export.arxiv.org/api/query?search_query=all:{q}&start=0&max_results=5"
            response = requests.get(url)
            if response.status_code != 200:
                return []
            root = ET.fromstring(response.text)
            return [
                {
                    "title": entry.find(ATOM + "title").text,
                    "summary": entry.find(ATOM + "summary").text,
                    "link": entry.find(ATOM + "id").text,
                }
                for entry in root.findall(ATOM + "entry")
            ]

        papers = search_arxiv(query)

        # Bug fix: the original read `self.search_agent` unconditionally, but
        # no such attribute is ever assigned in this class, so any query with
        # fewer than 5 results raised AttributeError. getattr() makes the
        # expansion step opt-in: it only runs if a caller has attached an agent.
        search_agent = getattr(self, "search_agent", None)
        if len(papers) < 5 and search_agent is not None:
            related_topics_response = search_agent.generate_reply(
                messages=[{"role": "user", "content": f"Suggest 3 related research topics for '{query}'"}]
            )
            # generate_reply may return a dict or a plain string; handle both.
            if isinstance(related_topics_response, dict):
                content = related_topics_response.get("content", "")
            else:
                content = str(related_topics_response)

            for topic in content.split("\n"):
                topic = topic.strip()
                if topic and len(papers) < 5:
                    papers.extend(search_arxiv(topic))
                    papers = papers[:5]  # Ensure max 5 papers

        if limit is not None:
            papers = papers[:limit]

        return papers

    def fetch_google_scholar_papers(self, query):
        """
        Fetches top 5 research papers from Google Scholar.

        Returns:
            list: A list of dictionaries containing paper details (title, summary, link)
        """
        papers = []
        search_results = scholarly.search_pubs(query)

        for i, paper in enumerate(search_results):
            if i >= 5:
                break
            papers.append({
                "title": paper["bib"]["title"],
                "summary": paper["bib"].get("abstract", "No summary available"),
                "link": paper.get("pub_url", "No link available"),
            })
        return papers
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ langchain-community # Extends LangChain with community-built tools
2
+ langchain-core # Core components for building LLM apps
3
+ streamlit # Creates user-friendly web interfaces
4
+ langchain # Enables LLM-based workflows and integrations
5
+ python-dotenv # Manages environment variables securely
6
+ langchain_groq # Integrates GROQ features with LangChain
7
+ transformers # Provides tokenizers used to compute token IDs (get_token_ids)
8
+ scholarly
9
+ autogen