Update app.py
app.py
CHANGED
@@ -1,6 +1,6 @@
 # ------------------------------
-# Enhanced NeuroResearch AI System with
-# Dynamic Difficulty Gradient, and Meta-Refinement Inspired by LADDER
+# Enhanced NeuroResearch AI System with Domain Adaptability,
+# Refinement Counter, Dynamic Difficulty Gradient, and Meta-Refinement Inspired by LADDER
 # ------------------------------
 import logging
 import os
@@ -12,7 +12,7 @@ import sys
 from datetime import datetime
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from typing import List, Dict, Any, Optional, Sequence
-
+
 import requests
 import streamlit as st
 
@@ -65,6 +65,7 @@ class ResearchConfig:
         "Latest Trends in Machine Learning Methods Using Quantum Computing":
             "Quantum ML Frontiers"
     }
+    # Base analysis template remains unchanged.
     ANALYSIS_TEMPLATE = (
         "Analyze these technical documents with scientific rigor:\n{context}\n\n"
         "Respond with:\n"
@@ -75,6 +76,14 @@ class ResearchConfig:
         "5. Limitations & Future Directions\n\n"
         "Format: Markdown with LaTeX mathematical notation where applicable"
     )
+    # Domain-specific prompt additions
+    DOMAIN_PROMPTS = {
+        "Biomedical Research": "Consider clinical terminology, experimental design, and patient outcomes. Focus on recent biomedical breakthroughs and treatment approaches.",
+        "Legal Research": "Focus on legal language, precedents, and case law. Ensure nuanced interpretation of legal reasoning and statutory analysis.",
+        "Environmental and Energy Studies": "Emphasize renewable energy technologies, climate impact, and policy implications. Include details on efficiency and scalability.",
+        "Competitive Programming and Theoretical Computer Science": "Concentrate on algorithmic complexity, proofs, and novel computational techniques. Emphasize innovation in problem solving.",
+        "Social Sciences": "Highlight economic trends, sociological data, and behavioral insights. Focus on correlations, trends, and policy impacts."
+    }
 
 if not ResearchConfig.DEEPSEEK_API_KEY:
     st.error(
@@ -173,15 +182,11 @@ class ResearchRetriever:
     def retrieve(self, query: str, domain: str) -> List[Any]:
         """
         Retrieves documents based on the query and domain.
+        For now, domain differentiation is minimal; however, you can extend this method to use domain-specific collections.
         """
         try:
-            if domain == "research":
-                return self.research_retriever.invoke(query)
-            elif domain == "development":
-                return self.development_retriever.invoke(query)
-            else:
-                logger.warning(f"Domain '{domain}' not recognized.")
-                return []
+            # For demonstration, we use the "research" collection for all domains.
+            return self.research_retriever.invoke(query)
         except Exception as e:
             logger.exception(f"Retrieval error for domain '{domain}'.")
             return []
@@ -295,13 +300,15 @@ class ResearchWorkflow:
 
     def ingest_query(self, state: AgentState) -> Dict:
         """
-        Ingests the research query and initializes the refinement counter and
+        Ingests the research query and initializes the refinement counter, refinement history, and domain.
         """
         try:
             query = state["messages"][-1].content
-            #
-
-
+            # Retrieve domain from the query context if available; otherwise, default to "Biomedical Research"
+            domain = state.get("domain", "Biomedical Research")
+            # Initialize context with raw query, selected domain, refinement counter, and empty refinement history
+            new_context = {"raw_query": query, "domain": domain, "refine_count": 0, "refinement_history": []}
+            logger.info(f"Query ingested. Domain: {domain}")
             return {
                 "messages": [AIMessage(content="Query ingested successfully")],
                 "context": new_context,
@@ -317,11 +324,18 @@ class ResearchWorkflow:
         """
         try:
             query = state["context"]["raw_query"]
-
+            # For demonstration, we use the "research" collection for all domains.
+            docs = retriever.retrieve(query, state["context"].get("domain", "Biomedical Research"))
             logger.info(f"Retrieved {len(docs)} documents for query.")
             return {
                 "messages": [AIMessage(content=f"Retrieved {len(docs)} documents")],
-                "context": {
+                "context": {
+                    "documents": docs,
+                    "retrieval_time": time.time(),
+                    "refine_count": state["context"].get("refine_count", 0),
+                    "refinement_history": state["context"].get("refinement_history", []),
+                    "domain": state["context"].get("domain", "Biomedical Research")
+                }
             }
         except Exception as e:
             logger.exception("Error during document retrieval.")
@@ -330,19 +344,28 @@ class ResearchWorkflow:
     def analyze_content(self, state: AgentState) -> Dict:
         """
         Analyzes the retrieved documents using the DeepSeek API.
+        Augments the prompt with domain-specific instructions.
         """
         try:
             docs = state["context"].get("documents", [])
             docs_text = "\n\n".join([d.page_content for d in docs])
-            prompt = ResearchConfig.ANALYSIS_TEMPLATE.format(context=docs_text)
-            response = self.processor.process_query(prompt)
+            domain = state["context"].get("domain", "Biomedical Research")
+            domain_prompt = ResearchConfig.DOMAIN_PROMPTS.get(domain, "")
+            # Combine domain-specific instructions with the base analysis template
+            full_prompt = f"{domain_prompt}\n\n" + ResearchConfig.ANALYSIS_TEMPLATE.format(context=docs_text)
+            response = self.processor.process_query(full_prompt)
             if "error" in response:
                 logger.error("DeepSeek response error during analysis.")
                 return self._error_state(response["error"])
             logger.info("Content analysis completed.")
             return {
                 "messages": [AIMessage(content=response.get('choices', [{}])[0].get('message', {}).get('content', ''))],
-                "context": {
+                "context": {
+                    "analysis": response,
+                    "refine_count": state["context"].get("refine_count", 0),
+                    "refinement_history": state["context"].get("refinement_history", []),
+                    "domain": domain
+                }
             }
         except Exception as e:
             logger.exception("Error during content analysis.")
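The analyze_content hunk above is the core of the change: the selected domain's instructions from DOMAIN_PROMPTS are prepended to ANALYSIS_TEMPLATE before the prompt is handed to the DeepSeek processor. Below is a minimal standalone sketch of that composition, using abbreviated copies of the two ResearchConfig attributes and a placeholder string in place of the retrieved documents.

# Standalone sketch of the prompt composition performed in analyze_content.
# ANALYSIS_TEMPLATE and DOMAIN_PROMPTS are abbreviated copies of the ResearchConfig
# attributes shown in this diff; docs_text stands in for the joined page_content.
ANALYSIS_TEMPLATE = (
    "Analyze these technical documents with scientific rigor:\n{context}\n\n"
    "Format: Markdown with LaTeX mathematical notation where applicable"
)
DOMAIN_PROMPTS = {
    "Legal Research": "Focus on legal language, precedents, and case law. "
                      "Ensure nuanced interpretation of legal reasoning and statutory analysis.",
}

docs_text = "Doc 1: ...\n\nDoc 2: ..."
domain = "Legal Research"
domain_prompt = DOMAIN_PROMPTS.get(domain, "")  # unrecognized domains fall back to the bare template
full_prompt = f"{domain_prompt}\n\n" + ANALYSIS_TEMPLATE.format(context=docs_text)
print(full_prompt)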
@@ -372,7 +395,8 @@ class ResearchWorkflow:
         """
         Refines the analysis report if validation fails.
         Implements an innovative meta-refinement mechanism inspired by LADDER.
-
+        Tracks refinement history, uses a dynamic difficulty gradient, and if the refinement count exceeds a threshold,
+        summarizes the history into a final output.
         """
         try:
             current_count = state["context"].get("refine_count", 0)
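The refine_results hunk only touches the docstring; the thresholding and history-summarization behaviour it describes is not visible in this diff. The following is a hypothetical sketch of that control flow: REFINE_THRESHOLD, the history entries, and the difficulty formula are illustrative assumptions, not the app's actual implementation.

# Hypothetical sketch of the behaviour described in the refine_results docstring.
# REFINE_THRESHOLD and the difficulty formula are assumptions; only refine_count and
# refinement_history appear in the diff itself.
REFINE_THRESHOLD = 3

def refine_step(context: dict) -> dict:
    count = context.get("refine_count", 0)
    history = list(context.get("refinement_history", []))
    if count >= REFINE_THRESHOLD:
        # Past the threshold: summarize the accumulated attempts into a final output.
        return {"final_output": "\n".join(history) or "No refinements recorded."}
    # Otherwise record this pass and raise the difficulty for the next attempt.
    history.append(f"refinement pass {count + 1}")
    return {
        "refine_count": count + 1,
        "refinement_history": history,
        "difficulty": min(1.0, 0.25 * (count + 1)),  # dynamic difficulty gradient (illustrative)
    }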
@@ -523,16 +547,28 @@ class ResearchInterface:
             height=200,
             placeholder="Enter technical research question..."
         )
+        # Domain selection for research use cases
+        domain = st.selectbox(
+            "Select Research Domain:",
+            options=[
+                "Biomedical Research",
+                "Legal Research",
+                "Environmental and Energy Studies",
+                "Competitive Programming and Theoretical Computer Science",
+                "Social Sciences"
+            ],
+            index=0
+        )
         if st.button("Execute Analysis", type="primary"):
-            self._execute_analysis(query)
+            self._execute_analysis(query, domain)
 
-    def _execute_analysis(self, query: str) -> None:
+    def _execute_analysis(self, query: str, domain: str) -> None:
         try:
             with st.spinner("Initializing Quantum Analysis..."):
-                # Pass
+                # Pass domain into the context by adding it to the initial state
                 results = self.workflow.app.stream({
                     "messages": [HumanMessage(content=query)],
-                    "context": {},
+                    "context": {"domain": domain},
                     "metadata": {}
                 }, {"recursion_limit": 100})
                 for event in results:
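Finally, the updated retrieve docstring notes that the method can be extended to use domain-specific collections instead of routing every domain through the shared research retriever. A sketch of that extension is below; only self.research_retriever appears in the diff, so the per-domain retriever mapping and the fallback behaviour are illustrative assumptions.

# Sketch of the domain-specific extension suggested by the retrieve docstring.
# The domain_retrievers mapping is hypothetical; each value is assumed to expose
# the same invoke(query) interface as the retrievers used in app.py.
import logging
from typing import Any, Dict, List, Optional

logger = logging.getLogger(__name__)

class DomainAwareRetriever:
    def __init__(self, research_retriever: Any, domain_retrievers: Optional[Dict[str, Any]] = None):
        self.research_retriever = research_retriever      # shared "research" collection
        self.domain_retrievers = domain_retrievers or {}  # e.g. {"Legal Research": legal_retriever}

    def retrieve(self, query: str, domain: str) -> List[Any]:
        try:
            # Use a domain-specific collection when one is registered,
            # otherwise fall back to the shared research collection.
            retriever = self.domain_retrievers.get(domain, self.research_retriever)
            return retriever.invoke(query)
        except Exception:
            logger.exception(f"Retrieval error for domain '{domain}'.")
            return []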