Update app.py

app.py (CHANGED): this commit moves the Space from the DeepSeek API to OpenAI for chat completions, fills out truncated docstrings and comments, and restructures the request payload accordingly.
@@ -1,5 +1,5 @@
 # ------------------------------
-# UniversalResearch AI
+# UniversalResearch AI with LADDER (OpenAI Integration)
 # ------------------------------
 import logging
 import os
@@ -16,7 +16,7 @@ import chromadb
 import requests
 import streamlit as st
 
-# LangChain
+# LangChain & LangGraph imports
 from langchain_openai import OpenAIEmbeddings
 from langchain_community.vectorstores import Chroma
 from langchain_core.messages import HumanMessage, AIMessage, ToolMessage
@@ -27,7 +27,7 @@ from langgraph.graph.message import add_messages
 from typing_extensions import TypedDict, Annotated
 from langchain.tools.retriever import create_retriever_tool
 
-# Increase Python's recursion limit if needed
+# Increase Python's recursion limit at the start (if needed)
 sys.setrecursionlimit(10000)
 
 # ------------------------------
@@ -45,8 +45,9 @@ logger = logging.getLogger(__name__)
 class AgentState(TypedDict):
     """
     Stores the messages and context for each step in the workflow.
-    'messages'
-    'context'
+    'messages': conversation so far
+    'context': domain-specific data (docs, counters)
+    'metadata': any additional info (timestamps, status)
     """
     messages: Annotated[Sequence[AIMessage | HumanMessage | ToolMessage], add_messages]
     context: Dict[str, Any]
@@ -57,47 +58,45 @@ class AgentState(TypedDict):
 # ------------------------------
 class ResearchConfig:
     """
-    Universal
+    Universal config for the advanced AI system with Tufa Labs' LADDER approach,
+    using OpenAI for both embeddings and completions.
 
-    Make sure to set
-    to enable the external LLM calls.
+    Make sure to set OPENAI_API_KEY in your environment or HF Space secrets.
     """
-    DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY")
+    OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")  # Must match your HF secret name
     CHROMA_PATH = "chroma_db"
     CHUNK_SIZE = 512
     CHUNK_OVERLAP = 64
     MAX_CONCURRENT_REQUESTS = 5
     EMBEDDING_DIMENSIONS = 1536
 
-    # Example map for featured
+    # Example map for featured documents
    DOCUMENT_MAP = {
        "Sample Research Document 1": "Topic A Overview",
        "Sample Research Document 2": "Topic B Analysis",
        "Sample Research Document 3": "Topic C Innovations"
    }
 
-    #
+    # Analysis template referencing LADDER's approach
     ANALYSIS_TEMPLATE = (
         "Analyze the following research documents with scientific rigor:\n{context}\n\n"
-        "
-        "1. Break down
+        "Use Tufa Labs’ LADDER method to:\n"
+        "1. Break down complex problems into subproblems.\n"
         "2. Iteratively refine the solution.\n"
-        "3. Provide
+        "3. Provide analysis including:\n"
         " a. Key Contributions\n"
         " b. Novel Methodologies\n"
         " c. Empirical Results (with metrics)\n"
         " d. Potential Applications\n"
         " e. Limitations & Future Directions\n\n"
-        "Format your response in Markdown with LaTeX
+        "Format your response in Markdown with LaTeX where applicable."
     )
 
 # Early check for missing API key
-if not ResearchConfig.DEEPSEEK_API_KEY:
+if not ResearchConfig.OPENAI_API_KEY:
     st.error(
-        """**
-
-        2. Set the secret: `DEEPSEEK_API_KEY` in your Space settings
-        3. Rebuild your deployment."""
+        """**OpenAI API Key Not Found**
+        Please set `OPENAI_API_KEY` in your Space secrets and rebuild the Space."""
     )
     st.stop()
 
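The early key check above fails fast before any UI work happens. A minimal sketch of the same pattern outside Streamlit, assuming the Space exposes the secret as an `OPENAI_API_KEY` environment variable:

```python
# Minimal sketch of the fail-fast secret check (assumes the HF Space
# exposes the secret as the OPENAI_API_KEY environment variable).
import os

OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    raise SystemExit("OPENAI_API_KEY not found. Set it in your Space secrets and rebuild.")
```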
@@ -106,8 +105,8 @@ if not ResearchConfig.DEEPSEEK_API_KEY:
 # ------------------------------
 class UniversalDocumentManager:
     """
-    Manages creation of document collections for any research domain
-
+    Manages creation of document collections for any research domain,
+    using OpenAI embeddings for semantic search.
     """
     def __init__(self) -> None:
         try:
@@ -117,6 +116,7 @@ class UniversalDocumentManager:
             logger.error(f"Error initializing PersistentClient: {e}")
             self.client = chromadb.Client()  # Fallback to in-memory client
 
+        # Configure embeddings from openai
         self.embeddings = OpenAIEmbeddings(
             model="text-embedding-3-large",
             dimensions=ResearchConfig.EMBEDDING_DIMENSIONS
@@ -124,7 +124,7 @@ class UniversalDocumentManager:
 
     def create_collection(self, documents: List[str], collection_name: str) -> Chroma:
         """
-        Splits documents into
+        Splits documents into chunks and stores them in a Chroma collection.
         """
         splitter = RecursiveCharacterTextSplitter(
             chunk_size=ResearchConfig.CHUNK_SIZE,
@@ -133,7 +133,7 @@ class UniversalDocumentManager:
         )
         try:
             docs = splitter.create_documents(documents)
-            logger.info(f"Created {len(docs)}
+            logger.info(f"Created {len(docs)} doc chunks for collection '{collection_name}'.")
         except Exception as e:
             logger.error(f"Error splitting documents: {e}")
             raise e
@@ -148,11 +148,11 @@ class UniversalDocumentManager:
 
     def _document_id(self, content: str) -> str:
         """
-        Generates a unique
+        Generates a unique ID using SHA256 + timestamp.
         """
         return f"{hashlib.sha256(content.encode()).hexdigest()[:16]}-{int(time.time())}"
 
-#
+# Example collections (replace with your own)
 udm = UniversalDocumentManager()
 research_docs = udm.create_collection([
     "Research Report: Novel AI Techniques in Renewable Energy",
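The `_document_id` helper shown above is small enough to test in isolation: the ID is the first 16 hex characters of a SHA-256 digest plus a Unix timestamp, so the prefix is deterministic per content while the suffix varies with time.

```python
# Sketch of the _document_id format: 16 hex chars of SHA-256 + Unix timestamp.
import hashlib
import time

def document_id(content: str) -> str:
    digest = hashlib.sha256(content.encode()).hexdigest()[:16]
    return f"{digest}-{int(time.time())}"

# Prints something like '<16 hex chars>-<timestamp>'.
print(document_id("Research Report: Novel AI Techniques in Renewable Energy"))
```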
@@ -171,8 +171,8 @@ development_docs = udm.create_collection([
 # ------------------------------
 class ResearchRetriever:
     """
-    Provides retrieval methods for multiple domains (research, development
-    Uses MMR
+    Provides retrieval methods for multiple domains (e.g., research, development).
+    Uses MMR or similarity-based retrieval from Chroma.
     """
     def __init__(self) -> None:
         try:
@@ -200,7 +200,7 @@ class ResearchRetriever:
             elif domain == "development":
                 return self.development_retriever.invoke(query)
             else:
-                logger.warning(f"Domain '{domain}' not recognized. Defaulting to research.")
+                logger.warning(f"Domain '{domain}' not recognized. Defaulting to 'research'.")
                 return self.research_retriever.invoke(query)
         except Exception as e:
             logger.error(f"Retrieval error for domain '{domain}': {e}")
@@ -213,8 +213,8 @@ retriever = ResearchRetriever()
 # ------------------------------
 class CognitiveProcessor:
     """
-
-
+    Executes requests to the OpenAI Chat Completions endpoint in parallel,
+    then consolidates the results using a consensus mechanism (picks the longest).
     """
     def __init__(self) -> None:
         self.executor = ThreadPoolExecutor(max_workers=ResearchConfig.MAX_CONCURRENT_REQUESTS)
@@ -222,7 +222,7 @@ class CognitiveProcessor:
 
     def process_query(self, prompt: str) -> Dict:
         """
-
+        Sends multiple parallel requests (triple redundancy) to OpenAI's ChatCompletion.
         """
         futures = []
         for _ in range(3):
@@ -240,61 +240,63 @@ class CognitiveProcessor:
 
     def _execute_api_request(self, prompt: str) -> Dict:
         """
-        Executes a single request to
+        Executes a single request to OpenAI's ChatCompletion endpoint.
         """
+        # Use your OPENAI_API_KEY
         headers = {
-            "Authorization": f"Bearer {ResearchConfig.DEEPSEEK_API_KEY}",
-            "Content-Type": "application/json",
-            "X-Research-Session": self.session_id
+            "Authorization": f"Bearer {ResearchConfig.OPENAI_API_KEY}",
+            "Content-Type": "application/json"
         }
         payload = {
-            "model": "
-            "messages": [
-
-
-
+            "model": "gpt-3.5-turbo",  # or "gpt-4", depending on your account
+            "messages": [
+                {
+                    "role": "user",
+                    "content": prompt
+                }
+            ],
             "temperature": 0.7,
             "max_tokens": 1500,
             "top_p": 0.9
         }
         try:
             response = requests.post(
-                "https://api.
+                "https://api.openai.com/v1/chat/completions",
                 headers=headers,
                 json=payload,
                 timeout=45
             )
             response.raise_for_status()
-            logger.info("
+            logger.info("OpenAI ChatCompletion request successful.")
             return response.json()
         except requests.exceptions.RequestException as e:
-            logger.error(f"
+            logger.error(f"OpenAI request failed: {e}")
             return {"error": str(e)}
 
     def _consensus_check(self, results: List[Dict]) -> Dict:
         """
-        Chooses the best response by comparing
+        Chooses the 'best' response by comparing content lengths, discarding errors.
         """
-
-        if not
+        valid = [r for r in results if "error" not in r]
+        if not valid:
             logger.error("All API requests failed.")
             return {"error": "All API requests failed"}
-        return max(
+        return max(valid, key=lambda x: len(x.get('choices', [{}])[0].get('message', {}).get('content', '')))
 
 # ------------------------------
-# Research Workflow Engine (LADDER
+# Research Workflow Engine (Tufa Labs' LADDER)
 # ------------------------------
 class ResearchWorkflow:
     """
-    Defines a multi-step workflow using LangGraph with Tufa Labs
+    Defines a multi-step workflow using LangGraph with Tufa Labs’ LADDER approach:
     1. Ingest Query
     2. Retrieve Documents
     3. Analyze Content
     4. Validate Output
-    5. Refine (Recursive Self-Learning
+    5. Refine (Recursive Self-Learning)
 
-    The
-
+    The refine step uses iterative subproblem breakdown,
+    potentially combined with test-time reinforcement.
     """
     def __init__(self) -> None:
         self.processor = CognitiveProcessor()
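`process_query` submits the same prompt three times and `_consensus_check` keeps the longest successful completion. A self-contained sketch of that triple-redundancy pattern; `execute` here is a stand-in for `_execute_api_request`, not the app's actual request code:

```python
# Sketch of triple-redundancy consensus: run the same request three times
# in parallel and keep the longest successful completion.
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Callable, Dict, List

def consensus_query(prompt: str, execute: Callable[[str], Dict], n: int = 3) -> Dict:
    with ThreadPoolExecutor(max_workers=5) as pool:
        futures = [pool.submit(execute, prompt) for _ in range(n)]
        results: List[Dict] = [f.result() for f in as_completed(futures)]
    valid = [r for r in results if "error" not in r]
    if not valid:
        return {"error": "All API requests failed"}
    return max(valid, key=lambda r: len(r.get("choices", [{}])[0].get("message", {}).get("content", "")))

# Usage with a fake executor standing in for the real HTTP call:
def fake_execute(prompt: str) -> Dict:
    return {"choices": [{"message": {"content": prompt * 2}}]}

print(consensus_query("hi", fake_execute)["choices"][0]["message"]["content"])  # 'hihi'
```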
@@ -303,14 +305,14 @@ class ResearchWorkflow:
         self.app = self.workflow.compile()
 
     def _build_workflow(self) -> None:
-        #
+        # Node definitions
         self.workflow.add_node("ingest", self.ingest_query)
         self.workflow.add_node("retrieve", self.retrieve_documents)
         self.workflow.add_node("analyze", self.analyze_content)
         self.workflow.add_node("validate", self.validate_output)
         self.workflow.add_node("refine", self.refine_results)
 
-        #
+        # Graph edges
         self.workflow.set_entry_point("ingest")
         self.workflow.add_edge("ingest", "retrieve")
         self.workflow.add_edge("retrieve", "analyze")
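The edges visible in this hunk stop at `retrieve -> analyze`; the `validate -> refine` loop implied by `_quality_check` is wired further down in the file. A minimal, runnable sketch of the full five-node loop with stub nodes; the conditional wiring is an assumption based on the node names and on `_quality_check` returning "valid"/"invalid":

```python
# Minimal sketch of the five-node LADDER loop with stub node functions.
# The validate -> refine conditional edge is an assumption; it is not
# shown in this hunk.
from typing import Any, Dict
from typing_extensions import TypedDict
from langgraph.graph import StateGraph, END

class State(TypedDict):
    context: Dict[str, Any]

def passthrough(state: State) -> Dict:
    return {"context": state["context"]}

def refine(state: State) -> Dict:
    # Mirrors refine_results: bump the counter each refinement pass.
    ctx = dict(state["context"])
    ctx["refine_count"] = ctx.get("refine_count", 0) + 1
    return {"context": ctx}

g = StateGraph(State)
for name in ("ingest", "retrieve", "analyze", "validate"):
    g.add_node(name, passthrough)
g.add_node("refine", refine)
g.set_entry_point("ingest")
g.add_edge("ingest", "retrieve")
g.add_edge("retrieve", "analyze")
g.add_edge("analyze", "validate")
g.add_conditional_edges(
    "validate",
    lambda s: "valid" if s["context"].get("refine_count", 0) >= 3 else "invalid",
    {"valid": END, "invalid": "refine"},
)
g.add_edge("refine", "validate")
app = g.compile()
print(app.invoke({"context": {}})["context"]["refine_count"])  # 3
```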
@@ -324,7 +326,7 @@ class ResearchWorkflow:
 
     def ingest_query(self, state: AgentState) -> Dict:
         """
-
+        Ingest the user query and initialize the refine counter for LADDER recursion.
         """
         try:
             query = state["messages"][-1].content
@@ -336,12 +338,11 @@ class ResearchWorkflow:
                 "metadata": {"timestamp": datetime.now().isoformat()}
             }
         except Exception as e:
-            return self._error_state(f"Ingestion Error: {
+            return self._error_state(f"Ingestion Error: {e}")
 
     def retrieve_documents(self, state: AgentState) -> Dict:
         """
-        Retrieves relevant documents
-        The system can handle any domain (math, code generation, theorem proving, etc.).
+        Retrieves relevant documents from the specified domain (default: research).
         """
         try:
             query = state["context"]["raw_query"]
@@ -356,14 +357,12 @@
                 }
             }
         except Exception as e:
-            return self._error_state(f"Retrieval Error: {
+            return self._error_state(f"Retrieval Error: {e}")
 
     def analyze_content(self, state: AgentState) -> Dict:
         """
-
-
-        - Provide structured analysis,
-        - Return a refined solution.
+        Uses the LADDER approach to break down and analyze documents,
+        returning a structured research analysis.
         """
         try:
             docs = state["context"].get("documents", [])
@@ -372,12 +371,10 @@
             response = self.processor.process_query(prompt)
             if "error" in response:
                 return self._error_state(response["error"])
-            logger.info("
+            logger.info("Analysis completed.")
             return {
                 "messages": [
-                    AIMessage(
-                        content=response.get('choices', [{}])[0].get('message', {}).get('content', '')
-                    )
+                    AIMessage(content=response.get('choices', [{}])[0].get('message', {}).get('content', ''))
                 ],
                 "context": {
                     "analysis": response,
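`analyze_content` digs the completion text out of the raw response JSON with chained `.get()` calls, so missing keys degrade to an empty string instead of raising a `KeyError` (an empty `choices` list would still raise `IndexError`). The same extraction on a toy response shape (illustrative data, not a real API reply):

```python
# Defensive extraction used above: missing keys yield "" rather than KeyError.
response = {"choices": [{"message": {"content": "Example analysis."}}]}
content = response.get("choices", [{}])[0].get("message", {}).get("content", "")
print(content)  # Example analysis.

# A response missing every expected key degrades to an empty string:
print({}.get("choices", [{}])[0].get("message", {}).get("content", ""))  # ''
```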
@@ -385,12 +382,12 @@
                 }
             }
         except Exception as e:
-            return self._error_state(f"Analysis Error: {
+            return self._error_state(f"Analysis Error: {e}")
 
     def validate_output(self, state: AgentState) -> Dict:
         """
-        Validates the analysis. If invalid, the system can refine
-
+        Validates the analysis. If invalid, the system can refine
+        using Tufa Labs’ LADDER approach.
         """
         analysis = state["messages"][-1].content
         validation_prompt = (
@@ -399,7 +396,7 @@
             "Respond with 'VALID' or 'INVALID'."
         )
         response = self.processor.process_query(validation_prompt)
-        logger.info("
+        logger.info("Validation completed.")
         return {
             "messages": [
                 AIMessage(
@@ -410,22 +407,18 @@
 
     def refine_results(self, state: AgentState) -> Dict:
         """
-
-
-        - Re-solve with no external data,
-        - Potentially leverage TTRL for dynamic updates.
-
-        This method increments a refinement counter to avoid infinite recursion.
+        LADDER refinement: break down subproblems, re-solve them
+        with no external data, potentially using TTRL for dynamic updates.
         """
         current_count = state["context"].get("refine_count", 0)
         state["context"]["refine_count"] = current_count + 1
         logger.info(f"LADDER refinement iteration: {state['context']['refine_count']}")
 
         refinement_prompt = (
-
+            "Refine this analysis with LADDER’s self-improvement approach:\n"
             f"{state['messages'][-1].content}\n\n"
-            "
-            "
+            "Break down complex points further, re-solve them, and enhance:\n"
+            "- Technical precision\n- Empirical grounding\n- Theoretical coherence"
         )
         response = self.processor.process_query(refinement_prompt)
         logger.info("Refinement completed.")
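`refine_results` bumps `refine_count` on every pass, and `_quality_check` (next hunk) force-accepts after three passes. A plain-Python sketch of that capped loop, independent of LangGraph:

```python
# Plain-Python sketch of the capped refinement loop (limit 3, as in _quality_check).
from typing import Callable

def refine_until_valid(draft: str,
                       is_valid: Callable[[str], bool],
                       refine: Callable[[str], str],
                       limit: int = 3) -> str:
    count = 0
    while not is_valid(draft) and count < limit:
        draft = refine(draft)
        count += 1
    return draft  # force-accepted once the limit is hit

# Toy usage: "refinement" appends detail until the validator is satisfied.
print(refine_until_valid("draft", lambda d: d.count("+") >= 2, lambda d: d + "+"))
```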
@@ -440,19 +433,20 @@
 
     def _quality_check(self, state: AgentState) -> str:
         """
-
-
+        Checks if the analysis is valid. If the refine_count >= 3,
+        forcibly accept to avoid infinite loops.
         """
         refine_count = state["context"].get("refine_count", 0)
         if refine_count >= 3:
-            logger.warning("Refinement limit reached. Forcing valid outcome
+            logger.warning("Refinement limit reached. Forcing valid outcome.")
             return "valid"
+
         content = state["messages"][-1].content
         return "valid" if "VALID" in content else "invalid"
 
     def _error_state(self, message: str) -> Dict:
         """
-        Returns an error state if any node fails.
+        Returns an error state if any node fails.
         """
         logger.error(message)
         return {
@@ -462,12 +456,12 @@
         }
 
 # ------------------------------
-#
+# Streamlit UI
 # ------------------------------
 class ResearchInterface:
     """
-    Provides a Streamlit-based interface for the UniversalResearch AI
-
+    Provides a Streamlit-based interface for the UniversalResearch AI
+    with Tufa Labs' LADDER approach, using OpenAI for both embeddings & completions.
     """
     def __init__(self) -> None:
         self.workflow = ResearchWorkflow()
@@ -475,7 +469,7 @@
 
     def _initialize_interface(self) -> None:
         st.set_page_config(
-            page_title="UniversalResearch AI (LADDER)",
+            page_title="UniversalResearch AI (OpenAI + LADDER)",
             layout="wide",
             initial_sidebar_state="expanded"
         )
@@ -541,12 +535,12 @@
     def _build_main_interface(self) -> None:
         st.title("🧠 UniversalResearch AI")
         st.write(
-            "
+            "This system uses OpenAI for embeddings & completions"
         )
         query = st.text_area(
             "Research Query:",
             height=200,
-            placeholder="Enter a research question
+            placeholder="Enter a research question (e.g., advanced math, code tasks, etc.)..."
         )
         if st.button("Execute Analysis", type="primary"):
             self._execute_analysis(query)
@@ -554,7 +548,7 @@
     def _execute_analysis(self, query: str) -> None:
         try:
             with st.spinner("Initializing LADDER-based Analysis..."):
-                # The recursion_limit
+                # The recursion_limit ensures multiple refine iterations are possible
                 results = self.workflow.app.stream({
                     "messages": [HumanMessage(content=query)],
                     "context": {},
@@ -571,12 +565,13 @@
 Potential issues:
 - Complex query structure
 - Document correlation failure
+- Rate limits or invalid API key
 - Temporal processing constraints"""
             )
 
     def _render_event(self, event: Dict) -> None:
         """
-        Renders each event in the Streamlit UI.
+        Renders each event in the Streamlit UI, from ingestion to validation/refinement.
         """
         if 'ingest' in event:
             with st.container():
@@ -600,7 +595,7 @@ Potential issues:
             if "VALID" in content:
                 st.success("✅ Validation Passed")
                 with st.expander("View Validated Analysis", expanded=True):
-                    #
+                    # Hide "Validation: ..." from final output
                     st.markdown(content.split("Validation:")[0])
             else:
                 st.warning("⚠️ Validation Issues Detected")