Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -61,7 +61,7 @@ Respond with:
|
|
61 |
Format: Markdown with LaTeX mathematical notation where applicable
|
62 |
"""
|
63 |
|
64 |
-
#
|
65 |
if not ResearchConfig.DEEPSEEK_API_KEY:
|
66 |
st.error("""**Research Portal Configuration Required**
|
67 |
1. Obtain DeepSeek API key: [platform.deepseek.com](https://platform.deepseek.com/)
|
@@ -87,6 +87,8 @@ class QuantumDocumentManager:
|
|
87 |
separators=["\n\n", "\n", "|||"]
|
88 |
)
|
89 |
docs = splitter.create_documents(documents)
|
|
|
|
|
90 |
return Chroma.from_documents(
|
91 |
documents=docs,
|
92 |
embedding=self.embeddings,
|
@@ -134,8 +136,11 @@ class ResearchRetriever:
|
|
134 |
|
135 |
def retrieve(self, query: str, domain: str) -> List[Any]:
|
136 |
try:
|
137 |
-
|
|
|
|
|
138 |
except KeyError:
|
|
|
139 |
return []
|
140 |
|
141 |
retriever = ResearchRetriever()
|
@@ -150,7 +155,7 @@ class CognitiveProcessor:
|
|
150 |
|
151 |
def process_query(self, prompt: str) -> Dict:
|
152 |
futures = []
|
153 |
-
for _ in range(3): # Triple redundancy
|
154 |
futures.append(self.executor.submit(
|
155 |
self._execute_api_request,
|
156 |
prompt
|
@@ -197,6 +202,7 @@ class CognitiveProcessor:
|
|
197 |
valid = [r for r in results if "error" not in r]
|
198 |
if not valid:
|
199 |
return {"error": "All API requests failed"}
|
|
|
200 |
return max(valid, key=lambda x: len(x.get('choices', [{}])[0].get('message', {}).get('content', '')))
|
201 |
|
202 |
# ------------------------------
|
@@ -209,6 +215,7 @@ class ResearchWorkflow:
|
|
209 |
self._build_workflow()
|
210 |
|
211 |
def _build_workflow(self):
|
|
|
212 |
self.workflow.add_node("ingest", self.ingest_query)
|
213 |
self.workflow.add_node("retrieve", self.retrieve_documents)
|
214 |
self.workflow.add_node("analyze", self.analyze_content)
|
@@ -231,6 +238,7 @@ class ResearchWorkflow:
|
|
231 |
def ingest_query(self, state: AgentState) -> Dict:
|
232 |
try:
|
233 |
query = state["messages"][-1].content
|
|
|
234 |
return {
|
235 |
"messages": [AIMessage(content="Query ingested successfully")],
|
236 |
"context": {"raw_query": query},
|
@@ -243,6 +251,8 @@ class ResearchWorkflow:
|
|
243 |
try:
|
244 |
query = state["context"]["raw_query"]
|
245 |
docs = retriever.retrieve(query, "research")
|
|
|
|
|
246 |
return {
|
247 |
"messages": [AIMessage(content=f"Retrieved {len(docs)} documents")],
|
248 |
"context": {
|
@@ -255,7 +265,13 @@ class ResearchWorkflow:
|
|
255 |
|
256 |
def analyze_content(self, state: AgentState) -> Dict:
|
257 |
try:
|
258 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
259 |
prompt = ResearchConfig.ANALYSIS_TEMPLATE.format(context=docs)
|
260 |
response = self.processor.process_query(prompt)
|
261 |
|
@@ -304,9 +320,11 @@ Improve:
|
|
304 |
|
305 |
def _quality_check(self, state: AgentState) -> str:
|
306 |
content = state["messages"][-1].content
|
|
|
307 |
return "valid" if "VALID" in content else "invalid"
|
308 |
|
309 |
def _error_state(self, message: str) -> Dict:
|
|
|
310 |
return {
|
311 |
"messages": [AIMessage(content=f"❌ {message}")],
|
312 |
"context": {"error": True},
|
|
|
61 |
Format: Markdown with LaTeX mathematical notation where applicable
|
62 |
"""
|
63 |
|
64 |
+
# Validate API key configuration
|
65 |
if not ResearchConfig.DEEPSEEK_API_KEY:
|
66 |
st.error("""**Research Portal Configuration Required**
|
67 |
1. Obtain DeepSeek API key: [platform.deepseek.com](https://platform.deepseek.com/)
|
|
|
87 |
separators=["\n\n", "\n", "|||"]
|
88 |
)
|
89 |
docs = splitter.create_documents(documents)
|
90 |
+
# Log how many chunks were created
|
91 |
+
st.write(f"Created {len(docs)} chunks for collection '{collection_name}'")
|
92 |
return Chroma.from_documents(
|
93 |
documents=docs,
|
94 |
embedding=self.embeddings,
|
|
|
136 |
|
137 |
def retrieve(self, query: str, domain: str) -> List[Any]:
|
138 |
try:
|
139 |
+
results = self.retrievers[domain].invoke(query)
|
140 |
+
st.write(f"[DEBUG] Retrieved {len(results)} documents for query: '{query}' in domain '{domain}'")
|
141 |
+
return results
|
142 |
except KeyError:
|
143 |
+
st.error(f"[ERROR] Retrieval domain '{domain}' not found.")
|
144 |
return []
|
145 |
|
146 |
retriever = ResearchRetriever()
|
|
|
155 |
|
156 |
def process_query(self, prompt: str) -> Dict:
|
157 |
futures = []
|
158 |
+
for _ in range(3): # Triple redundancy for robustness
|
159 |
futures.append(self.executor.submit(
|
160 |
self._execute_api_request,
|
161 |
prompt
|
|
|
202 |
valid = [r for r in results if "error" not in r]
|
203 |
if not valid:
|
204 |
return {"error": "All API requests failed"}
|
205 |
+
# Choose the result with the longest content
|
206 |
return max(valid, key=lambda x: len(x.get('choices', [{}])[0].get('message', {}).get('content', '')))
|
207 |
|
208 |
# ------------------------------
|
|
|
215 |
self._build_workflow()
|
216 |
|
217 |
def _build_workflow(self):
|
218 |
+
# Register nodes in the state graph
|
219 |
self.workflow.add_node("ingest", self.ingest_query)
|
220 |
self.workflow.add_node("retrieve", self.retrieve_documents)
|
221 |
self.workflow.add_node("analyze", self.analyze_content)
|
|
|
238 |
def ingest_query(self, state: AgentState) -> Dict:
|
239 |
try:
|
240 |
query = state["messages"][-1].content
|
241 |
+
st.write(f"[DEBUG] Ingesting query: {query}")
|
242 |
return {
|
243 |
"messages": [AIMessage(content="Query ingested successfully")],
|
244 |
"context": {"raw_query": query},
|
|
|
251 |
try:
|
252 |
query = state["context"]["raw_query"]
|
253 |
docs = retriever.retrieve(query, "research")
|
254 |
+
# Log the retrieval result for debugging
|
255 |
+
st.write(f"[DEBUG] Retrieved {len(docs)} documents from retrieval node.")
|
256 |
return {
|
257 |
"messages": [AIMessage(content=f"Retrieved {len(docs)} documents")],
|
258 |
"context": {
|
|
|
265 |
|
266 |
def analyze_content(self, state: AgentState) -> Dict:
|
267 |
try:
|
268 |
+
# Ensure documents are present before proceeding
|
269 |
+
if "documents" not in state["context"] or not state["context"]["documents"]:
|
270 |
+
return self._error_state("No documents retrieved; please check your query or retrieval process.")
|
271 |
+
|
272 |
+
# Concatenate all document content for analysis
|
273 |
+
docs = "\n\n".join([d.page_content for d in state["context"]["documents"] if hasattr(d, "page_content")])
|
274 |
+
st.write(f"[DEBUG] Analyzing content from {len(state['context']['documents'])} documents.")
|
275 |
prompt = ResearchConfig.ANALYSIS_TEMPLATE.format(context=docs)
|
276 |
response = self.processor.process_query(prompt)
|
277 |
|
|
|
320 |
|
321 |
def _quality_check(self, state: AgentState) -> str:
|
322 |
content = state["messages"][-1].content
|
323 |
+
# Check for the keyword "VALID" in the output; if missing, trigger refinement
|
324 |
return "valid" if "VALID" in content else "invalid"
|
325 |
|
326 |
def _error_state(self, message: str) -> Dict:
|
327 |
+
st.write(f"[ERROR] {message}")
|
328 |
return {
|
329 |
"messages": [AIMessage(content=f"❌ {message}")],
|
330 |
"context": {"error": True},
|