mgbam committed on
Commit
bd23f77
·
verified ·
1 Parent(s): a2dbafb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +238 -269
app.py CHANGED
@@ -1,9 +1,9 @@
1
  """
2
- AI Research Assistant Supreme - Enterprise-Grade Solution
3
  """
4
 
5
  # ------------------------------
6
- # Imports & Infrastructure
7
  # ------------------------------
8
  import os
9
  import re
@@ -11,319 +11,288 @@ import time
11
  import chromadb
12
  import requests
13
  import streamlit as st
14
- from typing import Sequence, Optional, Dict, Any
15
- from datetime import datetime
16
- from concurrent.futures import ThreadPoolExecutor
17
- from functools import lru_cache
18
- from langchain_core.messages import HumanMessage, AIMessage, ToolMessage
19
  from langchain.text_splitter import RecursiveCharacterTextSplitter
20
  from langchain_community.vectorstores import Chroma
21
  from langchain.tools.retriever import create_retriever_tool
22
  from langgraph.graph import END, StateGraph
23
  from langgraph.prebuilt import ToolNode
24
- from typing_extensions import TypedDict, Annotated
25
  from chromadb.config import Settings
26
- import logging
27
- import hashlib
28
- from queue import Queue
29
 
30
  # ------------------------------
31
- # Enterprise Configuration
32
  # ------------------------------
33
- class Config:
34
- DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY")
35
- MAX_CONCURRENT_REQUESTS = 3
36
- REQUEST_RATE_LIMIT = 5 # Requests per minute
37
- CACHE_SIZE = 1000
38
- SECURITY_SALT = os.environ.get("SECURITY_SALT", "default-secure-salt")
39
 
40
  # ------------------------------
41
- # Advanced Security Framework
42
  # ------------------------------
43
- class SecurityManager:
44
- @staticmethod
45
- def validate_api_key(key: str) -> bool:
46
- if not key.startswith("sk-"):
47
- return False
48
- return len(key) in {32, 40, 64} # Common API key lengths
49
-
50
- @staticmethod
51
- def generate_request_signature(payload: dict) -> str:
52
- timestamp = str(int(time.time()))
53
- data = timestamp + Config.SECURITY_SALT + str(payload)
54
- return hashlib.sha256(data.encode()).hexdigest()
55
 
56
  # ------------------------------
57
- # Quantum-Level Text Processing
58
  # ------------------------------
59
- class AdvancedTextProcessor:
60
  def __init__(self):
61
- self.splitter = RecursiveCharacterTextSplitter(
62
- chunk_size=512,
63
- chunk_overlap=128,
64
- separators=["\n\n", "\n", ". ", "! ", "? ", " ", ""],
65
- length_function=len,
66
- is_separator_regex=False
67
- )
 
68
 
69
- @lru_cache(maxsize=Config.CACHE_SIZE)
70
- def process_documents(self, texts: tuple, collection_name: str) -> Chroma:
71
- docs = self.splitter.create_documents(list(texts))
72
- return Chroma.from_documents(
73
- documents=docs,
74
- embedding=OpenAIEmbeddings(model="text-embedding-3-large"),
75
- client=chroma_client,
76
- collection_name=collection_name,
77
- collection_metadata={"hnsw:space": "cosine", "optimized": "true"}
78
- )
 
79
 
80
- # ------------------------------
81
- # Neural Workflow Orchestration
82
- # ------------------------------
83
- class EnterpriseWorkflowEngine:
84
- def __init__(self):
85
- self.text_processor = AdvancedTextProcessor()
86
- self._init_vector_stores()
87
- self._init_tools()
88
- self._build_graph()
89
 
90
- def _init_vector_stores(self):
91
- self.research_vs = self.text_processor.process_documents(
92
- tuple(research_texts), "research_collection"
 
 
93
  )
94
- self.development_vs = self.text_processor.process_documents(
95
- tuple(development_texts), "development_collection"
 
 
 
 
 
 
96
  )
97
 
98
- def _init_tools(self):
99
- self.tools = [
100
  create_retriever_tool(
101
- self.research_vs.as_retriever(search_kwargs={"k": 5}),
102
  "research_db",
103
- "Semantic search across research documents"
104
  ),
105
  create_retriever_tool(
106
- self.development_vs.as_retriever(search_kwargs={"k": 5}),
107
  "development_db",
108
- "Search through project development updates"
109
  )
110
  ]
111
-
112
- def _build_graph(self):
113
- self.workflow = StateGraph(AgentState)
114
- self.workflow.add_node("agent", self.quantum_agent)
115
- self.workflow.add_node("retrieve", ToolNode(self.tools))
116
- self.workflow.add_node("generate", self.generate_answer)
117
- self.workflow.add_node("rewrite", self.rewrite_query)
118
 
119
- self.workflow.set_entry_point("agent")
120
- self.workflow.add_conditional_edges(
121
- "agent", self._route_action,
122
- {"retrieve": "retrieve", "direct": "generate"}
123
- )
124
- self.workflow.add_conditional_edges(
125
- "retrieve", self._evaluate_results,
126
- {"generate": "generate", "rewrite": "rewrite"}
127
- )
128
- self.workflow.add_edge("generate", END)
129
- self.workflow.add_edge("rewrite", "agent")
130
 
131
- self.app = self.workflow.compile()
132
-
133
- def _route_action(self, state: AgentState) -> str:
134
- # Advanced routing logic using ML-based classification
135
- last_msg = state["messages"][-1].content.lower()
136
- research_keywords = {"research", "study", "paper", "algorithm"}
137
- dev_keywords = {"project", "status", "development", "update"}
138
 
139
- if any(kw in last_msg for kw in research_keywords):
140
- return "retrieve"
141
- elif any(kw in last_msg for kw in dev_keywords):
142
- return "retrieve"
143
- return "direct"
144
-
145
- def _evaluate_results(self, state: AgentState) -> str:
146
- # Advanced result evaluation with confidence scoring
147
- results = state["messages"][-1].content
148
- doc_count = results.count("Document(")
149
- confidence = min(doc_count / 5, 1.0) # Scale based on retrieved docs
150
 
151
- if confidence >= 0.7:
152
- return "generate"
153
- return "rewrite"
154
-
155
- # Core Components with Enterprise Features
156
- def quantum_agent(self, state: AgentState):
157
- # Implementation with advanced security and rate limiting
158
- pass
159
-
160
- def generate_answer(self, state: AgentState):
161
- # Multi-stage generation with fact-checking
162
- pass
163
-
164
- def rewrite_query(self, state: AgentState):
165
- # Context-aware query refinement
166
- pass
167
-
168
- # ------------------------------
169
- # Military-Grade Security Setup
170
- # ------------------------------
171
- if not SecurityManager.validate_api_key(Config.DEEPSEEK_API_KEY):
172
- st.error("""
173
- 🔐 Critical Security Alert:
174
- Invalid API key configuration detected!
175
- Please verify your DEEPSEEK_API_KEY environment variable.
176
- """)
177
- st.stop()
178
 
179
- # ------------------------------
180
- # Zero-Trust Vector Database
181
- # ------------------------------
182
- os.makedirs("chroma_db", exist_ok=True)
183
- chroma_client = chromadb.PersistentClient(
184
- path="chroma_db",
185
- settings=Settings(allow_reset=False, anonymized_telemetry=False)
186
- )
187
 
188
- # ------------------------------
189
- # Cybernetic UI Framework
190
- # ------------------------------
191
- class HolographicInterface:
192
- def __init__(self):
193
- self._init_style()
194
- self._init_session_state()
195
-
196
- def _init_style(self):
197
- st.set_page_config(
198
- page_title="NeuroSphere AI Analyst",
199
- layout="wide",
200
- initial_sidebar_state="expanded",
201
- menu_items={
202
- 'Get Help': 'https://neurosphere.ai',
203
- 'Report a bug': "https://neurosphere.ai/support",
204
- 'About': "# NeuroSphere v2.0 - Cognitive Analysis Suite"
205
- }
206
- )
207
-
208
- st.markdown(f"""
209
- <style>
210
- :root {{
211
- --primary: #2ecc71;
212
- --secondary: #3498db;
213
- --background: #0f0f12;
214
- --text: #ecf0f1;
215
- }}
216
 
217
- .stApp {{
218
- background: var(--background);
219
- color: var(--text);
220
- font-family: 'Roboto Mono', monospace;
221
- }}
222
 
223
- .stTextInput input, .stTextArea textarea {{
224
- background: #1a1a1f !important;
225
- color: var(--text) !important;
226
- border: 1px solid #2c3e50;
227
- border-radius: 8px;
228
- padding: 15px !important;
229
- }}
 
 
 
 
 
 
 
 
 
230
 
231
- .stButton>button {{
232
- background: linear-gradient(135deg, var(--primary), var(--secondary));
233
- border: none;
234
- border-radius: 8px;
235
- padding: 12px 24px;
236
- font-weight: 700;
237
- transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
238
- }}
239
 
240
- .stButton>button:hover {{
241
- transform: translateY(-2px);
242
- box-shadow: 0 4px 15px rgba(46, 204, 113, 0.3);
243
- }}
 
 
 
 
 
 
 
244
 
245
- .document-card {{
246
- background: #1a1a1f;
247
- border-left: 4px solid var(--secondary);
248
- border-radius: 8px;
249
- padding: 1.2rem;
250
- margin: 1rem 0;
251
- box-shadow: 0 2px 8px rgba(0,0,0,0.3);
252
- }}
253
- </style>
254
- """, unsafe_allow_html=True)
255
-
256
- def _init_session_state(self):
257
- if "conversation" not in st.session_state:
258
- st.session_state.conversation = []
259
- if "last_request" not in st.session_state:
260
- st.session_state.last_request = 0
261
-
262
- def render(self):
263
- st.title("🧠 NeuroSphere AI Research Analyst")
264
- self._render_sidebar()
265
- self._render_main_interface()
266
-
267
- def _render_sidebar(self):
268
- with st.sidebar:
269
- st.header("📡 Knowledge Nucleus")
270
- with st.expander("🔬 Research Corpus", expanded=True):
271
- for text in research_texts:
272
- st.markdown(f'<div class="document-card">{text}</div>',
273
- unsafe_allow_html=True)
274
 
275
- with st.expander("🚀 Development Hub", expanded=True):
276
- for text in development_texts:
277
- st.markdown(f'<div class="document-card">{text}</div>',
278
- unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
279
 
280
- st.divider()
281
- self._render_analytics()
282
-
283
- def _render_analytics(self):
284
- st.subheader("📊 Cognitive Metrics")
285
- col1, col2 = st.columns(2)
286
- col1.metric("Processing Speed", "42ms", "-3ms")
287
- col2.metric("Accuracy Confidence", "98.7%", "+0.5%")
288
- st.progress(0.87, text="Knowledge Coverage")
289
-
290
- def _render_main_interface(self):
291
- col1, col2 = st.columns([1, 2])
292
 
293
- with col1:
294
- self._render_chat_interface()
295
-
296
- with col2:
297
- self._render_analysis_panel()
 
 
 
 
 
 
 
 
 
298
 
299
- def _render_chat_interface(self):
300
- with st.container(height=600, border=False):
301
- st.subheader("💬 NeuroDialogue Interface")
302
- query = st.chat_input("Query the knowledge universe...")
303
-
304
- if query:
305
- self._handle_query(query)
306
-
307
- for msg in st.session_state.conversation:
308
- self._render_message(msg)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
309
 
310
- def _render_analysis_panel(self):
311
- with st.container(height=600, border=False):
312
- st.subheader("🔍 Deep Analysis Matrix")
313
- # Implement advanced visualization components
314
 
315
- def _handle_query(self, query: str):
316
- # Implement enterprise query handling with rate limiting
317
- pass
318
 
319
- def _render_message(self, msg: dict):
320
- # Implement holographic message rendering
321
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
322
 
323
- # ------------------------------
324
- # Quantum Execution Core
325
- # ------------------------------
326
  if __name__ == "__main__":
327
- interface = HolographicInterface()
328
- interface.render()
329
- engine = EnterpriseWorkflowEngine()
 
 
 
 
 
1
  """
2
+ AI Research Assistant
3
  """
4
 
5
  # ------------------------------
6
+ # Core Imports & Configuration
7
  # ------------------------------
8
  import os
9
  import re
 
11
  import chromadb
12
  import requests
13
  import streamlit as st
14
+ from typing import Sequence, Tuple
15
+ from typing_extensions import TypedDict, Annotated
16
+ from langchain_core.messages import HumanMessage, AIMessage
 
 
17
  from langchain.text_splitter import RecursiveCharacterTextSplitter
18
  from langchain_community.vectorstores import Chroma
19
  from langchain.tools.retriever import create_retriever_tool
20
  from langgraph.graph import END, StateGraph
21
  from langgraph.prebuilt import ToolNode
22
+ from langgraph.graph.message import add_messages
23
  from chromadb.config import Settings
24
+ from langchain_openai import OpenAIEmbeddings
 
 
25
 
26
  # ------------------------------
27
+ # Type Definitions
28
  # ------------------------------
29
class AgentState(TypedDict):
    """State dictionary passed between the nodes of the workflow graph."""

    # Conversation messages; annotated with LangGraph's `add_messages` so the
    # graph knows how to combine the messages returned by each node.
    messages: Annotated[Sequence[AIMessage | HumanMessage], add_messages]
 
 
 
 
31
 
32
  # ------------------------------
33
+ # Configuration & Constants
34
  # ------------------------------
35
class Config:
    """Static settings read by the assistant."""

    # DeepSeek key is taken from the environment once, at import time.
    API_KEY = os.environ.get("DEEPSEEK_API_KEY")

    # Directory used by the persistent Chroma client.
    CHROMA_PATH = "chroma_db"

    # Keyword arguments forwarded to RecursiveCharacterTextSplitter.
    TEXT_SPLITTER_CONFIG = dict(
        chunk_size=512,
        chunk_overlap=128,
        separators=["\n\n", "\n", ". ", "! ", "? "],
    )
 
 
 
 
43
 
44
  # ------------------------------
45
+ # Core System Components
46
  # ------------------------------
47
class ResearchAssistant:
    """Retrieval assistant built from two Chroma collections and a LangGraph workflow.

    Construction embeds the built-in research and development snippets,
    wraps each collection in a retriever tool and compiles the graph
    ``analyze -> (retrieve) -> synthesize``. The compiled graph is available
    as ``self.workflow``.
    """

    CHAT_URL = "https://api.deepseek.com/v1/chat/completions"
    CHAT_MODEL = "deepseek-chat"

    # Routing keyword -> retriever tool that should be queried when the
    # keyword shows up in the analysis text. The keys are exactly the
    # keywords `_needs_retrieval` routes on, so routing and tool planning
    # can never disagree.
    _KEYWORD_TOOLS = {
        "research": "research_db",
        "study": "research_db",
        "trend": "research_db",
        "project": "development_db",
        "develop": "development_db",
    }

    def __init__(self):
        self.embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
        self.vector_stores = self._init_vector_stores()
        self.tools = self._create_tools()
        self.workflow = self._build_workflow()

    def _init_vector_stores(self) -> "Tuple[Chroma, Chroma]":
        """Build the research and development vector stores.

        Returns:
            ``(research_store, development_store)``, both backed by one
            persistent Chroma client rooted at ``Config.CHROMA_PATH``.
        """
        splitter = RecursiveCharacterTextSplitter(**Config.TEXT_SPLITTER_CONFIG)

        research_docs = splitter.create_documents([
            "Research Report: New AI Model Achieves 98% Image Recognition Accuracy",
            "Transformers: The New NLP Architecture Standard",
            "Quantum Machine Learning: Emerging Trends and Applications",
        ])
        development_docs = splitter.create_documents([
            "Project A: UI Design Finalized, API Integration Phase",
            "Project B: Feature Testing and Bug Fixes",
            "Product Y: Performance Optimization Pre-Release",
        ])

        client = chromadb.PersistentClient(
            path=Config.CHROMA_PATH,
            settings=Settings(anonymized_telemetry=False),
        )

        # Stable ids: the store is persistent and this method runs on every
        # construction, so without fixed ids each run would add another copy
        # of the same documents to the collections.
        return (
            Chroma.from_documents(
                research_docs,
                self.embeddings,
                ids=[f"research-{i}" for i in range(len(research_docs))],
                client=client,
                collection_name="research",
            ),
            Chroma.from_documents(
                development_docs,
                self.embeddings,
                ids=[f"development-{i}" for i in range(len(development_docs))],
                client=client,
                collection_name="development",
            ),
        )

    def _create_tools(self):
        """Wrap both vector stores in retriever tools.

        ``score_threshold`` is only honoured by the
        ``similarity_score_threshold`` search type, so that type is selected
        explicitly instead of relying on the default similarity search.
        """
        search_options = {"k": 3, "score_threshold": 0.7}
        research_retriever = self.vector_stores[0].as_retriever(
            search_type="similarity_score_threshold",
            search_kwargs=dict(search_options),
        )
        development_retriever = self.vector_stores[1].as_retriever(
            search_type="similarity_score_threshold",
            search_kwargs=dict(search_options),
        )

        return [
            create_retriever_tool(
                research_retriever,
                "research_db",
                "Access technical research papers and reports",
            ),
            create_retriever_tool(
                development_retriever,
                "development_db",
                "Retrieve project development status updates",
            ),
        ]

    def _build_workflow(self):
        """Construct and compile the processing graph."""
        workflow = StateGraph(AgentState)

        workflow.add_node("analyze", self.analyze_query)
        workflow.add_node("retrieve", ToolNode(self.tools))
        workflow.add_node("synthesize", self.synthesize_response)

        workflow.set_entry_point("analyze")
        workflow.add_conditional_edges(
            "analyze",
            self._needs_retrieval,
            {"retrieve": "retrieve", "direct": "synthesize"},
        )
        workflow.add_edge("retrieve", "synthesize")
        workflow.add_edge("synthesize", END)

        return workflow.compile()

    def _needs_retrieval(self, state: "AgentState") -> str:
        """Route on the latest message.

        Returns ``"retrieve"`` when the lower-cased content of the last
        message contains any routing keyword, otherwise ``"direct"``. This
        runs after the ``analyze`` node, so the last message is the analysis
        that node produced.
        """
        text = state["messages"][-1].content.lower()
        if any(keyword in text for keyword in self._KEYWORD_TOOLS):
            return "retrieve"
        return "direct"

    def _plan_tool_calls(self, analysis: str, query: str) -> list:
        """Return the tool calls to attach to the analysis message.

        One call is produced per retriever tool whose keywords occur in
        ``analysis``; each call searches for the user's original ``query``.
        An empty list means nothing needs to be retrieved.
        """
        text = analysis.lower()
        tool_names = []
        for keyword, tool_name in self._KEYWORD_TOOLS.items():
            if keyword in text and tool_name not in tool_names:
                tool_names.append(tool_name)
        return [
            {
                "name": tool_name,
                "args": {"query": query},
                "id": f"call_{tool_name}",
                "type": "tool_call",
            }
            for tool_name in tool_names
        ]

    def _chat(self, prompt: str, temperature: float, timeout: float) -> str:
        """Send one user prompt to the chat-completions endpoint.

        Returns:
            The content of the first choice in the response.

        Raises:
            requests.RequestException: on transport errors or a non-2xx
                status (via ``raise_for_status``).
            KeyError, IndexError, TypeError: if the response body does not
                have the expected ``choices[0].message.content`` layout.
        """
        response = requests.post(
            self.CHAT_URL,
            headers={
                "Authorization": f"Bearer {Config.API_KEY}",
                "Content-Type": "application/json",
            },
            json={
                "model": self.CHAT_MODEL,
                "messages": [{"role": "user", "content": prompt}],
                "temperature": temperature,
            },
            timeout=timeout,
        )
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]

    def analyze_query(self, state: "AgentState"):
        """Classify the user's query and plan retrieval.

        Returns a state update holding one ``AIMessage`` with the model's
        analysis. When the analysis mentions a routing keyword, the message
        also carries tool calls so that the ``retrieve`` node (a ``ToolNode``)
        actually runs the matching retrievers. On failure the message
        carries an error text instead and no tool calls.
        """
        try:
            user_input = state["messages"][-1].content
            prompt = (
                "Analyze this query and format as:\n"
                "CATEGORY: [RESEARCH|DEVELOPMENT|GENERAL]\n"
                "KEY_TERMS: comma-separated list\n"
                f"{user_input}"
            )
            analysis = self._chat(prompt, temperature=0.3, timeout=15)
            return {"messages": [AIMessage(
                content=analysis,
                tool_calls=self._plan_tool_calls(analysis, user_input),
            )]}
        except (requests.RequestException, KeyError, IndexError,
                TypeError, ValueError) as e:
            return {"messages": [AIMessage(
                content=f"Analysis Error: {str(e)}. Please rephrase your question."
            )]}

    def synthesize_response(self, state: "AgentState"):
        """Generate the final answer from everything in the state.

        The contents of all messages (query, analysis and any retrieved
        text) are joined with newlines and sent as context. Returns a state
        update with one ``AIMessage``; on failure it carries an error text.
        """
        try:
            context = "\n".join(msg.content for msg in state["messages"])
            prompt = (
                "Synthesize this information:\n"
                f"{context}\n"
                "\n"
                "Include:\n"
                "1. Key findings\n"
                "2. Supporting evidence\n"
                "3. Technical details\n"
                "4. Potential applications"
            )
            return {"messages": [AIMessage(
                content=self._chat(prompt, temperature=0.5, timeout=20)
            )]}
        except (requests.RequestException, KeyError, IndexError,
                TypeError, ValueError) as e:
            return {"messages": [AIMessage(
                content=f"Synthesis Error: {str(e)}. Please try again later."
            )]}
208
+
209
+ # ------------------------------
210
+ # Professional UI Interface
211
+ # ------------------------------
212
def main():
    """Render the Streamlit page and run submitted queries.

    The left column holds the query form; a submitted, non-blank query is
    run through a freshly built ``ResearchAssistant`` workflow and the last
    message of the result is shown. The right column lists the knowledge
    base contents.
    """
    import html  # local import: only used to escape model output below

    st.set_page_config(
        page_title="Research Assistant Pro",
        layout="wide",
        initial_sidebar_state="expanded",
    )

    # Dark theme implementation. Built from explicit lines so the markup
    # never depends on how this source file happens to be indented.
    st.markdown(
        "<style>\n"
        ".stApp {\n"
        "    background-color: #0f1114;\n"
        "    color: #ffffff;\n"
        "}\n"
        ".stTextInput input, .stTextArea textarea {\n"
        "    background-color: #1e1e24 !important;\n"
        "    color: #ffffff !important;\n"
        "}\n"
        ".stButton>button {\n"
        "    background: #2563eb;\n"
        "    transition: all 0.2s;\n"
        "}\n"
        ".stButton>button:hover {\n"
        "    background: #1d4ed8;\n"
        "    transform: scale(1.02);\n"
        "}\n"
        ".result-card {\n"
        "    background: #1a1a1f;\n"
        "    border-radius: 8px;\n"
        "    padding: 1.5rem;\n"
        "    margin: 1rem 0;\n"
        "}\n"
        "</style>",
        unsafe_allow_html=True,
    )

    st.title("🔍 Research Assistant Pro")
    st.write("Advanced AI-Powered Research Analysis")

    col1, col2 = st.columns([1, 2])

    with col1:
        with st.form("query_form"):
            query = st.text_area(
                "Research Query:",
                height=150,
                placeholder="Enter your research question...",
            )
            submitted = st.form_submit_button("Analyze")

        # Ignore submissions that contain only whitespace.
        if submitted and query and query.strip():
            with st.spinner("Processing..."):
                try:
                    assistant = ResearchAssistant()
                    result = assistant.workflow.invoke({"messages": [
                        HumanMessage(content=query)
                    ]})

                    # The answer is model output derived from user input and
                    # is rendered with unsafe_allow_html, so escape it first;
                    # newlines become <br> to keep the line structure.
                    answer = str(result["messages"][-1].content)
                    safe_answer = html.escape(answer).replace("\n", "<br>")

                    with st.expander("Analysis Details", expanded=True):
                        st.markdown(
                            f'<div class="result-card">{safe_answer}</div>',
                            unsafe_allow_html=True,
                        )
                except Exception as e:
                    # UI boundary: show any failure instead of crashing the page.
                    st.error(f"Processing Error: {str(e)}")

    with col2:
        st.subheader("Knowledge Base")
        with st.expander("Research Documents"):
            st.info(
                "- Advanced Image Recognition Systems\n"
                "- Transformer Architecture Analysis\n"
                "- Quantum ML Research"
            )

        with st.expander("Development Updates"):
            st.info(
                "- Project A: API Integration Phase\n"
                "- Project B: Feature Testing\n"
                "- Product Y: Optimization Stage"
            )
290
 
 
 
 
291
  if __name__ == "__main__":
292
+ if not Config.API_KEY:
293
+ st.error("""
294
+ 🔑 Configuration Required:
295
+ Set DEEPSEEK_API_KEY environment variable
296
+ """)
297
+ st.stop()
298
+ main()