shukdevdattaEX committed on
Commit acfaf02 · verified · 1 Parent(s): 3336c17

Create app.py

Files changed (1)
  1. app.py +565 -0
app.py ADDED
@@ -0,0 +1,565 @@
+ import gradio as gr
+ import json
+ import os
+ import sys
+ import time
+ import tempfile
+ import base64
+ import io
+ import subprocess
+ from datetime import datetime
+ from typing import List, Dict, Any, Optional, Tuple
+ from pathlib import Path
+
+ # Third-party dependencies. Note: sys is imported above, outside the try
+ # block, so that sys.exit() is available when an import fails.
+ try:
+     from together import Together
+     import PyPDF2
+     from PIL import Image
+     import speech_recognition as sr
+ except ImportError as e:
+     print(f"Missing dependency: {e}")
+     print("Install with: pip install gradio together PyPDF2 pillow speechrecognition pyaudio")
+     sys.exit(1)
+
+ class ConversationMemory:
+     """Manages conversation context and memory across sessions."""
+
+     def __init__(self):
+         self.conversations = []
+         self.context_graph = {}
+         self.session_data = {}
+
+     def add_interaction(self, input_type: str, content: str, response: str, metadata: Dict = None):
+         interaction = {
+             "timestamp": datetime.now().isoformat(),
+             "input_type": input_type,
+             "content": content[:500] + "..." if len(content) > 500 else content,  # Truncate for memory
+             "response": response[:1000] + "..." if len(response) > 1000 else response,
+             "metadata": metadata or {}
+         }
+         self.conversations.append(interaction)
+
+     def get_relevant_context(self, query: str, limit: int = 3) -> List[Dict]:
+         # Simple relevance scoring - in production, use embeddings
+         relevant = []
+         query_lower = query.lower()
+
+         for conv in reversed(self.conversations[-10:]):  # Check the last 10 interactions
+             score = 0
+             content_lower = conv["content"].lower()
+             response_lower = conv["response"].lower()
+
+             # Simple keyword matching
+             for word in query_lower.split():
+                 if len(word) > 3:  # Skip short words
+                     if word in content_lower or word in response_lower:
+                         score += 1
+
+             if score > 0:
+                 relevant.append((score, conv))
+
+         # Sort by relevance and return the top results
+         relevant.sort(key=lambda x: x[0], reverse=True)
+         return [conv for score, conv in relevant[:limit]]
+
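+ # Illustrative usage sketch (added commentary, not part of the original
+ # commit): each query word longer than three characters that appears in a
+ # stored interaction adds one point, and the highest-scoring interactions
+ # are returned first.
+ #
+ #   mem = ConversationMemory()
+ #   mem.add_interaction("text", "How do I sort a python list?", "Use sorted() or list.sort().")
+ #   mem.get_relevant_context("python sorting")  # matches on "python" -> score 1, returned
+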
+ class NexusAI:
+     """Main AI processing class."""
+
+     def __init__(self, api_key: Optional[str] = None):
+         self.api_key = api_key
+         self.client = None
+         self.memory = ConversationMemory()
+
+         if api_key:
+             self.initialize_client(api_key)
+
+     def initialize_client(self, api_key: str):
+         """Initialize the Together AI client."""
+         try:
+             self.client = Together(api_key=api_key)
+             self.api_key = api_key
+             return True, "API key initialized successfully!"
+         except Exception as e:
+             return False, f"Failed to initialize API key: {str(e)}"
+
+     def extract_text_from_pdf(self, pdf_path: str) -> str:
+         """Extract text from a PDF file."""
+         try:
+             with open(pdf_path, 'rb') as file:
+                 pdf_reader = PyPDF2.PdfReader(file)
+                 text = ""
+                 for page in pdf_reader.pages:
+                     text += page.extract_text() + "\n"
+                 return text.strip()
+         except Exception as e:
+             return f"Error reading PDF: {str(e)}"
+
+     def analyze_image(self, image_path: str) -> str:
+         """Analyze an image and return a description."""
+         try:
+             with Image.open(image_path) as img:
+                 # Basic image analysis - in production, use vision models
+                 width, height = img.size
+                 mode = img.mode
+                 format_type = img.format
+
+                 description = "Image Analysis:\n"
+                 description += f"- Dimensions: {width}x{height} pixels\n"
+                 description += f"- Color mode: {mode}\n"
+                 description += f"- Format: {format_type}\n"
+
+                 # Simple color analysis
+                 if mode == "RGB":
+                     # Get the dominant color (simplified: downsample, then count)
+                     img_small = img.resize((50, 50))
+                     colors = img_small.getcolors(2500)
+                     if colors:
+                         dominant_color = max(colors, key=lambda x: x[0])[1]
+                         description += f"- Dominant color (RGB): {dominant_color}\n"
+
+                 return description
+         except Exception as e:
+             return f"Error analyzing image: {str(e)}"
+
+     def transcribe_audio(self, audio_path: str) -> str:
+         """Transcribe audio to text."""
+         try:
+             r = sr.Recognizer()
+             with sr.AudioFile(audio_path) as source:
+                 audio_data = r.record(source)
+                 text = r.recognize_google(audio_data)
+                 return text
+         except Exception as e:
+             return f"Error transcribing audio: {str(e)}"
+
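+     # Note (added commentary, not in the original commit): recognize_google()
+     # calls Google's free web speech API, so transcription needs network
+     # access, and sr.AudioFile expects WAV/AIFF/FLAC input. For offline use,
+     # the same library offers recognize_sphinx() if pocketsphinx is installed.
+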
+     def execute_code(self, code: str, language: str = "python") -> str:
+         """Execute code safely (basic implementation)."""
+         try:
+             if language.lower() == "python":
+                 # Write the snippet to a temporary file
+                 with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as f:
+                     f.write(code)
+                     temp_file = f.name
+
+                 # Execute with a timeout
+                 try:
+                     result = subprocess.run([sys.executable, temp_file],
+                                             capture_output=True, text=True, timeout=10)
+                     output = result.stdout
+                     if result.stderr:
+                         output += f"\nErrors:\n{result.stderr}"
+                     return output
+                 except subprocess.TimeoutExpired:
+                     return "Code execution timed out (10s limit)"
+                 finally:
+                     os.unlink(temp_file)
+             else:
+                 return f"Language '{language}' not supported yet. Only Python is available."
+         except Exception as e:
+             return f"Error executing code: {str(e)}"
+
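+     # Added commentary (not in the original commit): a subprocess with a
+     # timeout limits runaway loops, but it is not a sandbox - the child still
+     # has full file and network access. On Unix, one possible hardening sketch
+     # is to cap CPU time in the child before the snippet runs:
+     #
+     #   import resource
+     #   def _limit():
+     #       resource.setrlimit(resource.RLIMIT_CPU, (5, 5))  # 5s of CPU time
+     #   subprocess.run([sys.executable, temp_file], preexec_fn=_limit, ...)
+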
+     def build_context_messages(self, user_input: str, input_type: str, extracted_content: str = "") -> List[Dict]:
+         """Build the context messages for the AI model."""
+         messages = []
+
+         # Add the system message
+         system_msg = """You are Nexus AI, a creative multimodal assistant that helps users across different types of content.
+ You excel at connecting insights across text, documents, images, voice, and code. Always provide helpful,
+ contextual responses that build on previous interactions when relevant."""
+
+         messages.append({"role": "system", "content": system_msg})
+
+         # Add relevant conversation history
+         relevant_context = self.memory.get_relevant_context(user_input)
+         for context in relevant_context:
+             messages.append({
+                 "role": "assistant",
+                 "content": f"[Previous {context['input_type']} interaction] {context['response'][:200]}..."
+             })
+
+         # Build the current user message
+         current_content = f"Input Type: {input_type}\n\n"
+
+         if extracted_content:
+             snippet = extracted_content[:2000] + "..." if len(extracted_content) > 2000 else extracted_content
+             current_content += f"Extracted Content:\n{snippet}\n\n"
+
+         current_content += f"User Query: {user_input}"
+
+         messages.append({"role": "user", "content": current_content})
+
+         return messages
+
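+     # Illustrative sketch (not in the original commit) of what this method
+     # returns for a plain text query with no stored context:
+     #
+     #   [
+     #     {"role": "system", "content": "You are Nexus AI, ..."},
+     #     {"role": "user", "content": "Input Type: text\n\nUser Query: Hello"},
+     #   ]
+     #
+     # Recalled history is replayed as assistant-role messages ahead of the
+     # user turn - a simplification of a full chat transcript.
+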
+     def generate_response(self, user_input: str, input_type: str, extracted_content: str = "") -> str:
+         """Generate an AI response using the AFM-4.5B model."""
+         if not self.client:
+             return "❌ Please initialize your Together AI API key first!"
+
+         try:
+             messages = self.build_context_messages(user_input, input_type, extracted_content)
+
+             response = self.client.chat.completions.create(
+                 model="arcee-ai/AFM-4.5B-Preview",
+                 messages=messages,
+                 max_tokens=1024,
+                 temperature=0.7
+             )
+
+             ai_response = response.choices[0].message.content
+
+             # Store the interaction in memory
+             self.memory.add_interaction(
+                 input_type=input_type,
+                 content=user_input + ("\n" + extracted_content if extracted_content else ""),
+                 response=ai_response
+             )
+
+             return ai_response
+
+         except Exception as e:
+             return f"❌ Error generating response: {str(e)}"
+
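+ # Added commentary (not in the original commit): a single module-level
+ # NexusAI instance means every browser session shares one API client and one
+ # memory store. On a shared deployment, per-session state (for example, a
+ # gr.State holding its own ConversationMemory) would keep histories separate.
+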
+ # Initialize the AI assistant
+ nexus_ai = NexusAI()
+
+ def initialize_api_key(api_key: str) -> Tuple[str, str]:
+     """Initialize the API key."""
+     if not api_key.strip():
+         return "❌ Please enter a valid API key", "error"
+
+     success, message = nexus_ai.initialize_client(api_key.strip())
+     status = "success" if success else "error"
+     return message, status
+
+ def process_text_input(user_input: str, api_key_status: str) -> str:
+     """Process text input."""
+     if api_key_status != "success":
+         return "❌ Please initialize your Together AI API key first!"
+
+     if not user_input.strip():
+         return "Please enter some text to get started!"
+
+     return nexus_ai.generate_response(user_input, "text")
+
+ def process_pdf_input(pdf_file, user_question: str, api_key_status: str) -> str:
+     """Process a PDF upload together with an optional question."""
+     if api_key_status != "success":
+         return "❌ Please initialize your Together AI API key first!"
+
+     if pdf_file is None:
+         return "Please upload a PDF file first!"
+
+     # Extract text from the PDF. Depending on the Gradio version, gr.File
+     # yields either a filepath string or a tempfile wrapper with a .name.
+     pdf_path = pdf_file.name if hasattr(pdf_file, "name") else pdf_file
+     extracted_text = nexus_ai.extract_text_from_pdf(pdf_path)
+
+     if user_question.strip():
+         return nexus_ai.generate_response(user_question, "pdf", extracted_text)
+     else:
+         return nexus_ai.generate_response("Please summarize this document", "pdf", extracted_text)
+
+ def process_image_input(image_file, user_question: str, api_key_status: str) -> str:
+     """Process an image upload together with an optional question."""
+     if api_key_status != "success":
+         return "❌ Please initialize your Together AI API key first!"
+
+     if image_file is None:
+         return "Please upload an image file first!"
+
+     # Analyze the image. gr.Image(type="filepath") passes a path string,
+     # not a file object, so it is used directly here.
+     image_analysis = nexus_ai.analyze_image(image_file)
+
+     if user_question.strip():
+         return nexus_ai.generate_response(user_question, "image", image_analysis)
+     else:
+         return nexus_ai.generate_response("What can you tell me about this image?", "image", image_analysis)
+
+ def process_audio_input(audio_file, user_question: str, api_key_status: str) -> str:
+     """Process an audio upload together with an optional question."""
+     if api_key_status != "success":
+         return "❌ Please initialize your Together AI API key first!"
+
+     if audio_file is None:
+         return "Please upload an audio file first!"
+
+     # Transcribe the audio. gr.Audio(type="filepath") passes a path string,
+     # not a file object, so it is used directly here.
+     transcribed_text = nexus_ai.transcribe_audio(audio_file)
+
+     if user_question.strip():
+         combined_input = f"Transcribed audio: '{transcribed_text}'\n\nUser question: {user_question}"
+         return nexus_ai.generate_response(combined_input, "audio", transcribed_text)
+     else:
+         return nexus_ai.generate_response("Please help me with this audio content", "audio", transcribed_text)
+
+ def process_code_input(code_input: str, language: str, action: str, api_key_status: str) -> str:
+     """Process code input: optionally execute it, then ask the model for analysis."""
+     if api_key_status != "success":
+         return "❌ Please initialize your Together AI API key first!"
+
+     if not code_input.strip():
+         return "Please enter some code first!"
+
+     result = ""
+
+     if action == "Execute Code":
+         execution_result = nexus_ai.execute_code(code_input, language)
+         result = f"**Code Execution Result:**\n```\n{execution_result}\n```\n\n"
+
+     ai_response = nexus_ai.generate_response(
+         f"Please analyze this {language} code and provide insights:\n\n{code_input}",
+         "code",
+         result
+     )
+
+     return result + ai_response
+
+ def show_conversation_history() -> str:
+     """Show the recent conversation history."""
+     if not nexus_ai.memory.conversations:
+         return "No conversation history yet. Start chatting to build your knowledge base!"
+
+     history = "## 📚 Recent Conversation History\n\n"
+     for i, conv in enumerate(nexus_ai.memory.conversations[-5:], 1):  # Show the last 5
+         timestamp = datetime.fromisoformat(conv["timestamp"]).strftime("%H:%M:%S")
+         history += f"**{i}. [{conv['input_type'].upper()}] {timestamp}**\n"
+         history += f"Input: {conv['content'][:100]}{'...' if len(conv['content']) > 100 else ''}\n"
+         history += f"Response: {conv['response'][:150]}{'...' if len(conv['response']) > 150 else ''}\n\n"
+
+     return history
+
+ # Create the Gradio interface
+ def create_nexus_interface():
+     with gr.Blocks(
+         theme=gr.themes.Soft(),
+         title="Nexus AI Assistant",
+         css="""
+         .gradio-container {
+             max-width: 1200px !important;
+         }
+         .api-key-box {
+             border: 2px solid #e1e5e9;
+             border-radius: 8px;
+             padding: 15px;
+             margin-bottom: 20px;
+             background-color: #f8f9fa;
+         }
+         """
+     ) as app:
+
+         # Header
+         gr.HTML("""
+         <div style="text-align: center; padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); border-radius: 10px; margin-bottom: 20px;">
+             <h1 style="color: white; margin: 0; font-size: 2.5em; font-weight: bold;">🚀 Nexus AI Assistant</h1>
+             <p style="color: white; margin: 10px 0 0 0; font-size: 1.2em;">Creative Multimodal AI Powered by AFM-4.5B</p>
+         </div>
+         """)
+
+         # API Key Section
+         with gr.Group(elem_classes=["api-key-box"]):
+             gr.HTML("<h3>🔑 API Configuration</h3>")
+             with gr.Row():
+                 api_key_input = gr.Textbox(
+                     label="Together AI API Key",
+                     type="password",
+                     placeholder="Enter your Together AI API key here...",
+                     scale=3
+                 )
+                 api_key_btn = gr.Button("Initialize API Key", variant="primary", scale=1)
+
+             api_key_status = gr.Textbox(
+                 label="Status",
+                 interactive=False,
+                 value="Please enter your API key to get started"
+             )
+
+         # Hidden state tracking whether the API key has been initialized
+         api_key_state = gr.State(value="not_initialized")
+
+         # Main Interface Tabs
+         with gr.Tabs():
+
+             # Text Chat Tab
+             with gr.Tab("💬 Text Chat"):
+                 with gr.Column():
+                     text_input = gr.Textbox(
+                         label="Your Message",
+                         placeholder="Ask me anything! I can help with creative tasks, analysis, problem-solving...",
+                         lines=3
+                     )
+                     text_btn = gr.Button("Send Message", variant="primary")
+                     text_output = gr.Textbox(
+                         label="Nexus AI Response",
+                         lines=8,
+                         interactive=False
+                     )
+
+             # PDF Analysis Tab
+             with gr.Tab("📄 PDF Analysis"):
+                 with gr.Row():
+                     with gr.Column(scale=1):
+                         pdf_file = gr.File(
+                             label="Upload PDF",
+                             file_types=[".pdf"]
+                         )
+                         pdf_question = gr.Textbox(
+                             label="Question about PDF (optional)",
+                             placeholder="What would you like to know about this document?",
+                             lines=2
+                         )
+                         pdf_btn = gr.Button("Analyze PDF", variant="primary")
+
+                     with gr.Column(scale=1):
+                         pdf_output = gr.Textbox(
+                             label="Analysis Result",
+                             lines=12,
+                             interactive=False
+                         )
+
+             # Image Analysis Tab
+             with gr.Tab("🖼️ Image Analysis"):
+                 with gr.Row():
+                     with gr.Column(scale=1):
+                         image_file = gr.Image(
+                             label="Upload Image",
+                             type="filepath"
+                         )
+                         image_question = gr.Textbox(
+                             label="Question about Image (optional)",
+                             placeholder="What would you like to know about this image?",
+                             lines=2
+                         )
+                         image_btn = gr.Button("Analyze Image", variant="primary")
+
+                     with gr.Column(scale=1):
+                         image_output = gr.Textbox(
+                             label="Analysis Result",
+                             lines=12,
+                             interactive=False
+                         )
+
+             # Voice Processing Tab
+             with gr.Tab("🎤 Voice Processing"):
+                 with gr.Row():
+                     with gr.Column(scale=1):
+                         audio_file = gr.Audio(
+                             label="Upload Audio",
+                             type="filepath"
+                         )
+                         audio_question = gr.Textbox(
+                             label="Additional Question (optional)",
+                             placeholder="Any specific question about the audio content?",
+                             lines=2
+                         )
+                         audio_btn = gr.Button("Process Audio", variant="primary")
+
+                     with gr.Column(scale=1):
+                         audio_output = gr.Textbox(
+                             label="Processing Result",
+                             lines=12,
+                             interactive=False
+                         )
+
+             # Code Executor Tab
+             with gr.Tab("⚡ Code Executor"):
+                 with gr.Row():
+                     with gr.Column(scale=1):
+                         code_input = gr.Code(
+                             label="Code Input",
+                             language="python",
+                             lines=10
+                         )
+                         with gr.Row():
+                             language_select = gr.Dropdown(
+                                 choices=["python", "javascript", "java", "cpp"],
+                                 value="python",
+                                 label="Language",
+                                 scale=1
+                             )
+                             code_action = gr.Radio(
+                                 choices=["Execute Code", "Analyze Only"],
+                                 value="Execute Code",
+                                 label="Action",
+                                 scale=1
+                             )
+                         code_btn = gr.Button("Process Code", variant="primary")
+
+                     with gr.Column(scale=1):
+                         code_output = gr.Textbox(
+                             label="Result & Analysis",
+                             lines=15,
+                             interactive=False
+                         )
+
+             # Memory & History Tab
+             with gr.Tab("🧠 Memory & History"):
+                 with gr.Column():
+                     gr.HTML("<h3>Conversation Memory</h3>")
+                     gr.HTML("<p>Nexus AI remembers your interactions and can connect insights across different input types.</p>")
+
+                     history_btn = gr.Button("Show Recent History", variant="secondary")
+                     history_output = gr.Textbox(
+                         label="Conversation History",
+                         lines=15,
+                         interactive=False
+                     )
+
+         # Event handlers
+         api_key_btn.click(
+             fn=initialize_api_key,
+             inputs=[api_key_input],
+             outputs=[api_key_status, api_key_state]
+         )
+
+         text_btn.click(
+             fn=process_text_input,
+             inputs=[text_input, api_key_state],
+             outputs=[text_output]
+         )
+
+         pdf_btn.click(
+             fn=process_pdf_input,
+             inputs=[pdf_file, pdf_question, api_key_state],
+             outputs=[pdf_output]
+         )
+
+         image_btn.click(
+             fn=process_image_input,
+             inputs=[image_file, image_question, api_key_state],
+             outputs=[image_output]
+         )
+
+         audio_btn.click(
+             fn=process_audio_input,
+             inputs=[audio_file, audio_question, api_key_state],
+             outputs=[audio_output]
+         )
+
+         code_btn.click(
+             fn=process_code_input,
+             inputs=[code_input, language_select, code_action, api_key_state],
+             outputs=[code_output]
+         )
+
+         history_btn.click(
+             fn=show_conversation_history,
+             outputs=[history_output]
+         )
+
+         # Footer
+         gr.HTML("""
+         <div style="text-align: center; padding: 20px; margin-top: 30px; border-top: 1px solid #e1e5e9;">
+             <p style="color: #666;">🚀 <strong>Nexus AI Assistant</strong> - Powered by AFM-4.5B | Built with ❤️ using Gradio</p>
+             <p style="color: #888; font-size: 0.9em;">Multi-modal AI assistant for creative and analytical tasks</p>
+         </div>
+         """)
+
+     return app
+
+ # Launch the application
+ if __name__ == "__main__":
+     app = create_nexus_interface()
+     app.launch(
+         server_name="0.0.0.0",
+         server_port=7860,
+         share=True,
+         debug=True
+     )
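+
+ # Added commentary (not in the original commit): the pipeline also works
+ # headlessly, which is handy for smoke-testing without the UI, e.g.:
+ #
+ #   ai = NexusAI(api_key="<your Together AI key>")
+ #   print(ai.generate_response("Suggest three names for a robotics club", "text"))
+ #
+ # share=True requests a public Gradio link; on Hugging Face Spaces the app is
+ # already publicly served, so that flag mainly matters for local runs.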