Update app.py
app.py CHANGED
@@ -1,30 +1,21 @@
 import os
 import tempfile
 import uuid
 import base64
 import io
 import json
 import re
 from datetime import datetime, timedelta

-# Third-party imports
-import gradio as gr
-import groq
-import numpy as np
-import pandas as pd
-import openpyxl
-import requests
-import fitz  # PyMuPDF
-from PIL import Image
-from dotenv import load_dotenv
-from transformers import AutoProcessor, AutoModelForVision2Seq
-import torch
-
-# LangChain imports
-from langchain_community.embeddings import HuggingFaceEmbeddings
-from langchain_community.vectorstores import FAISS
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-
 # Load environment variables
 load_dotenv()
 client = groq.Client(api_key=os.getenv("GROQ_TECH_API_KEY"))
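A note on the client above: `groq.Client` exposes an OpenAI-style chat-completions interface, which is presumably what `generate_response` calls further down (the call itself falls outside the hunks shown). A minimal sketch of that call shape; the message payload is illustrative, not taken from this file:

import os
import groq

client = groq.Client(api_key=os.getenv("GROQ_TECH_API_KEY"))

# Illustrative request; the app builds `messages` from chat history plus PDF context.
completion = client.chat.completions.create(
    model="llama3-70b-8192",  # one of the models offered in the app's dropdown
    messages=[
        {"role": "system", "content": "You are a helpful technical assistant."},
        {"role": "user", "content": "Summarize the uploaded document."},
    ],
)
print(completion.choices[0].message.content)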
@@ -38,19 +29,6 @@ if not os.path.exists(FAISS_INDEX_DIR):
 # Dictionary to store user-specific vectorstores
 user_vectorstores = {}

-# Load SmolDocling model for image analysis
-def load_docling_model():
-    try:
-        processor = AutoProcessor.from_pretrained("ds4sd/SmolDocling-256M-preview")
-        model = AutoModelForVision2Seq.from_pretrained("ds4sd/SmolDocling-256M-preview")
-        return processor, model
-    except Exception as e:
-        print(f"Error loading SmolDocling model: {e}")
-        return None, None
-
-# Initialize SmolDocling model
-docling_processor, docling_model = load_docling_model()
-
 # Custom CSS for Tech theme
 custom_css = """
 :root {
@@ -92,90 +70,6 @@ body { background-color: var(--light-background); font-family: 'Google Sans', 'R
 .qa-body { color: var(--dark-text); font-size: 0.95rem; margin-bottom: 10px; }
 .qa-meta { display: flex; justify-content: space-between; color: #5F6368; font-size: 0.85rem; }
 .tag { background-color: #E8F0FE; color: var(--primary-color); padding: 4px 8px; border-radius: 4px; font-size: 0.8rem; margin-right: 5px; display: inline-block; }
-.toggle-container { display: flex; align-items: center; margin-bottom: 15px; }
-.toggle-label { margin-right: 10px; font-weight: 500; }
-.search-toggle { margin-left: 5px; }
-.voice-btn { background-color: var(--primary-color) !important; border-radius: 50% !important; width: 44px !important; height: 44px !important; display: flex !important; align-items: center !important; justify-content: center !important; color: var(--white) !important; box-shadow: 0 2px 5px rgba(0,0,0,0.2) !important; }
-.speak-btn { background-color: var(--secondary-color) !important; border-radius: 24px !important; color: var(--white) !important; padding: 8px 16px !important; font-weight: 500 !important; margin-left: 10px !important; }
-.audio-controls { display: flex; align-items: center; margin-top: 10px; }
-/* Audio Visualization Elements */
-.audio-visualization {
-    display: flex;
-    align-items: center;
-    justify-content: center;
-    gap: 4px;
-    height: 40px;
-    padding: 10px;
-    background-color: rgba(0,0,0,0.05);
-    border-radius: 12px;
-    margin: 10px 0;
-}
-.audio-bar {
-    width: 3px;
-    background-color: var(--accent-color);
-    border-radius: 2px;
-    height: 5px;
-    transition: height 0.1s ease;
-}
-.audio-status {
-    font-size: 0.85rem;
-    color: var(--secondary-color);
-    text-align: center;
-    margin-top: 5px;
-    font-style: italic;
-}
-.recording-indicator {
-    width: 12px;
-    height: 12px;
-    border-radius: 50%;
-    background-color: #ff4b4b;
-    margin-right: 8px;
-    animation: blink 1s infinite;
-}
-.playing-indicator {
-    width: 12px;
-    height: 12px;
-    border-radius: 50%;
-    background-color: #4bff4b;
-    margin-right: 8px;
-    animation: pulse 1s infinite;
-}
-@keyframes blink {
-    0% { opacity: 1; }
-    50% { opacity: 0.4; }
-    100% { opacity: 1; }
-}
-@keyframes pulse {
-    0% { transform: scale(1); }
-    50% { transform: scale(1.2); }
-    100% { transform: scale(1); }
-}
-.file-upload-enhancement .file-preview {
-    max-height: 200px;
-    overflow: auto;
-    border: 1px solid var(--border-color);
-    border-radius: 8px;
-    padding: 10px;
-    margin-top: 10px;
-    background-color: rgba(0,0,0,0.02);
-}
-.excel-preview-table {
-    width: 100%;
-    border-collapse: collapse;
-    font-size: 0.85rem;
-}
-.excel-preview-table th, .excel-preview-table td {
-    border: 1px solid #ddd;
-    padding: 4px 8px;
-    text-align: left;
-}
-.excel-preview-table th {
-    background-color: var(--secondary-color);
-    color: white;
-}
-.excel-preview-table tr:nth-child(even) {
-    background-color: rgba(0,0,0,0.03);
-}
 """

 # Function to process PDF files
@@ -215,146 +109,8 @@ def process_pdf(pdf_file):
         os.unlink(pdf_path)
         return None, f"Error processing PDF: {str(e)}", {"page_images": [], "total_pages": 0, "total_words": 0}

-# New function to process Excel files
-def process_excel(excel_file):
-    if excel_file is None:
-        return None, "No file uploaded", {"data_preview": "", "total_sheets": 0, "total_rows": 0}
-
-    try:
-        session_id = str(uuid.uuid4())
-        with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as temp_file:
-            temp_file.write(excel_file)
-            excel_path = temp_file.name
-
-        # Read Excel file with pandas
-        excel_data = pd.ExcelFile(excel_path)
-        sheet_names = excel_data.sheet_names
-        all_texts = []
-        total_rows = 0
-
-        # Process each sheet
-        for sheet in sheet_names:
-            df = pd.read_excel(excel_path, sheet_name=sheet)
-            total_rows += len(df)
-
-            # Convert dataframe to text for vectorization
-            sheet_text = f"Sheet: {sheet}\n"
-            sheet_text += df.to_string(index=False)
-            all_texts.append(sheet_text)
-
-        # Generate HTML preview of first sheet
-        first_df = pd.read_excel(excel_path, sheet_name=0)
-        preview_rows = min(10, len(first_df))
-        data_preview = first_df.head(preview_rows).to_html(classes="excel-preview-table", index=False)
-
-        # Process for vectorstore
-        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
-        chunks = text_splitter.create_documents(all_texts)
-        vectorstore = FAISS.from_documents(chunks, embeddings)
-        index_path = os.path.join(FAISS_INDEX_DIR, session_id)
-        vectorstore.save_local(index_path)
-        user_vectorstores[session_id] = vectorstore
-
-        os.unlink(excel_path)
-        excel_state = {"data_preview": data_preview, "total_sheets": len(sheet_names), "total_rows": total_rows}
-        return session_id, f"✅ Successfully processed {len(chunks)} text chunks from Excel file", excel_state
-    except Exception as e:
-        if "excel_path" in locals() and os.path.exists(excel_path):
-            os.unlink(excel_path)
-        return None, f"Error processing Excel file: {str(e)}", {"data_preview": "", "total_sheets": 0, "total_rows": 0}
-
-# Function to analyze image using SmolDocling
-def analyze_image(image_file):
-    if image_file is None:
-        return "No image uploaded. Please upload an image to analyze."
-
-    if docling_processor is None or docling_model is None:
-        return "SmolDocling model not loaded. Please check your installation."
-
-    try:
-        # Process the image - image_file is a filepath string from Gradio
-        image = Image.open(image_file)
-
-        # Use the SmolDocling model
-        inputs = docling_processor(images=image, return_tensors="pt")
-        with torch.no_grad():
-            outputs = docling_model.generate(
-                **inputs,
-                max_new_tokens=512,
-                temperature=0.1,
-                do_sample=False
-            )
-
-        # Decode the output
-        result = docling_processor.batch_decode(outputs, skip_special_tokens=True)[0]
-
-        # Format the result for display with technical emphasis
-        analysis = f"## Technical Document Analysis Results\n\n{result}\n\n"
-        analysis += "### Technical Insights\n\n"
-        analysis += "* The analysis provides technical information extracted from the document image.\n"
-        analysis += "* Consider this information as a starting point for further technical investigation.\n"
-        analysis += "* For code snippets or technical specifications, verify accuracy before implementation.\n"
-
-        return analysis
-    except Exception as e:
-        return f"Error analyzing image: {str(e)}"
-
-# Function to handle different file types
-def process_file(file_data, file_type):
-    if file_data is None:
-        return None, "No file uploaded", None
-
-    if file_type == "pdf":
-        return process_pdf(file_data)
-    elif file_type == "excel":
-        return process_excel(file_data)
-    elif file_type == "image":
-        # For image files, we'll just use them directly for analysis
-        # But we'll return a session ID to maintain consistency
-        session_id = str(uuid.uuid4())
-        return session_id, "✅ Image file ready for analysis", None
-    else:
-        return None, "Unsupported file type", None
-
-# Function for speech-to-text conversion
-def speech_to_text():
-    try:
-        r = sr.Recognizer()
-        with sr.Microphone() as source:
-            r.adjust_for_ambient_noise(source)
-            audio = r.listen(source)
-        text = r.recognize_google(audio)
-        return text
-    except sr.UnknownValueError:
-        return "Could not understand audio. Please try again."
-    except sr.RequestError as e:
-        return f"Error with speech recognition service: {e}"
-    except Exception as e:
-        return f"Error converting speech to text: {str(e)}"
-
-# Function for text-to-speech conversion
-def text_to_speech(text, history):
-    if not text or not history:
-        return None
-
-    try:
-        # Get the last bot response
-        last_response = history[-1][1]
-
-        # Convert text to speech
-        tts = pyttsx3.init()
-        tts.setProperty('rate', 150)
-        tts.setProperty('volume', 0.9)
-        tts.save_to_file(last_response, "temp_output.mp3")
-        tts.runAndWait()
-
-        return "temp_output.mp3"
-    except Exception as e:
-        print(f"Error in text-to-speech: {e}")
-        return None
-
 # Function to generate chatbot responses with Tech theme
-def generate_response(message, session_id, model_name, history, web_search_enabled):
     if not message:
         return history
     try:
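Two observations on the block removed above. First, the deleted speech_to_text and text_to_speech helpers referenced `sr` (SpeechRecognition) and `pyttsx3`, neither of which appears in the old import list, so removing the voice features also removes two latent NameErrors. Second, process_excel used the same indexing flow the surviving PDF path uses: split text into overlapping chunks, embed them, and save a per-session FAISS index. A minimal sketch of that flow; the embedding model name is an assumption, since the `embeddings` object is created in a region of app.py not shown in this diff:

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

# Same chunking parameters as process_pdf/process_excel.
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = splitter.create_documents(["Sheet: Sales\nregion  total\nEMEA    102"])

# Assumed model; the diff does not show which embedding model the app configures.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectorstore = FAISS.from_documents(chunks, embeddings)
print(vectorstore.similarity_search("EMEA total", k=1))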
@@ -365,8 +121,8 @@ def generate_response(message, session_id, model_name, history, web_search_enabl
         if docs:
             context = "\n\nRelevant information from uploaded PDF:\n" + "\n".join(f"- {doc.page_content}" for doc in docs)

-        # Check if it's a GitHub repo search
-        if
             query = re.sub(r'^/github\s+', '', message, flags=re.IGNORECASE)
             repo_results = search_github_repos(query)
             if repo_results:
@@ -383,8 +139,8 @@ def generate_response(message, session_id, model_name, history, web_search_enabl
             history.append((message, "No GitHub repositories found for your query."))
             return history

-        # Check if it's a Stack Overflow search
-        if
             query = re.sub(r'^/stack\s+', '', message, flags=re.IGNORECASE)
             qa_results = search_stackoverflow(query)
             if qa_results:
@@ -681,155 +437,106 @@ def perform_stack_search(query, tag, sort_by):
 with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
     current_session_id = gr.State(None)
     pdf_state = gr.State({"page_images": [], "total_pages": 0, "total_words": 0})
-    excel_state = gr.State({"data_preview": "", "total_sheets": 0, "total_rows": 0})
-    file_type = gr.State("none")
-    audio_status = gr.State("Ready")
-
     gr.HTML("""
     <div class="header">
-        <div class="header-title">Tech-Vision
-        <div class="header-subtitle">Analyze technical documents
     </div>
     """)
     with gr.Row(elem_classes="container"):
         with gr.Column(scale=1, min_width=300):
-
-
-
-            pdf_upload_button = gr.Button("Process PDF", variant="primary")
-
-            with gr.TabItem("Excel"):
-                excel_file = gr.File(label="Upload Excel File", file_types=[".xlsx", ".xls"], type="binary")
-                excel_upload_button = gr.Button("Process Excel", variant="primary")
-
-            with gr.TabItem("Image"):
-                image_input = gr.File(
-                    label="Upload Image",
-                    file_types=["image"],
-                    type="filepath"
-                )
-                analyze_btn = gr.Button("Analyze Image")
-
-            file_status = gr.Markdown("No file uploaded yet")
-
-            # Model selector
             model_dropdown = gr.Dropdown(
                 choices=["llama3-70b-8192", "llama3-8b-8192", "mixtral-8x7b-32768", "gemma-7b-it"],
                 value="llama3-70b-8192",
                 label="Select Groq Model"
             )
-
         with gr.Column(scale=2, min_width=600):
             with gr.Tabs():
                 with gr.TabItem("PDF Viewer"):
                     with gr.Column(elem_classes="pdf-viewer-container"):
                         page_slider = gr.Slider(minimum=1, maximum=1, step=1, label="Page Number", value=1)
                         pdf_image = gr.Image(label="PDF Page", type="pil", elem_classes="pdf-viewer-image")
-

-                with gr.TabItem("
-
-                    excel_stats = gr.Markdown("No Excel file uploaded yet", elem_classes="stats-box")

-                with gr.TabItem("
-
-
-
-
-    with gr.Row(elem_classes="container"):
-        with gr.Column():
-            audio_vis = gr.HTML("""
-            <div class="audio-visualization">
-                <div class="audio-bar" style="height: 5px;"></div>
-                <div class="audio-bar" style="height: 12px;"></div>
-                <div class="audio-bar" style="height: 18px;"></div>
-                <div class="audio-bar" style="height: 15px;"></div>
-                <div class="audio-bar" style="height: 10px;"></div>
-                <div class="audio-bar" style="height: 20px;"></div>
-                <div class="audio-bar" style="height: 14px;"></div>
-                <div class="audio-bar" style="height: 8px;"></div>
-            </div>
-            """, visible=False)
-            audio_status_display = gr.Markdown("", elem_classes="audio-status")

-    # Chat interface
     with gr.Row(elem_classes="container"):
         with gr.Column(scale=2, min_width=600):
-            chatbot = gr.Chatbot(
-                height=400,
-                show_copy_button=True,
-                elem_classes="chat-container",
-                type="messages"  # Use the new messages format
-            )
             with gr.Row():
-                msg = gr.Textbox(
-                    show_label=False,
-                    placeholder="Ask about your document or click the microphone to speak...",
-                    scale=5
-                )
-                voice_btn = gr.Button("🎤", elem_classes="voice-btn")
                 send_btn = gr.Button("Send", scale=1)
-
-            with gr.Row(elem_classes="audio-controls"):
-                clear_btn = gr.Button("Clear Conversation")
-                speak_btn = gr.Button("🔊 Speak Response", elem_classes="speak-btn")
-                audio_player = gr.Audio(label="Response Audio", type="filepath", visible=False)

-    # Event Handlers
-
-        lambda x: ("pdf", x),
-        inputs=[pdf_file],
-        outputs=[file_type, file_status]
-    ).then(
         process_pdf,
         inputs=[pdf_file],
-        outputs=[current_session_id,
     ).then(
         update_pdf_viewer,
         inputs=[pdf_state],
-        outputs=[page_slider, pdf_image,
-    )
-
-    # Event Handlers for Excel processing
-    def update_excel_preview(state):
-        if not state:
-            return "", "No Excel file uploaded yet"
-        preview = state.get("data_preview", "")
-        sheets = state.get("total_sheets", 0)
-        rows = state.get("total_rows", 0)
-        stats = f"**Excel Statistics:**\nSheets: {sheets}\nTotal Rows: {rows}"
-        return preview, stats
-
-    excel_upload_button.click(
-        lambda x: ("excel", x),
-        inputs=[excel_file],
-        outputs=[file_type, file_status]
-    ).then(
-        process_excel,
-        inputs=[excel_file],
-        outputs=[current_session_id, file_status, excel_state]
-    ).then(
-        update_excel_preview,
-        inputs=[excel_state],
-        outputs=[excel_preview, excel_stats]
     )

-    # Event Handlers for Image Analysis
-    analyze_btn.click(
-        lambda x: ("image", x),
-        inputs=[image_input],
-        outputs=[file_type, file_status]
-    ).then(
-        analyze_image,
-        inputs=[image_input],
-        outputs=[image_analysis_results]
-    ).then(
-        lambda x: Image.open(x) if x else None,
-        inputs=[image_input],
-        outputs=[image_preview]
-    )
-
-    # Chat message handling
     msg.submit(
         generate_response,
         inputs=[msg, current_session_id, model_dropdown, chatbot],
@@ -842,55 +549,43 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
         outputs=[chatbot]
     ).then(lambda: "", None, [msg])

-
-
-
-
-        outputs=[audio_status_display, audio_vis, msg]
-    )
-
-    # Improved text-to-speech with visual feedback
-    speak_btn.click(
-        text_to_speech,
-        inputs=[audio_status, chatbot],
-        outputs=[audio_status_display, audio_vis, audio_player]
-    ).then(
-        lambda x: gr.update(visible=True) if x else gr.update(visible=False),
-        inputs=[audio_player],
-        outputs=[audio_player]
     )

-    # Page navigation for PDF
     page_slider.change(
         update_image,
         inputs=[page_slider, pdf_state],
         outputs=[pdf_image]
     )

-    #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
     )

-
-
-
-
-
-

 # Launch the app
 if __name__ == "__main__":

+import gradio as gr
+import groq
 import os
 import tempfile
 import uuid
+from dotenv import load_dotenv
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.vectorstores import FAISS
+from langchain.embeddings import HuggingFaceEmbeddings
+import fitz  # PyMuPDF
 import base64
+from PIL import Image
 import io
+import requests
 import json
 import re
 from datetime import datetime, timedelta

 # Load environment variables
 load_dotenv()
 client = groq.Client(api_key=os.getenv("GROQ_TECH_API_KEY"))
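One caveat on the rewritten import block: `langchain.vectorstores` and `langchain.embeddings` are the legacy paths that newer LangChain releases deprecate in favor of `langchain_community`, the very paths the previous revision imported. A small fallback that tolerates either layout, in case the Space's pinned version changes:

# Prefer the community package (LangChain >= 0.1); fall back to the legacy paths.
try:
    from langchain_community.vectorstores import FAISS
    from langchain_community.embeddings import HuggingFaceEmbeddings
except ImportError:
    from langchain.vectorstores import FAISS
    from langchain.embeddings import HuggingFaceEmbeddings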
 # Dictionary to store user-specific vectorstores
 user_vectorstores = {}

 # Custom CSS for Tech theme
 custom_css = """
 :root {

 .qa-body { color: var(--dark-text); font-size: 0.95rem; margin-bottom: 10px; }
 .qa-meta { display: flex; justify-content: space-between; color: #5F6368; font-size: 0.85rem; }
 .tag { background-color: #E8F0FE; color: var(--primary-color); padding: 4px 8px; border-radius: 4px; font-size: 0.8rem; margin-right: 5px; display: inline-block; }
 """

 # Function to process PDF files

         os.unlink(pdf_path)
         return None, f"Error processing PDF: {str(e)}", {"page_images": [], "total_pages": 0, "total_words": 0}
 # Function to generate chatbot responses with Tech theme
+def generate_response(message, session_id, model_name, history):
     if not message:
         return history
     try:

         if docs:
             context = "\n\nRelevant information from uploaded PDF:\n" + "\n".join(f"- {doc.page_content}" for doc in docs)

+        # Check if it's a GitHub repo search
+        if re.match(r'^/github\s+.+', message, re.IGNORECASE):
             query = re.sub(r'^/github\s+', '', message, flags=re.IGNORECASE)
             repo_results = search_github_repos(query)
             if repo_results:

             history.append((message, "No GitHub repositories found for your query."))
             return history

+        # Check if it's a Stack Overflow search
+        if re.match(r'^/stack\s+.+', message, re.IGNORECASE):
             query = re.sub(r'^/stack\s+', '', message, flags=re.IGNORECASE)
             qa_results = search_stackoverflow(query)
             if qa_results:
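The `/github` and `/stack` prefixes form a small command router: `re.match` gates on the prefix and `re.sub` strips it to recover the query. The same pattern in isolation (the return strings stand in for the app's real search helpers):

import re

def route(message: str) -> str:
    # Case-insensitive prefix detection, mirroring generate_response above.
    if re.match(r'^/github\s+.+', message, re.IGNORECASE):
        return "github: " + re.sub(r'^/github\s+', '', message, flags=re.IGNORECASE)
    if re.match(r'^/stack\s+.+', message, re.IGNORECASE):
        return "stackoverflow: " + re.sub(r'^/stack\s+', '', message, flags=re.IGNORECASE)
    return "chat: " + message

print(route("/github faiss vector search"))  # github: faiss vector search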
 with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
     current_session_id = gr.State(None)
     pdf_state = gr.State({"page_images": [], "total_pages": 0, "total_words": 0})

     gr.HTML("""
     <div class="header">
+        <div class="header-title">Tech-Vision</div>
+        <div class="header-subtitle">Analyze technical documents with Groq's LLM API.</div>
     </div>
     """)
     with gr.Row(elem_classes="container"):
         with gr.Column(scale=1, min_width=300):
+            pdf_file = gr.File(label="Upload PDF Document", file_types=[".pdf"], type="binary")
+            upload_button = gr.Button("Process PDF", variant="primary")
+            pdf_status = gr.Markdown("No PDF uploaded yet")

             model_dropdown = gr.Dropdown(
                 choices=["llama3-70b-8192", "llama3-8b-8192", "mixtral-8x7b-32768", "gemma-7b-it"],
                 value="llama3-70b-8192",
                 label="Select Groq Model"
             )
+
+            # Tech Tools Section
+            gr.Markdown("### Developer Tools", elem_classes="tool-title")
+            with gr.Box(elem_classes="tool-container"):
+                with gr.Tabs():
+                    with gr.TabItem("GitHub Search"):
+                        repo_query = gr.Textbox(label="Search Query", placeholder="Enter keywords to search for repositories")
+                        with gr.Row():
+                            language = gr.Dropdown(
+                                choices=["any", "JavaScript", "Python", "Java", "C++", "TypeScript", "Go", "Rust", "PHP", "C#"],
+                                value="any",
+                                label="Language"
+                            )
+                            min_stars = gr.Dropdown(
+                                choices=["0", "10", "50", "100", "1000", "10000"],
+                                value="0",
+                                label="Min Stars"
+                            )
+                            sort_by = gr.Dropdown(
+                                choices=["stars", "forks", "updated"],
+                                value="stars",
+                                label="Sort By"
+                            )
+                        repo_search_btn = gr.Button("Search Repositories")
+
+                    with gr.TabItem("Stack Overflow"):
+                        stack_query = gr.Textbox(label="Search Query", placeholder="Enter your technical question")
+                        with gr.Row():
+                            tag = gr.Dropdown(
+                                choices=["any", "python", "javascript", "java", "c++", "react", "node.js", "android", "ios", "sql"],
+                                value="any",
+                                label="Tag"
+                            )
+                            so_sort_by = gr.Dropdown(
+                                choices=["votes", "newest", "activity"],
+                                value="votes",
+                                label="Sort By"
+                            )
+                        so_search_btn = gr.Button("Search Stack Overflow")
+
+                    with gr.TabItem("Code Explainer"):
+                        code_input = gr.Textbox(
+                            label="Code to Explain",
+                            placeholder="Paste your code here...",
+                            lines=10
+                        )
+                        explain_btn = gr.Button("Explain Code")
+
|
505 |
with gr.Tabs():
|
506 |
with gr.TabItem("PDF Viewer"):
|
507 |
with gr.Column(elem_classes="pdf-viewer-container"):
|
508 |
page_slider = gr.Slider(minimum=1, maximum=1, step=1, label="Page Number", value=1)
|
509 |
pdf_image = gr.Image(label="PDF Page", type="pil", elem_classes="pdf-viewer-image")
|
510 |
+
stats_display = gr.Markdown("No PDF uploaded yet", elem_classes="stats-box")
|
511 |
|
512 |
+
with gr.TabItem("GitHub Results"):
|
513 |
+
repo_results = gr.Markdown("Search for repositories to see results here")
|
|
|
514 |
|
515 |
+
with gr.TabItem("Stack Overflow Results"):
|
516 |
+
stack_results = gr.Markdown("Search for questions to see results here")
|
517 |
+
|
518 |
+
with gr.TabItem("Code Explanation"):
|
519 |
+
code_explanation = gr.Markdown("Paste your code and click 'Explain Code' to see an explanation here")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
520 |
|
|
|
521 |
with gr.Row(elem_classes="container"):
|
522 |
with gr.Column(scale=2, min_width=600):
|
523 |
+
chatbot = gr.Chatbot(height=500, bubble_full_width=False, show_copy_button=True, elem_classes="chat-container")
|
|
|
|
|
|
|
|
|
|
|
524 |
with gr.Row():
|
525 |
+
msg = gr.Textbox(show_label=False, placeholder="Ask about your document, type /github to search repos, or /stack to search Stack Overflow...", scale=5)
|
|
|
|
|
|
|
|
|
|
|
526 |
send_btn = gr.Button("Send", scale=1)
|
527 |
+
clear_btn = gr.Button("Clear Conversation")
|
|
|
|
|
|
|
|
|
528 |
|
529 |
+
# Event Handlers
|
530 |
+
upload_button.click(
|
|
|
|
|
|
|
|
|
531 |
process_pdf,
|
532 |
inputs=[pdf_file],
|
533 |
+
outputs=[current_session_id, pdf_status, pdf_state]
|
534 |
).then(
|
535 |
update_pdf_viewer,
|
536 |
inputs=[pdf_state],
|
537 |
+
outputs=[page_slider, pdf_image, stats_display]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
538 |
)
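The upload handler relies on Gradio event chaining: `.click()` returns an event object whose `.then()` runs the next callback only after the first one's outputs have been written, which is how the app sequences "index the PDF" before "refresh the viewer". A self-contained sketch of the pattern:

import gradio as gr

def step_one(name):
    return f"processed {name}"

def step_two(status):
    return f"viewer refreshed after: {status}"

with gr.Blocks() as demo:
    box = gr.Textbox(label="Input")
    status = gr.Markdown()
    viewer = gr.Markdown()
    run = gr.Button("Run")
    # step_two fires only after step_one's output has landed in `status`.
    run.click(step_one, inputs=[box], outputs=[status]).then(
        step_two, inputs=[status], outputs=[viewer]
    )

if __name__ == "__main__":
    demo.launch()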

     msg.submit(
         generate_response,
         inputs=[msg, current_session_id, model_dropdown, chatbot],

         outputs=[chatbot]
     ).then(lambda: "", None, [msg])

+    clear_btn.click(
+        lambda: ([], None, "No PDF uploaded yet", {"page_images": [], "total_pages": 0, "total_words": 0}, 0, None, "No PDF uploaded yet"),
+        None,
+        [chatbot, current_session_id, pdf_status, pdf_state, page_slider, pdf_image, stats_display]
     )

     page_slider.change(
         update_image,
         inputs=[page_slider, pdf_state],
         outputs=[pdf_image]
     )

+    # Tech tool handlers
+    repo_search_btn.click(
+        perform_repo_search,
+        inputs=[repo_query, language, sort_by, min_stars],
+        outputs=[repo_results]
+    )
+
+    so_search_btn.click(
+        perform_stack_search,
+        inputs=[stack_query, tag, so_sort_by],
+        outputs=[stack_results]
+    )
+
+    explain_btn.click(
+        explain_code,
+        inputs=[code_input],
+        outputs=[code_explanation]
     )

+    # Add footer with attribution
+    gr.HTML("""
+    <div style="text-align: center; margin-top: 20px; padding: 10px; color: #666; font-size: 0.8rem; border-top: 1px solid #eee;">
+        Created by Calvin Allen Crawford
+    </div>
+    """)

 # Launch the app
 if __name__ == "__main__":