Spaces:
Sleeping
Sleeping
import gradio as gr | |
import groq | |
import os | |
import tempfile | |
import uuid | |
from dotenv import load_dotenv | |
from langchain_community.vectorstores import FAISS | |
from langchain_community.embeddings import HuggingFaceInstructEmbeddings | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
import fitz # PyMuPDF | |
import base64 | |
from PIL import Image | |
import io | |
import requests | |
import json | |
import re | |
from datetime import datetime, timedelta | |
from pathlib import Path | |
import torch | |
import numpy as np | |
# Load environment variables and create the Groq API client
load_dotenv()
client = groq.Client(api_key=os.getenv("GROQ_TECH_API_KEY"))


def _load_embeddings():
    """Return the first embeddings model that loads on CPU, or None.

    The primary model is tried first, then the fallback; a warning is printed
    for every candidate that fails to load.
    """
    candidates = (
        ("hkunlp/instructor-base", "Warning: Failed to load primary embeddings model: "),
        ("all-MiniLM-L6-v2", "Warning: Failed to load fallback embeddings model: "),
    )
    for model_name, warning_prefix in candidates:
        try:
            return HuggingFaceInstructEmbeddings(
                model_name=model_name,
                model_kwargs={"device": "cpu"},  # Force CPU usage
            )
        except Exception as exc:
            print(f"{warning_prefix}{exc}")
    return None


# Initialize embeddings with error handling (None when no model could be loaded)
embeddings = _load_embeddings()
# Directory to store FAISS indexes with better naming
FAISS_INDEX_DIR = "faiss_indexes_tech_cpu"
# exist_ok=True removes the check-then-create race of exists() + makedirs()
os.makedirs(FAISS_INDEX_DIR, exist_ok=True)

# Dictionary to store user-specific vectorstores, keyed by session id
user_vectorstores = {}
# Modern UI HTML Template | |
MODERN_UI = """ | |
<!DOCTYPE html> | |
<html lang="en"> | |
<head> | |
<meta charset="UTF-8"> | |
<meta name="viewport" content="width=device-width, initial-scale=1.0"> | |
<title>Tech-Vision AI | Advanced Code Analysis</title> | |
<link href="https://fonts.googleapis.com/css2?family=Google+Sans:wght@400;500;700&family=Roboto:wght@300;400;500&family=Roboto+Mono&display=swap" rel="stylesheet"> | |
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css"> | |
<style> | |
/* CSS styles from the original HTML file */ | |
:root { | |
--primary-color: #4285F4; | |
--secondary-color: #34A853; | |
--accent-color: #EA4335; | |
--yellow-color: #FBBC05; | |
--light-background: #F8F9FA; | |
--dark-text: #202124; | |
--white: #FFFFFF; | |
--border-color: #DADCE0; | |
--code-bg: #F1F3F4; | |
--shadow-sm: 0 1px 3px rgba(0,0,0,0.12), 0 1px 2px rgba(0,0,0,0.24); | |
--shadow-md: 0 4px 6px rgba(0,0,0,0.1); | |
--shadow-lg: 0 10px 20px rgba(0,0,0,0.1); | |
--transition: all 0.3s cubic-bezier(.25,.8,.25,1); | |
} | |
* { | |
margin: 0; | |
padding: 0; | |
box-sizing: border-box; | |
} | |
body { | |
background-color: var(--light-background); | |
font-family: 'Roboto', sans-serif; | |
color: var(--dark-text); | |
line-height: 1.6; | |
} | |
.container { | |
max-width: 1400px; | |
margin: 0 auto; | |
padding: 0 20px; | |
} | |
/* Add all other CSS styles from the HTML file here */ | |
/* ... (include all CSS content from the provided HTML file) ... */ | |
</style> | |
</head> | |
<body> | |
<!-- Header Section --> | |
<header> | |
<div class="container"> | |
<nav class="navbar"> | |
<div class="logo"> | |
<i class="fas fa-code logo-icon"></i> | |
<span class="logo-text">Tech-Vision AI</span> | |
</div> | |
<div class="nav-links"> | |
<a href="#" class="nav-link active" data-tab-trigger="code-analysis">Code Analysis</a> | |
<a href="#" class="nav-link" data-tab-trigger="github-results">GitHub Search</a> | |
<a href="#" class="nav-link" data-tab-trigger="stack-results">Stack Overflow</a> | |
<a href="#" class="nav-link" data-tab-trigger="code-explanation">Code Explainer</a> | |
</div> | |
</nav> | |
</div> | |
</header> | |
<!-- Main Content --> | |
<div class="container"> | |
<div class="main-content"> | |
<!-- Sidebar Section --> | |
<div class="sidebar"> | |
<!-- File Upload Section --> | |
<div class="sidebar-section"> | |
<h2 class="sidebar-title"><i class="fas fa-upload"></i> Upload Code</h2> | |
<div class="file-upload" id="dropzone"> | |
<input type="file" id="file-input" hidden accept=".py,.js,.java,.cpp,.c,.cs,.php,.rb,.go,.ts"> | |
<i class="fas fa-file-code"></i> | |
<p>Drag & drop your code file here</p> | |
<span>or click to browse</span> | |
</div> | |
<button class="upload-btn" id="analyze-btn"> | |
<i class="fas fa-search"></i> Analyze Code | |
</button> | |
<div class="model-selection"> | |
<select id="model-dropdown"> | |
<option value="llama3-70b-8192">llama3-70b-8192</option> | |
<option value="mixtral-8x7b-32768">mixtral-8x7b-32768</option> | |
<option value="gemma-7b-it">gemma-7b-it</option> | |
</select> | |
</div> | |
</div> | |
<!-- Tools Section --> | |
<div class="sidebar-section"> | |
<h2 class="sidebar-title"><i class="fas fa-tools"></i> Developer Tools</h2> | |
<div class="accordion"> | |
<!-- Include all tool sections from the HTML --> | |
<!-- ... (GitHub Search, Stack Overflow, Code Explainer) ... --> | |
</div> | |
</div> | |
<!-- Status Section --> | |
<div class="sidebar-section"> | |
<h2 class="sidebar-title"><i class="fas fa-info-circle"></i> Status</h2> | |
<div id="file-status">No file uploaded yet</div> | |
</div> | |
</div> | |
<!-- Main Area --> | |
<div class="main-area"> | |
<!-- Tabs Navigation --> | |
<div class="tabs"> | |
<div class="tab active" data-tab="code-analysis">Code Analysis</div> | |
<div class="tab" data-tab="github-results">GitHub Results</div> | |
<div class="tab" data-tab="stack-results">Stack Overflow Results</div> | |
<div class="tab" data-tab="code-explanation">Code Explanation</div> | |
</div> | |
<!-- Tab Content --> | |
<div class="tab-content"> | |
<!-- Include all tab panes from the HTML --> | |
<!-- ... (Code Analysis, GitHub Results, etc) ... --> | |
</div> | |
<!-- Chat Section --> | |
<div class="chat-section"> | |
<!-- Chat interface from HTML --> | |
</div> | |
</div> | |
</div> | |
</div> | |
<script> | |
// JavaScript from the original implementation | |
document.addEventListener('DOMContentLoaded', () => { | |
// All JavaScript functionality | |
// ... (include all JavaScript from previous implementation) ... | |
}); | |
</script> | |
</body> | |
</html> | |
""" | |
# Helper functions for code analysis
def detect_language(extension):
    """Detect the programming language from a file extension.

    Args:
        extension: File suffix such as ".py". Matching is case-insensitive and
            the leading dot is optional. ``None`` or an empty value is allowed.

    Returns:
        The language display name, or "Unknown" when the extension is missing
        or not recognised.
    """
    extension_map = {
        ".py": "Python",
        ".js": "JavaScript",
        ".java": "Java",
        ".cpp": "C++",
        ".c": "C",
        ".cs": "C#",
        ".php": "PHP",
        ".rb": "Ruby",
        ".go": "Go",
        ".ts": "TypeScript",
    }
    if not extension:
        return "Unknown"
    normalized = str(extension).strip().lower()
    # Accept "py" as well as ".py" so callers need not pre-format the suffix.
    if not normalized.startswith("."):
        normalized = "." + normalized
    return extension_map.get(normalized, "Unknown")
def calculate_complexity_metrics(content, language):
    """Calculate simple, heuristic code metrics for a source file.

    Line counts are exact; comment, function and complexity figures are
    keyword-based approximations (no parsing), intended only to drive the
    rough thresholds in ``generate_recommendations``.

    Args:
        content: Source text. ``None`` or "" yields zero line counts.
        language: Display name as returned by ``detect_language``.

    Returns:
        Dict with keys ``language``, ``total_lines``, ``code_lines`` (non-blank
        lines), ``blank_lines``, ``comments`` (lines starting with a line
        comment marker), ``functions`` (occurrences of the language's function
        keyword; 0 for languages without one in the table below) and
        ``cyclomatic_complexity`` (1 + number of branch keywords/operators).
    """
    hash_comment_languages = {"Python", "Ruby", "PHP"}
    slash_comment_languages = {
        "JavaScript", "TypeScript", "Java", "C", "C++", "C#", "Go", "PHP",
    }
    function_keywords = {
        "Python": "def",
        "Ruby": "def",
        "JavaScript": "function",
        "TypeScript": "function",
        "PHP": "function",
        "Go": "func",
    }

    # splitlines() does not invent an extra empty line for a trailing newline
    # (or for empty input) the way split('\n') does.
    stripped_lines = [line.strip() for line in (content or "").splitlines()]
    total_lines = len(stripped_lines)
    blank_lines = sum(1 for text in stripped_lines if not text)

    prefixes = []
    if language in hash_comment_languages:
        prefixes.append("#")
    if language in slash_comment_languages:
        prefixes.append("//")
    comment_prefixes = tuple(prefixes)  # an empty tuple never matches

    comments = sum(1 for text in stripped_lines if text.startswith(comment_prefixes))
    # Keyword counting ignores comment lines so prose does not inflate results.
    code_text = "\n".join(
        text for text in stripped_lines
        if text and not text.startswith(comment_prefixes)
    )

    keyword = function_keywords.get(language)
    functions = len(re.findall(rf"\b{keyword}\b", code_text)) if keyword else 0
    branches = re.findall(
        r"\b(?:if|elif|for|while|case|catch|except)\b|&&|\|\|", code_text
    )

    return {
        "language": language,
        "total_lines": total_lines,
        "code_lines": total_lines - blank_lines,
        "blank_lines": blank_lines,
        "comments": comments,
        "functions": functions,
        "cyclomatic_complexity": 1 + len(branches),
    }
def generate_recommendations(metrics):
    """Generate code quality recommendations based on metrics.

    Args:
        metrics: Dict of metrics. Recognised keys are
            ``cyclomatic_complexity``, ``code_lines``, ``functions`` and
            ``comments``; missing keys simply skip the related check.

    Returns:
        A markdown string headed "### Recommendations", or "" when no
        threshold is exceeded.
    """
    recommendations = []
    code_lines = metrics.get("code_lines", 0)

    if metrics.get("cyclomatic_complexity", 0) > 10:
        recommendations.append("🔄 High cyclomatic complexity detected. Consider breaking down complex functions.")
    if code_lines > 300:
        recommendations.append("📏 File is quite large. Consider splitting it into multiple modules.")
    if metrics.get("functions", 0) > 10:
        recommendations.append("🔧 Large number of functions. Consider grouping related functions into classes.")
    # Judge the comment ratio only when comments were actually measured and
    # there is code to document; otherwise every file would be flagged.
    if "comments" in metrics and code_lines > 0 and metrics["comments"] / code_lines < 0.1:
        recommendations.append("📝 Low comment ratio. Consider adding more documentation.")

    if not recommendations:
        return ""
    return "### Recommendations\n\n" + "\n\n".join(recommendations)
# Function to process PDF files
def process_pdf(pdf_file):
    """Extract text and page images from a PDF and index the text in FAISS.

    Args:
        pdf_file: Raw PDF content (written as-is to a temporary file), or
            ``None`` when nothing was uploaded.

    Returns:
        A ``(session_id, status_message, pdf_state)`` tuple. ``pdf_state`` has
        the keys ``page_images`` (base64-encoded PNG per page), ``total_pages``
        and ``total_words``. On failure ``session_id`` is ``None``, the message
        starts with "Error processing PDF:" and the state is empty.
    """
    if pdf_file is None:
        return None, "No file uploaded", {"page_images": [], "total_pages": 0, "total_words": 0}

    pdf_path = None
    try:
        if embeddings is None:
            raise RuntimeError("embeddings model is not available")

        session_id = str(uuid.uuid4())
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as temp_file:
            # Record the path before writing so a failed write is still cleaned up.
            pdf_path = temp_file.name
            temp_file.write(pdf_file)

        doc = fitz.open(pdf_path)
        try:
            texts = []
            page_images = []
            for page in doc:
                texts.append(page.get_text())
                img_bytes = page.get_pixmap().tobytes("png")
                page_images.append(base64.b64encode(img_bytes).decode("utf-8"))
            total_pages = len(doc)
        finally:
            # Always release the document handle, even if a page fails to render.
            doc.close()
        total_words = sum(len(text.split()) for text in texts)

        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        chunks = text_splitter.create_documents(texts)
        if not chunks:
            raise ValueError("no extractable text found in the PDF")

        vectorstore = FAISS.from_documents(chunks, embeddings)
        index_path = os.path.join(FAISS_INDEX_DIR, session_id)
        vectorstore.save_local(index_path)
        user_vectorstores[session_id] = vectorstore

        pdf_state = {"page_images": page_images, "total_pages": total_pages, "total_words": total_words}
        return session_id, f"✅ Successfully processed {len(chunks)} text chunks from your PDF", pdf_state
    except Exception as e:
        return None, f"Error processing PDF: {str(e)}", {"page_images": [], "total_pages": 0, "total_words": 0}
    finally:
        # Remove the temporary copy on both the success and the error path.
        if pdf_path and os.path.exists(pdf_path):
            os.unlink(pdf_path)
# Function to generate chatbot responses with Tech theme
def generate_response(message, session_id, model_name, history):
    """Generate a chatbot reply, optionally enriched with FAISS context.

    Args:
        message: The user's message. When empty, nothing is generated.
        session_id: Key into ``user_vectorstores``; context retrieval is
            skipped when it is unknown or no embeddings model is loaded.
        model_name: Model identifier passed to the chat-completions call.
        history: List of ``{"role": ..., "content": ...}`` dicts. ``None`` is
            treated as an empty history.

    Returns:
        The history with the user message and the assistant reply appended.
        A list passed in is extended in place. If generation fails, the reply
        is an "Error generating response: ..." message instead.
    """
    # A missing history must still yield a usable list for the caller.
    if history is None:
        history = []
    if not message:
        return history

    def _record(reply):
        """Append the user message and the given assistant reply."""
        history.append({"role": "user", "content": message})
        history.append({"role": "assistant", "content": reply})

    try:
        context = ""
        if embeddings and session_id and session_id in user_vectorstores:
            try:
                print(f"Performing similarity search with session: {session_id}")
                vectorstore = user_vectorstores[session_id]
                # Use a higher k value to get more relevant context
                docs = vectorstore.similarity_search(message, k=5)
                if docs:
                    # Format the context clearly with source information
                    segments = ["\n\nRelevant code context from your files:\n\n"]
                    for i, doc in enumerate(docs, 1):
                        source = doc.metadata.get("source", "Unknown")
                        language = doc.metadata.get("language", "Unknown")
                        segments.append(f"--- Segment {i} from {source} ({language}) ---\n")
                        segments.append(f"```\n{doc.page_content}\n```\n\n")
                    context = "".join(segments)
                    print(f"Found {len(docs)} relevant code segments for context.")
            except Exception as e:
                # Retrieval is best-effort: answer without context on failure.
                print(f"Warning: Failed to perform similarity search: {e}")

        system_prompt = (
            "You are a technical assistant specializing in software development and programming.\n"
            "Provide clear, accurate responses with code examples when relevant.\n"
            "Format code snippets with proper markdown code blocks and specify the language."
        )
        if context:
            system_prompt += f"\n\nUse this context from the uploaded code files to inform your answers:{context}"
            # Add instruction to reference specific file parts
            system_prompt += "\nWhen discussing code from the uploaded files, specifically reference the file name and segment number."

        completion = client.chat.completions.create(
            model=model_name,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": message},
            ],
            temperature=0.7,
            max_tokens=1024,
        )
        _record(completion.choices[0].message.content)
        return history
    except Exception as e:
        error_msg = f"Error generating response: {str(e)}"
        # Only a real list can take the error entry; anything else is returned as-is.
        if isinstance(history, list):
            _record(error_msg)
        return history
# Functions to update PDF viewer
def update_pdf_viewer(pdf_state):
    """Return the initial viewer values for a processed PDF.

    Args:
        pdf_state: Dict with ``page_images`` (base64 PNG strings),
            ``total_pages`` and ``total_words``. ``None`` or a dict missing
            these keys is treated as "no PDF".

    Returns:
        ``(total_pages, first_page_image, stats_markdown)``. When there is no
        PDF, or the first page cannot be decoded, the result is
        ``(0, None, <message>)``.
    """
    state = pdf_state or {}
    total_pages = state.get("total_pages", 0)
    if not total_pages:
        return 0, None, "No PDF uploaded yet"
    try:
        img_data = base64.b64decode(state["page_images"][0])
        img = Image.open(io.BytesIO(img_data))
        return total_pages, img, f"**Total Pages:** {total_pages}\n**Total Words:** {state.get('total_words', 0)}"
    except Exception as e:
        print(f"Error decoding image: {e}")
        return 0, None, "Error displaying PDF"
def update_image(page_num, pdf_state):
    """Return the image for a 1-based page number, or None if unavailable.

    Args:
        page_num: 1-based page number. Numeric values are truncated to an int
            so a float from a numeric input still selects a page.
        pdf_state: Dict with ``page_images`` (base64 PNG strings) and
            ``total_pages``. ``None`` or an incomplete dict yields ``None``.

    Returns:
        The decoded page image, or ``None`` when the page number is invalid,
        out of range, or the image cannot be decoded.
    """
    state = pdf_state or {}
    total_pages = state.get("total_pages", 0)
    try:
        page_index = int(page_num) - 1
    except (TypeError, ValueError, OverflowError):
        # None, non-numeric text, NaN or infinity: no page to show.
        return None
    if not total_pages or page_index < 0 or page_index >= total_pages:
        return None
    try:
        img_data = base64.b64decode(state["page_images"][page_index])
        return Image.open(io.BytesIO(img_data))
    except Exception as e:
        print(f"Error decoding image: {e}")
        return None
# GitHub API integration
def search_github_repos(query, sort="stars", order="desc", per_page=10, timeout=10):
    """Search for GitHub repositories.

    Args:
        query: Search string sent as the ``q`` parameter. An empty or
            whitespace-only query returns ``[]`` without a request.
        sort: Value for the ``sort`` parameter.
        order: Value for the ``order`` parameter.
        per_page: Number of results requested.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The list under ``items`` in the API response, or ``[]`` on a non-200
        status or any error (errors are printed, not raised).
    """
    if not query or not str(query).strip():
        return []
    try:
        github_token = os.getenv("GITHUB_TOKEN", "")
        headers = {"Accept": "application/vnd.github+json"}
        if github_token:
            headers["Authorization"] = f"token {github_token}"
        params = {
            "q": query,
            "sort": sort,
            "order": order,
            "per_page": per_page,
        }
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.get(
            "https://api.github.com/search/repositories",
            headers=headers,
            params=params,
            timeout=timeout,
        )
        if response.status_code != 200:
            print(f"GitHub API Error: {response.status_code} - {response.text}")
            return []
        return response.json().get("items", [])
    except Exception as e:
        print(f"Error in GitHub search: {e}")
        return []
# Stack Overflow API integration
def search_stackoverflow(query, sort="votes", site="stackoverflow", pagesize=10, timeout=10):
    """Search for questions on Stack Overflow.

    Args:
        query: Text to look for in question titles. ``[tag:name]`` markers in
            the query are removed from the title text and sent as the
            ``tagged`` filter instead. An empty query returns ``[]`` without
            a request.
        sort: Value for the ``sort`` parameter.
        site: Stack Exchange site name.
        pagesize: Number of results requested.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The list under ``items`` in the API response, with each
        ``creation_date`` converted to a "YYYY-MM-DD" string (UTC), or ``[]``
        on a non-200 status or any error (errors are printed, not raised).
    """
    # Function-scope import: the file-level import only brings in datetime/timedelta.
    from datetime import timezone

    if not query or not str(query).strip():
        return []
    try:
        tag_pattern = r"\[tag:([^\[\]]+)\]"
        tags = [name.strip() for name in re.findall(tag_pattern, query)]
        title_text = re.sub(tag_pattern, " ", query).strip()

        params = {
            "order": "desc",
            "sort": sort,
            "site": site,
            "pagesize": pagesize,
        }
        if title_text:
            params["intitle"] = title_text
        if tags:
            # A literal "[tag:x]" inside intitle would be matched against the title text.
            params["tagged"] = ";".join(tags)

        response = requests.get(
            "https://api.stackexchange.com/2.3/search/advanced",
            params=params,
            timeout=timeout,
        )
        if response.status_code != 200:
            print(f"Stack Exchange API Error: {response.status_code} - {response.text}")
            return []
        items = response.json().get("items", [])
        # Convert Unix timestamps to readable dates; UTC keeps the day
        # independent of the server's local timezone.
        for item in items:
            if "creation_date" in item:
                item["creation_date"] = datetime.fromtimestamp(
                    item["creation_date"], tz=timezone.utc
                ).strftime("%Y-%m-%d")
        return items
    except Exception as e:
        print(f"Error in Stack Overflow search: {e}")
        return []
def get_stackoverflow_answers(question_id, site="stackoverflow", timeout=10):
    """Get answers for a specific question on Stack Overflow.

    Args:
        question_id: Id of the question. ``None`` or "" returns ``[]`` without
            a request.
        site: Stack Exchange site name.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The list under ``items`` in the API response, sorted by votes
        (descending) and with each ``creation_date`` converted to a
        "YYYY-MM-DD" string (UTC), or ``[]`` on a non-200 status or any error
        (errors are printed, not raised).
    """
    # Function-scope import: the file-level import only brings in datetime/timedelta.
    from datetime import timezone

    if question_id is None or question_id == "":
        return []
    try:
        params = {
            "order": "desc",
            "sort": "votes",
            "site": site,
            "filter": "withbody",  # Include the answer body in the response
        }
        response = requests.get(
            f"https://api.stackexchange.com/2.3/questions/{question_id}/answers",
            params=params,
            timeout=timeout,
        )
        if response.status_code != 200:
            print(f"Stack Exchange API Error: {response.status_code} - {response.text}")
            return []
        items = response.json().get("items", [])
        # UTC keeps the formatted day independent of the server's timezone.
        for item in items:
            if "creation_date" in item:
                item["creation_date"] = datetime.fromtimestamp(
                    item["creation_date"], tz=timezone.utc
                ).strftime("%Y-%m-%d")
        return items
    except Exception as e:
        print(f"Error getting Stack Overflow answers: {e}")
        return []
def explain_code(code, model_name="llama3-70b-8192"):
    """Explain code using the LLM.

    Args:
        code: Source code to explain. Empty or whitespace-only input returns
            a prompt message without calling the model.
        model_name: Model identifier for the chat-completions call.

    Returns:
        Markdown starting with "**Code Explanation:**", or an
        "Error explaining code: ..." message if the call fails.
    """
    if not code or not str(code).strip():
        return "Please provide some code to explain"
    try:
        system_prompt = "\n".join([
            "You are an expert programmer and code reviewer. Your task is to explain the provided code in a clear, concise manner. Include:",
            "1. What the code does (high-level overview)",
            "2. Key functions/components and their purposes",
            "3. Potential issues or optimization opportunities",
            "4. Any best practices that are followed or violated",
        ])
        completion = client.chat.completions.create(
            model=model_name,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"Explain this code:\n```\n{code}\n```"},
            ],
            temperature=0.3,  # low temperature for a focused explanation
            max_tokens=1024,
        )
        explanation = completion.choices[0].message.content
        return f"**Code Explanation:**\n\n{explanation}"
    except Exception as e:
        return f"Error explaining code: {str(e)}"
def perform_repo_search(query, language, sort_by, min_stars):
    """Run a GitHub repository search from UI inputs and render markdown.

    Args:
        query: Free-text search terms; an empty value yields a prompt message.
        language: Language filter; falsy or "any" means no filter.
        sort_by: "updated" or "forks"; anything else sorts by stars.
        min_stars: Minimum star count; falsy or "0" means no filter.

    Returns:
        Markdown listing the repositories found, a "no results" / "enter a
        query" message, or an "Error searching for repositories: ..." message.
    """
    try:
        if not query:
            return "Please enter a search query"

        # Collect the optional qualifiers appended to the search string
        qualifiers = ""
        if language and language != "any":
            qualifiers += f" language:{language}"
        if min_stars and min_stars != "0":
            qualifiers += f" stars:>={min_stars}"
        search_query = query + qualifiers if qualifiers else query

        # Translate the UI choice into the API sort field
        sort_param = "updated" if sort_by == "updated" else "forks" if sort_by == "forks" else "stars"

        repos = search_github_repos(search_query, sort=sort_param)
        if not repos:
            return "No repositories found. Try different search terms."

        # Assemble the markdown piece by piece and join once at the end
        pieces = ["## GitHub Repository Search Results\n\n"]
        for position, repo in enumerate(repos, 1):
            pieces.append(f"### {position}. [{repo['full_name']}]({repo['html_url']})\n\n")
            if repo['description']:
                pieces.append(f"{repo['description']}\n\n")
            pieces.append(f"**Language:** {repo['language'] or 'Not specified'}\n")
            pieces.append(f"**Stars:** {repo['stargazers_count']} | **Forks:** {repo['forks_count']} | **Watchers:** {repo['watchers_count']}\n")
            pieces.append(f"**Created:** {repo['created_at'][:10]} | **Updated:** {repo['updated_at'][:10]}\n\n")
            if repo.get('topics'):
                pieces.append(f"**Topics:** {', '.join(repo['topics'])}\n\n")
            if repo.get('license') and repo['license'].get('name'):
                pieces.append(f"**License:** {repo['license']['name']}\n\n")
            pieces.append(f"[View Repository]({repo['html_url']}) | [Clone URL]({repo['clone_url']})\n\n")
            pieces.append("---\n\n")
        return "".join(pieces)
    except Exception as e:
        return f"Error searching for repositories: {str(e)}"
def perform_stack_search(query, tag, sort_by):
    """Perform a Stack Overflow search with UI parameters and render markdown.

    Args:
        query: Free-text search terms; an empty value yields a prompt message.
        tag: Tag filter; falsy or "any" means no filter. Otherwise it is
            appended to the query as ``[tag:<tag>]``.
        sort_by: "newest" or "activity"; anything else sorts by votes.

    Returns:
        Markdown listing the questions found, a "no results" / "enter a query"
        message, or an "Error searching Stack Overflow: ..." message.
    """
    try:
        if not query:
            return "Please enter a search query"

        # Add tag to query if specified
        query_with_tag = f"{query} [tag:{tag}]" if tag and tag != "any" else query

        # Map sort_by to Stack Exchange API parameters
        if sort_by == "newest":
            sort_param = "creation"
        elif sort_by == "activity":
            sort_param = "activity"
        else:
            sort_param = "votes"

        results = search_stackoverflow(query_with_tag, sort=sort_param)
        if not results:
            return "No questions found. Try different search terms."

        # Format results as markdown
        parts = ["## Stack Overflow Search Results\n\n"]
        for i, question in enumerate(results, 1):
            parts.append(f"### {i}. [{question['title']}]({question['link']})\n\n")
            # Score and answer stats
            parts.append(f"**Score:** {question['score']} | **Answers:** {question['answer_count']}")
            if question.get('is_answered'):
                parts.append(" ✅ (Accepted answer available)")
            parts.append("\n\n")
            # Tags (distinct loop name so the ``tag`` parameter is not shadowed)
            if question.get('tags'):
                parts.append("**Tags:** ")
                parts.extend(f"`{tag_name}` " for tag_name in question['tags'])
                parts.append("\n\n")
            # Asked info
            parts.append(f"**Asked:** {question['creation_date']} | **Views:** {question.get('view_count', 'N/A')}\n\n")
            parts.append(f"[View Question]({question['link']})\n\n")
            parts.append("---\n\n")
        return "".join(parts)
    except Exception as e:
        return f"Error searching Stack Overflow: {str(e)}"
# Code file ingestion: metrics plus optional FAISS indexing
def process_code_file(file_obj):
    """Process an uploaded code file and, when possible, index it in FAISS.

    Args:
        file_obj: The upload as raw bytes, a filesystem path (``str`` or
            ``Path``), or a file-like object with ``read()`` and optionally
            ``name``. ``None`` means nothing was uploaded.

    Returns:
        A ``(session_id, status_message, metrics)`` tuple. ``session_id`` is
        ``None`` when no index was created (no embeddings model, or indexing
        failed); the status message says so. On error the tuple is
        ``(None, "Error processing file: ...", {})``.
    """
    if file_obj is None:
        return None, "No file uploaded", {}
    try:
        if isinstance(file_obj, (bytes, bytearray)):
            raw = bytes(file_obj)
            file_name = "uploaded_file"
            file_extension = ".txt"  # Default extension
        else:
            if isinstance(file_obj, (str, Path)):
                # The upload may arrive as a path rather than an open file.
                file_name = str(file_obj)
                raw = Path(file_obj).read_bytes()
            else:
                raw = file_obj.read()
                file_name = getattr(file_obj, 'name', 'uploaded_file')
            file_extension = Path(file_name).suffix.lower()
        # Text-mode file objects already return str; only bytes need decoding.
        content = raw if isinstance(raw, str) else raw.decode('utf-8', errors='replace')

        language = detect_language(file_extension)
        # Calculate metrics
        metrics = calculate_complexity_metrics(content, language)

        # Create vectorstore if embeddings are available
        session_id = None
        if embeddings:
            try:
                print(f"Creating FAISS index for {file_name}...")
                # Smaller chunks than for prose, since code is dense
                text_splitter = RecursiveCharacterTextSplitter(
                    chunk_size=500,
                    chunk_overlap=50,
                    separators=["\n\n", "\n", " ", ""],
                )
                chunks = text_splitter.create_documents(
                    [content], metadatas=[{"filename": file_name, "language": language}]
                )
                # Add source metadata to help with retrieval
                for i, chunk in enumerate(chunks):
                    chunk.metadata["chunk_id"] = i
                    chunk.metadata["source"] = file_name
                # Create and store vectorstore
                vectorstore = FAISS.from_documents(chunks, embeddings)
                new_session_id = str(uuid.uuid4())
                index_path = os.path.join(FAISS_INDEX_DIR, new_session_id)
                vectorstore.save_local(index_path)
                user_vectorstores[new_session_id] = vectorstore
                session_id = new_session_id
                # Add number of chunks to metrics for display
                metrics["chunks"] = len(chunks)
                print(f"Successfully created FAISS index with {len(chunks)} chunks.")
            except Exception as e:
                # Indexing is best-effort; the metrics are still returned.
                print(f"Warning: Failed to create vectorstore: {e}")

        if session_id:
            status = f"✅ Successfully analyzed {file_name} and stored in FAISS index"
        else:
            # Do not claim the file was indexed when no index exists.
            status = f"✅ Successfully analyzed {file_name} (FAISS index not created)"
        return session_id, status, metrics
    except Exception as e:
        return None, f"Error processing file: {str(e)}", {}
# Gradio Interface
# MODERN_UI is a complete HTML document (with its own <style> block), so it is
# rendered through gr.HTML only. It is no longer passed as the Blocks `css`
# argument, which expects CSS rules rather than HTML markup.
with gr.Blocks(analytics_enabled=False) as demo:
    current_session_id = gr.State(None)
    code_state = gr.State({})
    # Hidden components for functionality
    file_input = gr.File(visible=False)
    chat_history = gr.State([])
    # Main UI
    gr.HTML(MODERN_UI)

    # Event handlers
    def process_uploaded_file(file):
        """Analyze an uploaded file and return the result as a dict."""
        session_id, status_msg, metrics = process_code_file(file)
        return {
            "session_id": session_id,
            "status": status_msg,
            "metrics": metrics,
        }

    def handle_chat(message, session_id, model_name, history):
        """Return only the latest reply text produced by generate_response."""
        updated_history = generate_response(message, session_id, model_name, history)
        return updated_history[-1]["content"] if updated_history else ""

    # API endpoints
    # NOTE(review): this only sets a plain attribute on the Blocks object and
    # nothing in this file reads it. Confirm whether the installed Gradio
    # version consumes `api_endpoints`; functions are normally exposed by
    # binding them to event listeners with `api_name`.
    demo.api_endpoints = [
        ["/api/analyze", process_uploaded_file, "POST"],
        ["/api/chat", handle_chat, "POST"],
        ["/api/github_search", search_github_repos, "POST"],
        ["/api/stack_search", search_stackoverflow, "POST"],
        ["/api/explain_code", explain_code, "POST"],
    ]

# Launch the application
if __name__ == "__main__":
    demo.launch()