Spaces:

CosmickVisions
/

Tech-Vision

Build error

App Files Files Community

Tech-Vision / app.py

CosmickVisions

Update app.py

9c02117 verified 3 months ago

raw

history blame

27.7 kB

	import gradio as gr
	import groq
	import os
	import tempfile
	import uuid
	from dotenv import load_dotenv
	from langchain_community.vectorstores import FAISS
	from langchain_community.embeddings import HuggingFaceInstructEmbeddings
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	import fitz # PyMuPDF
	import base64
	from PIL import Image
	import io
	import requests
	import json
	import re
	from datetime import datetime, timedelta
	from pathlib import Path
	import torch
	import numpy as np

	# Read settings from a local .env file, then build the Groq client from the
	# GROQ_TECH_API_KEY environment variable.
	load_dotenv()
	client = groq.Client(api_key=os.getenv("GROQ_TECH_API_KEY"))


	def _load_cpu_embeddings(model_name):
	    """Build a HuggingFaceInstructEmbeddings wrapper for ``model_name`` on the CPU."""
	    return HuggingFaceInstructEmbeddings(
	        model_name=model_name,
	        model_kwargs={"device": "cpu"},  # Force CPU usage
	    )


	# Initialize embeddings with error handling: try the primary model first,
	# then the fallback; if both fail, ``embeddings`` is set to None.
	# NOTE(review): the fallback name does not look like an instructor checkpoint -
	# confirm HuggingFaceInstructEmbeddings can load it.
	try:
	    embeddings = _load_cpu_embeddings("hkunlp/instructor-base")
	except Exception as primary_error:
	    print(f"Warning: Failed to load primary embeddings model: {primary_error}")
	    try:
	        embeddings = _load_cpu_embeddings("all-MiniLM-L6-v2")
	    except Exception as fallback_error:
	        print(f"Warning: Failed to load fallback embeddings model: {fallback_error}")
	        embeddings = None

	# Directory where the per-session FAISS indexes are saved.
	FAISS_INDEX_DIR = "faiss_indexes_tech_cpu"
	# exist_ok=True replaces the exists()-then-makedirs() pair, which can raise
	# FileExistsError when two processes start at the same time.
	os.makedirs(FAISS_INDEX_DIR, exist_ok=True)

	# In-memory vector stores, keyed by the session id handed back to the caller.
	user_vectorstores = {}

	# Modern UI HTML template: a complete HTML document (head, styles, body and
	# script) rendered as the page shell.
	# NOTE: several sections are still placeholders (see the "include ..." comments
	# inside the markup); only the base CSS variables and layout skeleton are real.
	# The page title uses a plain "|" - the former "\|" was an invalid escape
	# sequence that put a stray backslash into the title.
	MODERN_UI = """
	<!DOCTYPE html>
	<html lang="en">
	<head>
	    <meta charset="UTF-8">
	    <meta name="viewport" content="width=device-width, initial-scale=1.0">
	    <title>Tech-Vision AI | Advanced Code Analysis</title>
	    <link href="https://fonts.googleapis.com/css2?family=Google+Sans:wght@400;500;700&family=Roboto:wght@300;400;500&family=Roboto+Mono&display=swap" rel="stylesheet">
	    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
	    <style>
	        /* CSS styles from the original HTML file */
	        :root {
	            --primary-color: #4285F4;
	            --secondary-color: #34A853;
	            --accent-color: #EA4335;
	            --yellow-color: #FBBC05;
	            --light-background: #F8F9FA;
	            --dark-text: #202124;
	            --white: #FFFFFF;
	            --border-color: #DADCE0;
	            --code-bg: #F1F3F4;
	            --shadow-sm: 0 1px 3px rgba(0,0,0,0.12), 0 1px 2px rgba(0,0,0,0.24);
	            --shadow-md: 0 4px 6px rgba(0,0,0,0.1);
	            --shadow-lg: 0 10px 20px rgba(0,0,0,0.1);
	            --transition: all 0.3s cubic-bezier(.25,.8,.25,1);
	        }

	        * {
	            margin: 0;
	            padding: 0;
	            box-sizing: border-box;
	        }

	        body {
	            background-color: var(--light-background);
	            font-family: 'Roboto', sans-serif;
	            color: var(--dark-text);
	            line-height: 1.6;
	        }

	        .container {
	            max-width: 1400px;
	            margin: 0 auto;
	            padding: 0 20px;
	        }

	        /* Add all other CSS styles from the HTML file here */
	        /* ... (include all CSS content from the provided HTML file) ... */
	    </style>
	</head>
	<body>
	    <!-- Header Section -->
	    <header>
	        <div class="container">
	            <nav class="navbar">
	                <div class="logo">
	                    <i class="fas fa-code logo-icon"></i>
	                    <span class="logo-text">Tech-Vision AI</span>
	                </div>
	                <div class="nav-links">
	                    <a href="#" class="nav-link active" data-tab-trigger="code-analysis">Code Analysis</a>
	                    <a href="#" class="nav-link" data-tab-trigger="github-results">GitHub Search</a>
	                    <a href="#" class="nav-link" data-tab-trigger="stack-results">Stack Overflow</a>
	                    <a href="#" class="nav-link" data-tab-trigger="code-explanation">Code Explainer</a>
	                </div>
	            </nav>
	        </div>
	    </header>

	    <!-- Main Content -->
	    <div class="container">
	        <div class="main-content">
	            <!-- Sidebar Section -->
	            <div class="sidebar">
	                <!-- File Upload Section -->
	                <div class="sidebar-section">
	                    <h2 class="sidebar-title"><i class="fas fa-upload"></i> Upload Code</h2>
	                    <div class="file-upload" id="dropzone">
	                        <input type="file" id="file-input" hidden accept=".py,.js,.java,.cpp,.c,.cs,.php,.rb,.go,.ts">
	                        <i class="fas fa-file-code"></i>
	                        <p>Drag & drop your code file here</p>
	                        <span>or click to browse</span>
	                    </div>
	                    <button class="upload-btn" id="analyze-btn">
	                        <i class="fas fa-search"></i> Analyze Code
	                    </button>
	                    <div class="model-selection">
	                        <select id="model-dropdown">
	                            <option value="llama3-70b-8192">llama3-70b-8192</option>
	                            <option value="mixtral-8x7b-32768">mixtral-8x7b-32768</option>
	                            <option value="gemma-7b-it">gemma-7b-it</option>
	                        </select>
	                    </div>
	                </div>

	                <!-- Tools Section -->
	                <div class="sidebar-section">
	                    <h2 class="sidebar-title"><i class="fas fa-tools"></i> Developer Tools</h2>
	                    <div class="accordion">
	                        <!-- Include all tool sections from the HTML -->
	                        <!-- ... (GitHub Search, Stack Overflow, Code Explainer) ... -->
	                    </div>
	                </div>

	                <!-- Status Section -->
	                <div class="sidebar-section">
	                    <h2 class="sidebar-title"><i class="fas fa-info-circle"></i> Status</h2>
	                    <div id="file-status">No file uploaded yet</div>
	                </div>
	            </div>

	            <!-- Main Area -->
	            <div class="main-area">
	                <!-- Tabs Navigation -->
	                <div class="tabs">
	                    <div class="tab active" data-tab="code-analysis">Code Analysis</div>
	                    <div class="tab" data-tab="github-results">GitHub Results</div>
	                    <div class="tab" data-tab="stack-results">Stack Overflow Results</div>
	                    <div class="tab" data-tab="code-explanation">Code Explanation</div>
	                </div>

	                <!-- Tab Content -->
	                <div class="tab-content">
	                    <!-- Include all tab panes from the HTML -->
	                    <!-- ... (Code Analysis, GitHub Results, etc) ... -->
	                </div>

	                <!-- Chat Section -->
	                <div class="chat-section">
	                    <!-- Chat interface from HTML -->
	                </div>
	            </div>
	        </div>
	    </div>

	    <script>
	        // JavaScript from the original implementation
	        document.addEventListener('DOMContentLoaded', () => {
	            // All JavaScript functionality
	            // ... (include all JavaScript from previous implementation) ...
	        });
	    </script>
	</body>
	</html>
	"""

	# Helper functions for code analysis
	def detect_language(extension):
	    """Translate a file extension (e.g. ``".py"``) into a language name.

	    The comparison ignores case. Extensions missing from the table yield
	    ``"Unknown"``.
	    """
	    known_languages = dict(
	        [
	            (".py", "Python"),
	            (".js", "JavaScript"),
	            (".java", "Java"),
	            (".cpp", "C++"),
	            (".c", "C"),
	            (".cs", "C#"),
	            (".php", "PHP"),
	            (".rb", "Ruby"),
	            (".go", "Go"),
	            (".ts", "TypeScript"),
	        ]
	    )
	    normalized = extension.lower()
	    if normalized in known_languages:
	        return known_languages[normalized]
	    return "Unknown"

	def calculate_complexity_metrics(content, language):
	    """Count total, non-blank and blank lines of ``content``.

	    Args:
	        content: Source text to measure; ``None`` is treated as empty text.
	        language: Language label copied unchanged into the result.

	    Returns:
	        A dict with the keys ``language``, ``total_lines``, ``code_lines``
	        (lines containing non-whitespace characters) and ``blank_lines``.
	    """
	    # splitlines() does not invent an extra blank line for a trailing newline,
	    # reports zero lines for empty text and understands "\r\n" line endings;
	    # split('\n') got all three wrong.
	    lines = (content or "").splitlines()
	    total_lines = len(lines)
	    blank_lines = sum(1 for line in lines if not line.strip())

	    return {
	        "language": language,
	        "total_lines": total_lines,
	        "code_lines": total_lines - blank_lines,
	        "blank_lines": blank_lines,
	    }

	def generate_recommendations(metrics):
	    """Turn code metrics into a markdown list of quality recommendations.

	    Args:
	        metrics: Dict that may contain ``cyclomatic_complexity``,
	            ``code_lines``, ``functions`` and ``comments``. Missing keys are
	            treated as "not measured".

	    Returns:
	        A markdown string starting with ``### Recommendations``, or ``""``
	        when no rule fires.
	    """
	    recommendations = []

	    if metrics.get("cyclomatic_complexity", 0) > 10:
	        recommendations.append("🔄 High cyclomatic complexity detected. Consider breaking down complex functions.")

	    if metrics.get("code_lines", 0) > 300:
	        recommendations.append("📏 File is quite large. Consider splitting it into multiple modules.")

	    if metrics.get("functions", 0) > 10:
	        recommendations.append("🔧 Large number of functions. Consider grouping related functions into classes.")

	    # Only judge the comment ratio when comments were actually counted.
	    # Defaulting a missing "comments" key to 0 made this advice fire for every
	    # metrics dict that never measured comments at all.
	    if "comments" in metrics:
	        code_lines = max(metrics.get("code_lines", 1), 1)  # avoid division by zero
	        if metrics["comments"] / code_lines < 0.1:
	            recommendations.append("📝 Low comment ratio. Consider adding more documentation.")

	    if not recommendations:
	        return ""
	    return "### Recommendations\n\n" + "\n\n".join(recommendations)

	# Function to process PDF files
	def process_pdf(pdf_file):
	    """Index an uploaded PDF in FAISS and render its pages as PNG images.

	    Args:
	        pdf_file: Raw PDF content as a bytes-like object (it is written
	            verbatim to a temporary ``.pdf`` file), or ``None``.

	    Returns:
	        A ``(session_id, status_message, pdf_state)`` tuple. ``session_id``
	        is ``None`` on failure. ``pdf_state`` holds ``page_images`` (base64
	        PNG strings), ``total_pages`` and ``total_words``.
	    """
	    if pdf_file is None:
	        return None, "No file uploaded", {"page_images": [], "total_pages": 0, "total_words": 0}

	    pdf_path = None
	    try:
	        # Fail with a readable message instead of an AttributeError from FAISS.
	        if embeddings is None:
	            raise RuntimeError("embeddings model is not available")

	        session_id = str(uuid.uuid4())
	        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as temp_file:
	            # Remember the path before writing so a failed write is cleaned up too.
	            pdf_path = temp_file.name
	            temp_file.write(pdf_file)

	        texts = []
	        page_images = []
	        # The context manager closes the document even when rendering fails.
	        with fitz.open(pdf_path) as doc:
	            for page in doc:
	                texts.append(page.get_text())
	                png_bytes = page.get_pixmap().tobytes("png")
	                page_images.append(base64.b64encode(png_bytes).decode("utf-8"))
	        total_pages = len(texts)
	        total_words = sum(len(text.split()) for text in texts)

	        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
	        chunks = text_splitter.create_documents(texts)
	        vectorstore = FAISS.from_documents(chunks, embeddings)
	        vectorstore.save_local(os.path.join(FAISS_INDEX_DIR, session_id))
	        user_vectorstores[session_id] = vectorstore

	        pdf_state = {"page_images": page_images, "total_pages": total_pages, "total_words": total_words}
	        return session_id, f"✅ Successfully processed {len(chunks)} text chunks from your PDF", pdf_state
	    except Exception as e:
	        return None, f"Error processing PDF: {str(e)}", {"page_images": [], "total_pages": 0, "total_words": 0}
	    finally:
	        # Single cleanup point for the temporary file, on success and failure.
	        if pdf_path and os.path.exists(pdf_path):
	            os.unlink(pdf_path)

	# Function to generate chatbot responses with Tech theme
	def generate_response(message, session_id, model_name, history):
	    """Answer ``message`` with the Groq chat API, enriched with FAISS context.

	    Args:
	        message: The user's question. Empty or ``None`` returns ``history``
	            unchanged.
	        session_id: Key into ``user_vectorstores``; when it is present and
	            embeddings are loaded, the five most similar chunks are added to
	            the system prompt.
	        model_name: Model identifier passed to the chat completion call.
	        history: List of ``{"role", "content"}`` dicts. A list is extended in
	            place; ``None`` starts a new list.

	    Returns:
	        The history with the user message and the assistant reply (or an
	        error message) appended.
	    """
	    if not message:
	        return history

	    # Without this, append() failed on a None history and the reply was lost.
	    if not isinstance(history, list):
	        history = [] if history is None else list(history)

	    try:
	        context = ""
	        if embeddings and session_id and session_id in user_vectorstores:
	            try:
	                print(f"Performing similarity search with session: {session_id}")
	                vectorstore = user_vectorstores[session_id]

	                # Use a higher k value to get more relevant context
	                docs = vectorstore.similarity_search(message, k=5)

	                if docs:
	                    # Label each segment with its source so the model can cite it.
	                    parts = ["\n\nRelevant code context from your files:\n\n"]
	                    for i, doc in enumerate(docs, 1):
	                        source = doc.metadata.get("source", "Unknown")
	                        language = doc.metadata.get("language", "Unknown")
	                        parts.append(f"--- Segment {i} from {source} ({language}) ---\n")
	                        parts.append(f"```\n{doc.page_content}\n```\n\n")
	                    context = "".join(parts)

	                    print(f"Found {len(docs)} relevant code segments for context.")
	            except Exception as e:
	                # Retrieval is best-effort: answer without context on failure.
	                print(f"Warning: Failed to perform similarity search: {e}")

	        # Built from separate literals so source indentation never leaks into the prompt.
	        system_prompt = (
	            "You are a technical assistant specializing in software development and programming.\n"
	            "Provide clear, accurate responses with code examples when relevant.\n"
	            "Format code snippets with proper markdown code blocks and specify the language."
	        )

	        if context:
	            system_prompt += f"\n\nUse this context from the uploaded code files to inform your answers:{context}"

	            # Add instruction to reference specific file parts
	            system_prompt += "\nWhen discussing code from the uploaded files, specifically reference the file name and segment number."

	        completion = client.chat.completions.create(
	            model=model_name,
	            messages=[
	                {"role": "system", "content": system_prompt},
	                {"role": "user", "content": message},
	            ],
	            temperature=0.7,
	            max_tokens=1024,
	        )
	        reply = completion.choices[0].message.content
	    except Exception as e:
	        reply = f"Error generating response: {str(e)}"

	    # Success and failure both record the exchange in message format.
	    history.append({"role": "user", "content": message})
	    history.append({"role": "assistant", "content": reply})
	    return history

	# Functions to update PDF viewer
	def update_pdf_viewer(pdf_state):
	    """Return the page count, first page image and a stats line for the viewer.

	    Args:
	        pdf_state: Dict with ``page_images`` (base64 PNG strings),
	            ``total_pages`` and ``total_words``. ``None`` or a dict without
	            pages is treated as "nothing uploaded".

	    Returns:
	        ``(total_pages, image, stats_text)``. On a missing PDF or a decoding
	        error the first two items are ``0`` and ``None``.
	    """
	    # Tolerate None and empty state instead of raising TypeError/KeyError.
	    pdf_state = pdf_state or {}
	    total_pages = pdf_state.get("total_pages", 0)
	    if not total_pages:
	        return 0, None, "No PDF uploaded yet"
	    try:
	        img_data = base64.b64decode(pdf_state["page_images"][0])
	        img = Image.open(io.BytesIO(img_data))
	        stats = f"Total Pages: {total_pages}\nTotal Words: {pdf_state.get('total_words', 0)}"
	        return total_pages, img, stats
	    except Exception as e:
	        print(f"Error decoding image: {e}")
	        return 0, None, "Error displaying PDF"

	def update_image(page_num, pdf_state):
	    """Return the rendered image for a 1-based page number, or ``None``.

	    Args:
	        page_num: 1-based page number. Floats are truncated to an int;
	            ``None`` or non-numeric values yield ``None``.
	        pdf_state: Dict with ``page_images`` (base64 PNG strings) and
	            ``total_pages``; ``None`` is treated as empty.

	    Returns:
	        The decoded image, or ``None`` when the page is out of range or the
	        image cannot be decoded.
	    """
	    pdf_state = pdf_state or {}
	    total_pages = pdf_state.get("total_pages", 0)

	    # A float page number cannot index a list, so normalise to int first;
	    # previously such values always ended in the "Error decoding image" path.
	    try:
	        page_index = int(page_num) - 1
	    except (TypeError, ValueError, OverflowError):
	        return None

	    if not total_pages or page_index < 0 or page_index >= total_pages:
	        return None
	    try:
	        img_data = base64.b64decode(pdf_state["page_images"][page_index])
	        return Image.open(io.BytesIO(img_data))
	    except Exception as e:
	        print(f"Error decoding image: {e}")
	        return None

	# GitHub API integration
	def search_github_repos(query, sort="stars", order="desc", per_page=10):
	    """Search GitHub repositories through the REST search API.

	    Args:
	        query: Search expression, including any qualifiers. A blank query
	            returns ``[]`` without calling the API.
	        sort: Value for the API's ``sort`` parameter.
	        order: Value for the API's ``order`` parameter.
	        per_page: Maximum number of results requested.

	    Returns:
	        The ``items`` list from the API response, or ``[]`` on any error.
	        If the ``GITHUB_TOKEN`` environment variable is set it is sent as the
	        Authorization header.
	    """
	    # The API rejects an empty "q"; skip the round trip (the result was [] anyway).
	    if not query or not str(query).strip():
	        return []

	    try:
	        headers = {"Accept": "application/vnd.github+json"}
	        github_token = os.getenv("GITHUB_TOKEN", "")
	        if github_token:
	            headers["Authorization"] = f"token {github_token}"

	        response = requests.get(
	            "https://api.github.com/search/repositories",
	            headers=headers,
	            params={
	                "q": query,
	                "sort": sort,
	                "order": order,
	                "per_page": per_page,
	            },
	            timeout=10,  # requests waits forever without an explicit timeout
	        )

	        if response.status_code != 200:
	            print(f"GitHub API Error: {response.status_code} - {response.text}")
	            return []

	        return response.json().get("items", [])
	    except Exception as e:
	        print(f"Error in GitHub search: {e}")
	        return []

	# Stack Overflow API integration
	def search_stackoverflow(query, sort="votes", site="stackoverflow", pagesize=10, tagged=None):
	    """Search questions through the Stack Exchange ``/search/advanced`` API.

	    Args:
	        query: Text that must appear in the question title. ``[tag:name]``
	            markers inside it are removed from the title text and sent as
	            tag filters instead.
	        sort: Value for the API's ``sort`` parameter.
	        site: Stack Exchange site to search.
	        pagesize: Maximum number of results requested.
	        tagged: Optional extra tag filter; a ``;``-separated string or an
	            iterable of tag names.

	    Returns:
	        The ``items`` list from the response with ``creation_date`` converted
	        to a ``YYYY-MM-DD`` string (UTC), or ``[]`` on any error.
	    """
	    # Local import: the file-level import only brings in datetime/timedelta.
	    from datetime import timezone

	    try:
	        text = query or ""
	        tag_marker = r"\[tag:([^\]\s]+)\]"
	        # "intitle" matches literal title text, so a "[tag:x]" marker left in
	        # it filters out every question. Move markers to the "tagged" filter.
	        tags = re.findall(tag_marker, text)
	        title_text = " ".join(re.sub(tag_marker, " ", text).split())
	        if isinstance(tagged, str):
	            tags.extend(name for name in tagged.split(";") if name)
	        elif tagged:
	            tags.extend(tagged)

	        params = {
	            "order": "desc",
	            "sort": sort,
	            "site": site,
	            "pagesize": pagesize,
	            "intitle": title_text,
	        }
	        if tags:
	            params["tagged"] = ";".join(tags)

	        response = requests.get(
	            "https://api.stackexchange.com/2.3/search/advanced",
	            params=params,
	            timeout=10,  # requests waits forever without an explicit timeout
	        )

	        if response.status_code != 200:
	            print(f"Stack Exchange API Error: {response.status_code} - {response.text}")
	            return []

	        items = response.json().get("items", [])

	        # Convert Unix timestamps to readable dates. UTC keeps the date
	        # independent of the server's local timezone.
	        for item in items:
	            if "creation_date" in item:
	                created = datetime.fromtimestamp(item["creation_date"], tz=timezone.utc)
	                item["creation_date"] = created.strftime("%Y-%m-%d")

	        return items
	    except Exception as e:
	        print(f"Error in Stack Overflow search: {e}")
	        return []

	def get_stackoverflow_answers(question_id, site="stackoverflow"):
	    """Fetch the answers to one question, highest-voted first.

	    Args:
	        question_id: Id of the question on the Stack Exchange site.
	        site: Stack Exchange site the question belongs to.

	    Returns:
	        The ``items`` list from the response (requested with the
	        ``withbody`` filter) with ``creation_date`` converted to a
	        ``YYYY-MM-DD`` string (UTC), or ``[]`` on any error.
	    """
	    # Local import: the file-level import only brings in datetime/timedelta.
	    from datetime import timezone

	    try:
	        response = requests.get(
	            f"https://api.stackexchange.com/2.3/questions/{question_id}/answers",
	            params={
	                "order": "desc",
	                "sort": "votes",
	                "site": site,
	                "filter": "withbody",  # Include the answer body in the response
	            },
	            timeout=10,  # requests waits forever without an explicit timeout
	        )

	        if response.status_code != 200:
	            print(f"Stack Exchange API Error: {response.status_code} - {response.text}")
	            return []

	        items = response.json().get("items", [])

	        # UTC keeps the rendered date independent of the server's timezone.
	        for item in items:
	            if "creation_date" in item:
	                created = datetime.fromtimestamp(item["creation_date"], tz=timezone.utc)
	                item["creation_date"] = created.strftime("%Y-%m-%d")

	        return items
	    except Exception as e:
	        print(f"Error getting Stack Overflow answers: {e}")
	        return []

	def explain_code(code, model="llama3-70b-8192"):
	    """Ask the LLM for a structured explanation of ``code``.

	    Args:
	        code: Source code to explain. Blank input returns a prompt to the
	            user without calling the API.
	        model: Chat model identifier; defaults to the model that used to be
	            hard-coded here.

	    Returns:
	        ``"Code Explanation:\\n\\n<text>"`` on success, otherwise a message
	        starting with ``"Error explaining code:"``.
	    """
	    if not code or not code.strip():
	        return "Please provide some code to explain"

	    try:
	        system_prompt = (
	            "You are an expert programmer and code reviewer. Your task is to explain the provided code in a clear, concise manner. Include:"
	            "\n1. What the code does (high-level overview)"
	            "\n2. Key functions/components and their purposes"
	            "\n3. Potential issues or optimization opportunities"
	            "\n4. Any best practices that are followed or violated"
	        )

	        completion = client.chat.completions.create(
	            model=model,
	            messages=[
	                {"role": "system", "content": system_prompt},
	                {"role": "user", "content": f"Explain this code:\n```\n{code}\n```"},
	            ],
	            temperature=0.3,  # low temperature: explanations should be stable
	            max_tokens=1024,
	        )

	        explanation = completion.choices[0].message.content
	        return f"Code Explanation:\n\n{explanation}"
	    except Exception as e:
	        return f"Error explaining code: {str(e)}"

	def perform_repo_search(query, language, sort_by, min_stars):
	    """Run a GitHub repository search and format the hits as markdown.

	    Args:
	        query: Free-text search terms; blank input returns a prompt message.
	        language: Language filter; ``"any"`` or empty disables it.
	        sort_by: ``"updated"`` or ``"forks"``; anything else sorts by stars.
	        min_stars: Minimum star count as a number or numeric string. Zero,
	            empty or non-numeric values disable the filter.

	    Returns:
	        A markdown document with one section per repository, or a short
	        message when there is no query, no result, or an error.
	    """
	    try:
	        if not query:
	            return "Please enter a search query"

	        # Build the search query with filters
	        search_query = query
	        if language and str(language).lower() != "any":
	            search_query += f" language:{language}"

	        # UI widgets may deliver "100", 100 or 100.0; use one integer form so
	        # the qualifier never reads "stars:>=100.0".
	        try:
	            stars_floor = int(float(min_stars)) if min_stars not in (None, "") else 0
	        except (TypeError, ValueError):
	            stars_floor = 0
	        if stars_floor > 0:
	            search_query += f" stars:>={stars_floor}"

	        # Map sort_by to GitHub API parameters
	        sort_param = sort_by if sort_by in ("updated", "forks") else "stars"

	        results = search_github_repos(search_query, sort=sort_param)

	        if not results:
	            return "No repositories found. Try different search terms."

	        # Collect fragments and join once instead of repeated string +=.
	        parts = ["## GitHub Repository Search Results\n\n"]

	        for i, repo in enumerate(results, 1):
	            parts.append(f"### {i}. [{repo['full_name']}]({repo['html_url']})\n\n")

	            if repo.get('description'):
	                parts.append(f"{repo['description']}\n\n")

	            parts.append(f"Language: {repo.get('language') or 'Not specified'}\n")
	            parts.append(f"Stars: {repo['stargazers_count']} | Forks: {repo['forks_count']} | Watchers: {repo['watchers_count']}\n")
	            parts.append(f"Created: {repo['created_at'][:10]} | Updated: {repo['updated_at'][:10]}\n\n")

	            if repo.get('topics'):
	                parts.append(f"Topics: {', '.join(repo['topics'])}\n\n")

	            if repo.get('license') and repo['license'].get('name'):
	                parts.append(f"License: {repo['license']['name']}\n\n")

	            parts.append(f"[View Repository]({repo['html_url']}) | [Clone URL]({repo['clone_url']})\n\n")
	            parts.append("---\n\n")

	        return "".join(parts)
	    except Exception as e:
	        return f"Error searching for repositories: {str(e)}"

	def perform_stack_search(query, tag, sort_by):
	    """Run a Stack Overflow search and format the questions as markdown.

	    Args:
	        query: Free-text search terms; blank input returns a prompt message.
	        tag: Tag filter; ``"any"`` or empty disables it.
	        sort_by: ``"newest"`` or ``"activity"``; anything else sorts by votes.

	    Returns:
	        A markdown document with one section per question, or a short
	        message when there is no query, no result, or an error.
	    """
	    # Local import: html is not imported at file level.
	    import html

	    try:
	        if not query:
	            return "Please enter a search query"

	        # Add tag to query if specified
	        # NOTE(review): search_stackoverflow receives this marker inside the
	        # query text - confirm it turns "[tag:...]" into a tag filter.
	        if tag and tag != "any":
	            query_with_tag = f"{query} [tag:{tag}]"
	        else:
	            query_with_tag = query

	        # Map sort_by to Stack Exchange API parameters
	        sort_param = {"newest": "creation", "activity": "activity"}.get(sort_by, "votes")

	        results = search_stackoverflow(query_with_tag, sort=sort_param)

	        if not results:
	            return "No questions found. Try different search terms."

	        # Collect fragments and join once instead of repeated string +=.
	        parts = ["## Stack Overflow Search Results\n\n"]

	        for i, question in enumerate(results, 1):
	            # The API returns titles HTML-escaped (e.g. "&#39;"); decode for display.
	            title = html.unescape(question['title'])
	            parts.append(f"### {i}. [{title}]({question['link']})\n\n")

	            # Score and answer stats
	            parts.append(f"Score: {question['score']} | Answers: {question['answer_count']}")
	            # "is_answered" does not imply an accepted answer; only claim
	            # acceptance when an accepted answer id is present.
	            if question.get('accepted_answer_id'):
	                parts.append(" ✓ (Accepted answer available)")
	            elif question.get('is_answered'):
	                parts.append(" ✓ (Answered)")
	            parts.append("\n\n")

	            # Tags (loop variable no longer shadows the ``tag`` parameter)
	            if question.get('tags'):
	                parts.append("Tags: ")
	                parts.extend(f"`{question_tag}` " for question_tag in question['tags'])
	                parts.append("\n\n")

	            # Asked info
	            parts.append(f"Asked: {question['creation_date']} | Views: {question.get('view_count', 'N/A')}\n\n")

	            parts.append(f"[View Question]({question['link']})\n\n")
	            parts.append("---\n\n")

	        return "".join(parts)
	    except Exception as e:
	        return f"Error searching Stack Overflow: {str(e)}"

	# Code-file ingestion: read the upload, compute metrics, build a FAISS index.
	def process_code_file(file_obj):
	    """Analyze an uploaded code file and index it in FAISS when possible.

	    Args:
	        file_obj: One of: raw ``bytes``; a filesystem path (``str`` or
	            ``os.PathLike``); or an object with ``read()`` and optionally
	            ``name``. ``None`` means nothing was uploaded.

	    Returns:
	        A ``(session_id, status_message, metrics)`` tuple. ``session_id`` is
	        ``None`` when no index was created (embeddings unavailable, indexing
	        failed, or the file could not be read).
	    """
	    if file_obj is None:
	        return None, "No file uploaded", {}

	    try:
	        if isinstance(file_obj, (bytes, bytearray)):
	            raw = bytes(file_obj)
	            file_name = "uploaded_file"
	            file_extension = ".txt"  # Default extension
	        elif isinstance(file_obj, (str, os.PathLike)):
	            # Uploads may arrive as a path rather than an open file.
	            path = Path(file_obj)
	            raw = path.read_bytes()
	            file_name = path.name
	            file_extension = path.suffix.lower()
	        else:
	            raw = file_obj.read()
	            # Keep only the base name so temp directories do not leak into
	            # status messages and chunk metadata.
	            file_name = Path(str(getattr(file_obj, 'name', 'uploaded_file'))).name
	            file_extension = Path(file_name).suffix.lower()

	        # Text-mode file objects already return str; only bytes need decoding.
	        content = raw if isinstance(raw, str) else raw.decode('utf-8', errors='replace')

	        language = detect_language(file_extension)

	        # Calculate metrics
	        metrics = calculate_complexity_metrics(content, language)

	        # Create vectorstore if embeddings are available
	        session_id = None
	        if embeddings:
	            try:
	                print(f"Creating FAISS index for {file_name}...")
	                # Smaller chunks than for prose so snippets stay focused.
	                text_splitter = RecursiveCharacterTextSplitter(
	                    chunk_size=500,
	                    chunk_overlap=50,
	                    separators=["\n\n", "\n", " ", ""],
	                )
	                chunks = text_splitter.create_documents(
	                    [content],
	                    metadatas=[{"filename": file_name, "language": language}],
	                )

	                # Add source metadata to help with retrieval
	                for i, chunk in enumerate(chunks):
	                    chunk.metadata["chunk_id"] = i
	                    chunk.metadata["source"] = file_name

	                # Create and store vectorstore
	                vectorstore = FAISS.from_documents(chunks, embeddings)
	                new_session_id = str(uuid.uuid4())
	                vectorstore.save_local(os.path.join(FAISS_INDEX_DIR, new_session_id))
	                user_vectorstores[new_session_id] = vectorstore
	                # Publish the id only after the index is saved and registered.
	                session_id = new_session_id

	                # Add number of chunks to metrics for display
	                metrics["chunks"] = len(chunks)
	                print(f"Successfully created FAISS index with {len(chunks)} chunks.")
	            except Exception as e:
	                # Indexing is best-effort; the metrics are still returned.
	                print(f"Warning: Failed to create vectorstore: {e}")

	        # Do not claim the file was indexed when no index exists.
	        if session_id:
	            status = f"✅ Successfully analyzed {file_name} and stored in FAISS index"
	        else:
	            status = f"✅ Successfully analyzed {file_name} (FAISS index not created)"
	        return session_id, status, metrics
	    except Exception as e:
	        return None, f"Error processing file: {str(e)}", {}

	# Gradio Interface
	# MODERN_UI is a complete HTML document, not a stylesheet, so it is rendered
	# through gr.HTML only and no longer passed as the ``css`` argument.
	with gr.Blocks(analytics_enabled=False) as demo:
	    current_session_id = gr.State(None)
	    code_state = gr.State({})

	    # Hidden components for functionality
	    file_input = gr.File(visible=False)
	    chat_history = gr.State([])

	    # Main UI
	    gr.HTML(MODERN_UI)

	    # Event handlers
	    def process_uploaded_file(file):
	        """Analyze an uploaded file and return the result as a plain dict."""
	        session_id, status_msg, metrics = process_code_file(file)
	        return {
	            "session_id": session_id,
	            "status": status_msg,
	            "metrics": metrics,
	        }

	    def handle_chat(message, session_id, model_name, history):
	        """Return only the newest reply text, or "" when there is no history."""
	        updated_history = generate_response(message, session_id, model_name, history)
	        return updated_history[-1]["content"] if updated_history else ""

	    # API endpoints
	    # NOTE(review): this only sets an attribute on ``demo``; nothing visible in
	    # this file binds the handlers to a component event - confirm these routes
	    # are actually served, otherwise wire them through component events.
	    demo.api_endpoints = [
	        ["/api/analyze", process_uploaded_file, "POST"],
	        ["/api/chat", handle_chat, "POST"],
	        ["/api/github_search", search_github_repos, "POST"],
	        ["/api/stack_search", search_stackoverflow, "POST"],
	        ["/api/explain_code", explain_code, "POST"],
	    ]

	# Launch the application
	if __name__ == "__main__":
	    demo.launch()