"""Tech-Vision AI: a Gradio app for code analysis and technical Q&A.

The app lets a user upload a source file, indexes it in a FAISS vector store
for retrieval-augmented chat through the Groq API, and offers GitHub search,
Stack Overflow search and an LLM-based code explainer as side tools.
"""

import base64
import io
import json
import os
import re
import tempfile
import uuid
from datetime import datetime, timedelta
from pathlib import Path

import fitz  # PyMuPDF
import gradio as gr
import groq
import numpy as np
import requests
import torch
from dotenv import load_dotenv
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceInstructEmbeddings
from langchain_community.vectorstores import FAISS
from PIL import Image

# Load environment variables
load_dotenv()
client = groq.Client(api_key=os.getenv("GROQ_TECH_API_KEY"))

# Initialize embeddings with error handling. Both attempts force CPU usage;
# when both fail the app still runs, but without retrieval (embeddings = None).
try:
    embeddings = HuggingFaceInstructEmbeddings(
        model_name="hkunlp/instructor-base",
        model_kwargs={"device": "cpu"},
    )
except Exception as e:
    print(f"Warning: Failed to load primary embeddings model: {e}")
    try:
        embeddings = HuggingFaceInstructEmbeddings(
            model_name="all-MiniLM-L6-v2",
            model_kwargs={"device": "cpu"},
        )
    except Exception as e:
        print(f"Warning: Failed to load fallback embeddings model: {e}")
        embeddings = None

# Directory to store FAISS indexes
FAISS_INDEX_DIR = "faiss_indexes_tech_cpu"
os.makedirs(FAISS_INDEX_DIR, exist_ok=True)

# Dictionary to store user-specific vectorstores, keyed by session id
user_vectorstores = {}

# Upper bound (seconds) for every outbound HTTP request, so a stalled remote
# API cannot block a UI worker forever.
REQUEST_TIMEOUT_SECONDS = 10

# Custom CSS for Tech theme
custom_css = """
:root {
    --primary-color: #4285F4;
    --secondary-color: #34A853;
    --accent-color: #EA4335;
    --light-background: #F8F9FA;
    --dark-text: #202124;
    --white: #FFFFFF;
    --border-color: #DADCE0;
    --code-bg: #F1F3F4;
}
body {
    background-color: var(--light-background);
    font-family: 'Google Sans', 'Roboto', sans-serif;
}
.container {
    max-width: 1200px !important;
    margin: 0 auto !important;
    padding: 10px;
}
.header {
    background-color: var(--white);
    border-bottom: 1px solid var(--border-color);
    padding: 15px 0;
    margin-bottom: 20px;
    border-radius: 12px 12px 0 0;
    box-shadow: 0 2px 4px rgba(0,0,0,0.05);
}
.header-title {
    color: var(--primary-color);
    font-size: 1.8rem;
    font-weight: 700;
    text-align: center;
}
.header-subtitle {
    color: var(--dark-text);
    font-size: 1rem;
    text-align: center;
    margin-top: 5px;
}
.chat-container {
    border-radius: 12px !important;
    box-shadow: 0 4px 6px rgba(0,0,0,0.1) !important;
    background-color: var(--white) !important;
    border: 1px solid var(--border-color) !important;
    min-height: 500px;
}
.tool-container {
    background-color: var(--white);
    border-radius: 12px;
    box-shadow: 0 4px 6px rgba(0,0,0,0.1);
    padding: 15px;
    margin-bottom: 20px;
}
.code-block {
    background-color: var(--code-bg);
    padding: 12px;
    border-radius: 8px;
    font-family: 'Roboto Mono', monospace;
    overflow-x: auto;
    margin: 10px 0;
    border-left: 3px solid var(--primary-color);
}
"""

# ---------------------------------------------------------------------------
# Helper functions for code analysis
# ---------------------------------------------------------------------------

# Languages whose line comments start with "#". "Unknown" is included as a
# best effort for files whose extension could not be mapped.
_HASH_COMMENT_LANGUAGES = frozenset({"Python", "Ruby", "PHP", "Unknown"})
# Languages that do NOT use "//" and "/* ... */" comments.
_NO_SLASH_COMMENT_LANGUAGES = frozenset({"Python", "Ruby"})

# Heuristic for C-like function definitions: "<type tokens> <name>(<params>)"
# ending the line (optionally followed by "{"), not starting with a control
# keyword and not ending with ";".
_C_STYLE_FUNCTION_RE = re.compile(
    r"^\s*(?!(?:if|else|for|foreach|while|switch|return|catch|do|new|throw|using|lock)\b)"
    r"[\w<>\[\],.*&:\s]+[\s*&][\w:~]+\s*\([^;{}]*\)"
    r"\s*(?:const\b\s*)?(?:throws\s+[\w.,\s]+?)?\s*\{?\s*$"
)
_SCRIPT_FUNCTION_RE = re.compile(r"\bfunction\b|=>")
_FUNCTION_PATTERNS = {
    "Python": re.compile(r"^\s*(?:async\s+)?def\s+\w+"),
    "Ruby": re.compile(r"^\s*def\s+\w+"),
    "Go": re.compile(r"^\s*func\b"),
    "JavaScript": _SCRIPT_FUNCTION_RE,
    "TypeScript": _SCRIPT_FUNCTION_RE,
    "PHP": re.compile(r"\bfunction\b"),
    "Java": _C_STYLE_FUNCTION_RE,
    "C++": _C_STYLE_FUNCTION_RE,
    "C": _C_STYLE_FUNCTION_RE,
    "C#": _C_STYLE_FUNCTION_RE,
}
_CLASS_RE = re.compile(r"^\s*(?:\w+\s+)*class\s+\w+")
_GO_STRUCT_RE = re.compile(r"^\s*type\s+\w+\s+struct\b")
_DECISION_RE = re.compile(r"\b(?:if|elif|for|foreach|while|case|catch|except)\b|&&|\|\|")
_WORD_BOOLEAN_RE = re.compile(r"\b(?:and|or)\b")


def detect_language(extension):
    """Detect programming language from file extension.

    Args:
        extension: File suffix including the leading dot (case-insensitive).

    Returns:
        The language name, or "Unknown" for unmapped extensions.
    """
    extension_map = {
        ".py": "Python",
        ".js": "JavaScript",
        ".java": "Java",
        ".cpp": "C++",
        ".c": "C",
        ".cs": "C#",
        ".php": "PHP",
        ".rb": "Ruby",
        ".go": "Go",
        ".ts": "TypeScript",
    }
    return extension_map.get(extension.lower(), "Unknown")


def _partition_lines(lines, language):
    """Split non-blank lines into comment lines and code lines.

    Only whole-line comments are recognised ("#", "//" and "/* ... */"
    depending on the language); trailing comments and docstrings count as
    code.

    Returns:
        A ``(comment_line_count, code_lines)`` tuple.
    """
    hash_style = language in _HASH_COMMENT_LANGUAGES
    slash_style = language not in _NO_SLASH_COMMENT_LANGUAGES
    comment_count = 0
    code_lines = []
    in_block_comment = False
    for raw_line in lines:
        line = raw_line.strip()
        if not line:
            continue
        if in_block_comment:
            comment_count += 1
            in_block_comment = "*/" not in line
        elif slash_style and line.startswith("/*"):
            comment_count += 1
            in_block_comment = "*/" not in line[2:]
        elif (slash_style and line.startswith("//")) or (hash_style and line.startswith("#")):
            comment_count += 1
        else:
            code_lines.append(raw_line)
    return comment_count, code_lines


def calculate_complexity_metrics(content, language):
    """Calculate code size and rough complexity metrics.

    The structural numbers (comments, functions, classes, complexity) are
    regex-based estimates computed line by line, not the result of parsing.

    Args:
        content: Full text of the source file.
        language: Language name as returned by ``detect_language``.

    Returns:
        A dict with the keys ``language``, ``total_lines``, ``code_lines``
        (non-blank lines, comments included), ``blank_lines``, ``comments``,
        ``functions``, ``classes`` and ``cyclomatic_complexity`` (one plus the
        number of decision points per detected function).
    """
    lines = content.split('\n')
    total_lines = len(lines)
    blank_lines = sum(1 for line in lines if not line.strip())

    comments, code_only = _partition_lines(lines, language)

    function_re = _FUNCTION_PATTERNS.get(language)
    functions = sum(1 for line in code_only if function_re.search(line)) if function_re else 0

    class_re = _GO_STRUCT_RE if language == "Go" else _CLASS_RE
    classes = sum(1 for line in code_only if class_re.search(line))

    decisions = sum(len(_DECISION_RE.findall(line)) for line in code_only)
    if language in ("Python", "Ruby"):
        decisions += sum(len(_WORD_BOOLEAN_RE.findall(line)) for line in code_only)

    return {
        "language": language,
        "total_lines": total_lines,
        "code_lines": total_lines - blank_lines,
        "blank_lines": blank_lines,
        "comments": comments,
        "functions": functions,
        "classes": classes,
        # Averaged per function so the ">10" threshold used by the
        # recommendations refers to a typical function, not the whole file.
        "cyclomatic_complexity": 1 + decisions // max(functions, 1),
    }


def generate_recommendations(metrics):
    """Generate code quality recommendations based on metrics.

    Args:
        metrics: Dict as produced by ``calculate_complexity_metrics``; may be
            empty when no file has been analyzed.

    Returns:
        A markdown section listing the recommendations, or "" when there is
        nothing to recommend (including for empty metrics).
    """
    if not metrics:
        return ""

    recommendations = []
    if metrics.get("cyclomatic_complexity", 0) > 10:
        recommendations.append("🔄 High cyclomatic complexity detected. Consider breaking down complex functions.")
    if metrics.get("code_lines", 0) > 300:
        recommendations.append("📏 File is quite large. Consider splitting it into multiple modules.")
    if metrics.get("functions", 0) > 10:
        recommendations.append("🔧 Large number of functions. Consider grouping related functions into classes.")

    code_lines = metrics.get("code_lines", 0)
    # A comment ratio is only meaningful when there is code to comment on.
    if code_lines > 0 and metrics.get("comments", 0) / code_lines < 0.1:
        recommendations.append("📝 Low comment ratio. Consider adding more documentation.")

    if not recommendations:
        return ""
    return "### Recommendations\n\n" + "\n\n".join(recommendations)


def format_code_metrics(state):
    """Render the metrics dict of an analyzed file as markdown for the UI."""
    if not state:
        return "No code analyzed yet"
    return (
        f"### Code Analysis Results\n\n"
        f"**Language:** {state.get('language', 'Unknown')}\n"
        f"**Total Lines:** {state.get('total_lines', 0)}\n"
        f"**Code Lines:** {state.get('code_lines', 0)}\n"
        f"**Comment Lines:** {state.get('comments', 0)}\n"
        f"**Functions:** {state.get('functions', 0)}\n"
        f"**Classes:** {state.get('classes', 0)}\n"
        f"**Complexity Score:** {state.get('cyclomatic_complexity', 0)}\n"
    )


# ---------------------------------------------------------------------------
# PDF processing
# ---------------------------------------------------------------------------

def _empty_pdf_state():
    """Return a fresh PDF viewer state describing "no document loaded"."""
    return {"page_images": [], "total_pages": 0, "total_words": 0}


def process_pdf(pdf_file):
    """Index a PDF in FAISS and render its pages as base64 PNG images.

    Args:
        pdf_file: Raw PDF bytes, or None when nothing was uploaded.

    Returns:
        A ``(session_id, status_message, pdf_state)`` tuple. ``session_id`` is
        None on failure; ``pdf_state`` holds ``page_images``, ``total_pages``
        and ``total_words``.
    """
    if pdf_file is None:
        return None, "No file uploaded", _empty_pdf_state()
    if embeddings is None:
        return None, "Error processing PDF: embeddings model is not available", _empty_pdf_state()

    pdf_path = None
    try:
        session_id = str(uuid.uuid4())
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as temp_file:
            temp_file.write(pdf_file)
            pdf_path = temp_file.name

        doc = fitz.open(pdf_path)
        try:
            texts = []
            page_images = []
            for page in doc:
                texts.append(page.get_text())
                img_bytes = page.get_pixmap().tobytes("png")
                page_images.append(base64.b64encode(img_bytes).decode("utf-8"))
            total_pages = len(doc)
        finally:
            # Release the document even when text/image extraction fails.
            doc.close()

        total_words = sum(len(text.split()) for text in texts)

        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        chunks = text_splitter.create_documents(texts)
        vectorstore = FAISS.from_documents(chunks, embeddings)
        vectorstore.save_local(os.path.join(FAISS_INDEX_DIR, session_id))
        user_vectorstores[session_id] = vectorstore

        pdf_state = {"page_images": page_images, "total_pages": total_pages, "total_words": total_words}
        return session_id, f"✅ Successfully processed {len(chunks)} text chunks from your PDF", pdf_state
    except Exception as e:
        return None, f"Error processing PDF: {str(e)}", _empty_pdf_state()
    finally:
        # The temporary copy is only needed while PyMuPDF reads it.
        if pdf_path and os.path.exists(pdf_path):
            os.unlink(pdf_path)


# ---------------------------------------------------------------------------
# Chat
# ---------------------------------------------------------------------------

def _build_retrieval_context(message, session_id):
    """Return formatted code segments relevant to ``message``, or ""."""
    if not (embeddings and session_id and session_id in user_vectorstores):
        return ""
    try:
        print(f"Performing similarity search with session: {session_id}")
        vectorstore = user_vectorstores[session_id]
        # Use a higher k value to get more relevant context
        docs = vectorstore.similarity_search(message, k=5)
        if not docs:
            return ""
        # Format the context with source information
        parts = ["\n\nRelevant code context from your files:\n\n"]
        for i, doc in enumerate(docs, 1):
            source = doc.metadata.get("source", "Unknown")
            language = doc.metadata.get("language", "Unknown")
            parts.append(f"--- Segment {i} from {source} ({language}) ---\n")
            parts.append(f"```\n{doc.page_content}\n```\n\n")
        print(f"Found {len(docs)} relevant code segments for context.")
        return "".join(parts)
    except Exception as e:
        print(f"Warning: Failed to perform similarity search: {e}")
        return ""


def generate_response(message, session_id, model_name, history):
    """Generate chatbot responses with FAISS context enhancement.

    Args:
        message: The user's message; an empty message leaves history as is.
        session_id: Key into ``user_vectorstores`` for retrieval, or None.
        model_name: Groq model identifier.
        history: Chat history as a list of ``{"role", "content"}`` dicts
            (Gradio "messages" format); None is treated as empty.

    Returns:
        The history list with the user message and the assistant reply (or an
        error message) appended.
    """
    # Work on a real list so appending never fails for None/tuple input.
    history = list(history) if history else []
    if not message:
        return history

    try:
        context = _build_retrieval_context(message, session_id)

        system_prompt = (
            "You are a technical assistant specializing in software development and programming. "
            "Provide clear, accurate responses with code examples when relevant. "
            "Format code snippets with proper markdown code blocks and specify the language."
        )
        if context:
            system_prompt += f"\n\nUse this context from the uploaded code files to inform your answers:{context}"
            # Add instruction to reference specific file parts
            system_prompt += "\nWhen discussing code from the uploaded files, specifically reference the file name and segment number."

        completion = client.chat.completions.create(
            model=model_name,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": message},
            ],
            temperature=0.7,
            max_tokens=1024,
        )
        reply = completion.choices[0].message.content
    except Exception as e:
        reply = f"Error generating response: {str(e)}"

    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": reply})
    return history


# ---------------------------------------------------------------------------
# PDF viewer helpers
# ---------------------------------------------------------------------------

def update_pdf_viewer(pdf_state):
    """Return ``(total_pages, first_page_image, stats_markdown)`` for the viewer."""
    pdf_state = pdf_state or {}
    total_pages = pdf_state.get("total_pages", 0)
    if not total_pages:
        return 0, None, "No PDF uploaded yet"
    try:
        img_data = base64.b64decode(pdf_state["page_images"][0])
        img = Image.open(io.BytesIO(img_data))
        return total_pages, img, f"**Total Pages:** {total_pages}\n**Total Words:** {pdf_state['total_words']}"
    except Exception as e:
        print(f"Error decoding image: {e}")
        return 0, None, "Error displaying PDF"


def update_image(page_num, pdf_state):
    """Return the image of the 1-based page ``page_num``, or None if unavailable."""
    pdf_state = pdf_state or {}
    total_pages = pdf_state.get("total_pages", 0)
    if not total_pages or page_num is None:
        return None
    try:
        # UI number inputs may deliver floats; list indexing needs an int.
        page_index = int(page_num)
        if page_index < 1 or page_index > total_pages:
            return None
        img_data = base64.b64decode(pdf_state["page_images"][page_index - 1])
        return Image.open(io.BytesIO(img_data))
    except Exception as e:
        print(f"Error decoding image: {e}")
        return None


# ---------------------------------------------------------------------------
# GitHub API integration
# ---------------------------------------------------------------------------

def search_github_repos(query, sort="stars", order="desc", per_page=10):
    """Search for GitHub repositories.

    Uses the ``GITHUB_TOKEN`` environment variable for authentication when it
    is set.

    Returns:
        The list of repository dicts from the API, or [] on any error.
    """
    try:
        github_token = os.getenv("GITHUB_TOKEN", "")
        headers = {}
        if github_token:
            headers["Authorization"] = f"token {github_token}"

        params = {
            "q": query,
            "sort": sort,
            "order": order,
            "per_page": per_page,
        }
        response = requests.get(
            "https://api.github.com/search/repositories",
            headers=headers,
            params=params,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        if response.status_code != 200:
            print(f"GitHub API Error: {response.status_code} - {response.text}")
            return []

        return response.json().get("items", [])
    except Exception as e:
        print(f"Error in GitHub search: {e}")
        return []


# ---------------------------------------------------------------------------
# Stack Overflow API integration
# ---------------------------------------------------------------------------

def _format_creation_dates(items):
    """Replace Unix ``creation_date`` timestamps with YYYY-MM-DD strings in place."""
    for item in items:
        if "creation_date" in item:
            item["creation_date"] = datetime.fromtimestamp(item["creation_date"]).strftime("%Y-%m-%d")
    return items


def search_stackoverflow(query, sort="votes", site="stackoverflow", pagesize=10, tagged=None):
    """Search for questions on Stack Overflow.

    Args:
        query: Free-text search terms.
        sort: Stack Exchange sort key ("votes", "creation", "activity", ...).
        site: Stack Exchange site name.
        pagesize: Maximum number of results.
        tagged: Optional tag (or semicolon-delimited tags) to filter by.

    Returns:
        The list of question dicts with ``creation_date`` formatted as
        YYYY-MM-DD, or [] on any error.
    """
    try:
        params = {
            "order": "desc",
            "sort": sort,
            "site": site,
            "pagesize": pagesize,
            # /search/advanced takes free text in "q"; "intitle" belongs to
            # the plain /search endpoint.
            "q": query,
        }
        if tagged:
            params["tagged"] = tagged

        response = requests.get(
            "https://api.stackexchange.com/2.3/search/advanced",
            params=params,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        if response.status_code != 200:
            print(f"Stack Exchange API Error: {response.status_code} - {response.text}")
            return []

        return _format_creation_dates(response.json().get("items", []))
    except Exception as e:
        print(f"Error in Stack Overflow search: {e}")
        return []


def get_stackoverflow_answers(question_id, site="stackoverflow"):
    """Get answers for a specific question on Stack Overflow.

    Returns:
        The list of answer dicts (bodies included, dates formatted as
        YYYY-MM-DD), or [] on any error.
    """
    try:
        params = {
            "order": "desc",
            "sort": "votes",
            "site": site,
            "filter": "withbody",  # Include the answer body in the response
        }
        response = requests.get(
            f"https://api.stackexchange.com/2.3/questions/{question_id}/answers",
            params=params,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        if response.status_code != 200:
            print(f"Stack Exchange API Error: {response.status_code} - {response.text}")
            return []

        return _format_creation_dates(response.json().get("items", []))
    except Exception as e:
        print(f"Error getting Stack Overflow answers: {e}")
        return []


# ---------------------------------------------------------------------------
# Tool handlers used by the UI
# ---------------------------------------------------------------------------

def explain_code(code):
    """Explain code using LLM.

    Returns:
        Markdown with the explanation, a prompt to paste code when ``code`` is
        empty, or an error message when the LLM call fails.
    """
    if not code or not code.strip():
        return "Please paste some code to explain"
    try:
        system_prompt = "You are an expert programmer and code reviewer. Your task is to explain the provided code in a clear, concise manner. Include:"
        system_prompt += "\n1. What the code does (high-level overview)"
        system_prompt += "\n2. Key functions/components and their purposes"
        system_prompt += "\n3. Potential issues or optimization opportunities"
        system_prompt += "\n4. Any best practices that are followed or violated"

        completion = client.chat.completions.create(
            model="llama3-70b-8192",  # Using more capable model for code explanation
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"Explain this code:\n```\n{code}\n```"},
            ],
            temperature=0.3,
            max_tokens=1024,
        )
        explanation = completion.choices[0].message.content
        return f"**Code Explanation:**\n\n{explanation}"
    except Exception as e:
        return f"Error explaining code: {str(e)}"


def perform_repo_search(query, language, sort_by, min_stars):
    """Perform GitHub repository search with UI parameters.

    Args:
        query: Search keywords.
        language: Language filter, or "any"/empty for no filter.
        sort_by: "stars", "forks" or "updated"; anything else sorts by stars.
        min_stars: Minimum star count (string or number); "0" disables it.

    Returns:
        Markdown listing the results, or a short message when there is no
        query, no result, or an error.
    """
    try:
        if not query:
            return "Please enter a search query"

        # Build the search query with filters
        search_query = query
        if language and language != "any":
            search_query += f" language:{language}"
        if min_stars and str(min_stars) != "0":
            search_query += f" stars:>={min_stars}"

        # Map sort_by to GitHub API parameters
        sort_param = sort_by if sort_by in ("updated", "forks") else "stars"

        results = search_github_repos(search_query, sort=sort_param)
        if not results:
            return "No repositories found. Try different search terms."

        # Format results as markdown
        parts = ["## GitHub Repository Search Results\n\n"]
        for i, repo in enumerate(results, 1):
            parts.append(f"### {i}. [{repo['full_name']}]({repo['html_url']})\n\n")
            if repo['description']:
                parts.append(f"{repo['description']}\n\n")
            parts.append(f"**Language:** {repo['language'] or 'Not specified'}\n")
            parts.append(
                f"**Stars:** {repo['stargazers_count']} | **Forks:** {repo['forks_count']} | **Watchers:** {repo['watchers_count']}\n"
            )
            parts.append(f"**Created:** {repo['created_at'][:10]} | **Updated:** {repo['updated_at'][:10]}\n\n")
            if repo.get('topics'):
                parts.append(f"**Topics:** {', '.join(repo['topics'])}\n\n")
            if repo.get('license') and repo['license'].get('name'):
                parts.append(f"**License:** {repo['license']['name']}\n\n")
            parts.append(f"[View Repository]({repo['html_url']}) | [Clone URL]({repo['clone_url']})\n\n")
            parts.append("---\n\n")
        return "".join(parts)
    except Exception as e:
        return f"Error searching for repositories: {str(e)}"


def perform_stack_search(query, tag, sort_by):
    """Perform Stack Overflow search with UI parameters.

    Args:
        query: Free-text search terms.
        tag: Tag filter, or "any"/empty for no filter.
        sort_by: "votes", "newest" or "activity"; anything else sorts by votes.

    Returns:
        Markdown listing the results, or a short message when there is no
        query, no result, or an error.
    """
    try:
        if not query:
            return "Please enter a search query"

        # The tag goes into the API's dedicated filter, not the search text.
        tagged = tag if tag and tag != "any" else None

        # Map sort_by to Stack Exchange API parameters
        sort_param = {"newest": "creation", "activity": "activity"}.get(sort_by, "votes")

        results = search_stackoverflow(query, sort=sort_param, tagged=tagged)
        if not results:
            return "No questions found. Try different search terms."

        # Format results as markdown
        parts = ["## Stack Overflow Search Results\n\n"]
        for i, question in enumerate(results, 1):
            parts.append(f"### {i}. [{question['title']}]({question['link']})\n\n")

            # Score and answer stats
            parts.append(f"**Score:** {question['score']} | **Answers:** {question['answer_count']}")
            if question.get('is_answered'):
                parts.append(" ✓ (Accepted answer available)")
            parts.append("\n\n")

            # Tags
            if question.get('tags'):
                parts.append("**Tags:** ")
                for question_tag in question['tags']:
                    parts.append(f"`{question_tag}` ")
                parts.append("\n\n")

            # Asked info
            parts.append(f"**Asked:** {question['creation_date']} | **Views:** {question.get('view_count', 'N/A')}\n\n")
            parts.append(f"[View Question]({question['link']})\n\n")
            parts.append("---\n\n")
        return "".join(parts)
    except Exception as e:
        return f"Error searching Stack Overflow: {str(e)}"


def _read_uploaded_file(file_obj):
    """Return ``(content, file_name, file_extension)`` for an uploaded file.

    Accepts raw bytes (no name available), a filesystem path, or a file-like
    object with ``read()`` and optionally ``name``.
    """
    if isinstance(file_obj, (bytes, bytearray)):
        # Raw bytes carry no file name, so the language cannot be detected.
        return bytes(file_obj).decode('utf-8', errors='replace'), "uploaded_file", ".txt"

    if isinstance(file_obj, (str, os.PathLike)):
        path = Path(file_obj)
        return path.read_bytes().decode('utf-8', errors='replace'), path.name, path.suffix.lower()

    data = file_obj.read()
    content = data.decode('utf-8', errors='replace') if isinstance(data, bytes) else str(data)
    file_name = Path(getattr(file_obj, 'name', 'uploaded_file')).name
    return content, file_name, Path(file_name).suffix.lower()


def process_code_file(file_obj):
    """Process uploaded code files and store in FAISS index.

    Args:
        file_obj: Uploaded file as a path, raw bytes, or file-like object;
            None when nothing was uploaded.

    Returns:
        A ``(session_id, status_message, metrics)`` tuple. ``session_id`` is
        None when the file could not be indexed (for example because no
        embeddings model is loaded); ``metrics`` is {} when processing failed.
    """
    if file_obj is None:
        return None, "No file uploaded", {}

    try:
        content, file_name, file_extension = _read_uploaded_file(file_obj)
        language = detect_language(file_extension)

        # Calculate metrics
        metrics = calculate_complexity_metrics(content, language)

        # Create vectorstore if embeddings are available
        session_id = None
        if embeddings:
            try:
                print(f"Creating FAISS index for {file_name}...")
                # Smaller chunks than for prose, since code is denser
                text_splitter = RecursiveCharacterTextSplitter(
                    chunk_size=500,
                    chunk_overlap=50,
                    separators=["\n\n", "\n", " ", ""],
                )
                chunks = text_splitter.create_documents(
                    [content],
                    metadatas=[{"filename": file_name, "language": language}],
                )

                # Add source metadata to help with retrieval
                for i, chunk in enumerate(chunks):
                    chunk.metadata["chunk_id"] = i
                    chunk.metadata["source"] = file_name

                # Create and store vectorstore
                vectorstore = FAISS.from_documents(chunks, embeddings)
                new_session_id = str(uuid.uuid4())
                vectorstore.save_local(os.path.join(FAISS_INDEX_DIR, new_session_id))
                user_vectorstores[new_session_id] = vectorstore
                session_id = new_session_id

                # Add number of chunks to metrics for display
                metrics["chunks"] = len(chunks)
                print(f"Successfully created FAISS index with {len(chunks)} chunks.")
            except Exception as e:
                print(f"Warning: Failed to create vectorstore: {e}")

        if session_id:
            status = f"✅ Successfully analyzed {file_name} and stored in FAISS index"
        else:
            # Do not claim the file is searchable when indexing did not happen.
            status = f"✅ Successfully analyzed {file_name} (not indexed, so chat will not use its contents)"
        return session_id, status, metrics
    except Exception as e:
        return None, f"Error processing file: {str(e)}", {}


# ---------------------------------------------------------------------------
# Gradio interface
# ---------------------------------------------------------------------------

with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
    current_session_id = gr.State(None)
    code_state = gr.State({})

    gr.HTML("""
Tech-Vision AI
Advanced Code Analysis & Technical Assistant
""")

    with gr.Row(elem_classes="container"):
        with gr.Column(scale=1, min_width=300):
            file_input = gr.File(
                label="Upload Code File",
                file_types=[".py", ".js", ".java", ".cpp", ".c", ".cs", ".php", ".rb", ".go", ".ts"],
                # A path (rather than raw bytes) keeps the file name, which is
                # needed to detect the language from the extension.
                type="filepath",
            )
            upload_button = gr.Button("Analyze Code", variant="primary")
            file_status = gr.Markdown("No file uploaded yet")
            model_dropdown = gr.Dropdown(
                choices=["llama3-70b-8192", "mixtral-8x7b-32768", "gemma-7b-it"],
                value="llama3-70b-8192",
                label="Select Model",
            )

            # Developer Tools Section
            gr.Markdown("### Developer Tools", elem_classes="tool-title")
            with gr.Group(elem_classes="tool-container"):
                with gr.Tabs():
                    with gr.TabItem("GitHub Search"):
                        repo_query = gr.Textbox(
                            label="Search Query",
                            placeholder="Enter keywords to search for repositories",
                        )
                        with gr.Row():
                            language = gr.Dropdown(
                                choices=["any", "JavaScript", "Python", "Java", "C++", "TypeScript", "Go", "Rust", "PHP", "C#"],
                                value="any",
                                label="Language",
                            )
                            min_stars = gr.Dropdown(
                                choices=["0", "10", "50", "100", "1000", "10000"],
                                value="0",
                                label="Min Stars",
                            )
                            sort_by = gr.Dropdown(
                                choices=["stars", "forks", "updated"],
                                value="stars",
                                label="Sort By",
                            )
                        repo_search_btn = gr.Button("Search Repositories")

                    with gr.TabItem("Stack Overflow"):
                        stack_query = gr.Textbox(
                            label="Search Query",
                            placeholder="Enter your technical question",
                        )
                        with gr.Row():
                            tag = gr.Dropdown(
                                choices=["any", "python", "javascript", "java", "c++", "react", "node.js", "android", "ios", "sql"],
                                value="any",
                                label="Tag",
                            )
                            so_sort_by = gr.Dropdown(
                                choices=["votes", "newest", "activity"],
                                value="votes",
                                label="Sort By",
                            )
                        so_search_btn = gr.Button("Search Stack Overflow")

                    with gr.TabItem("Code Explainer"):
                        code_input = gr.Textbox(
                            label="Code to Explain",
                            placeholder="Paste your code here...",
                            lines=10,
                        )
                        explain_btn = gr.Button("Explain Code")

        with gr.Column(scale=2, min_width=600):
            with gr.Tabs():
                with gr.TabItem("Code Analysis"):
                    with gr.Column(elem_classes="code-viewer-container"):
                        code_metrics = gr.Markdown("No code analyzed yet", elem_classes="stats-box")
                        code_recommendations = gr.Markdown("", elem_classes="recommendations-box")
                with gr.TabItem("GitHub Results"):
                    repo_results = gr.Markdown("Search for repositories to see results here")
                with gr.TabItem("Stack Overflow Results"):
                    stack_results = gr.Markdown("Search for questions to see results here")
                with gr.TabItem("Code Explanation"):
                    code_explanation = gr.Markdown("Paste your code and click 'Explain Code' to see an explanation here")

    with gr.Row(elem_classes="container"):
        with gr.Column(scale=2, min_width=600):
            chatbot = gr.Chatbot(
                height=500,
                show_copy_button=True,
                elem_classes="chat-container",
                type="messages",
            )
            with gr.Row():
                msg = gr.Textbox(
                    show_label=False,
                    placeholder="Ask about your code, type /github to search repos, or /stack to search Stack Overflow...",
                    scale=5,
                )
                send_btn = gr.Button("Send", scale=1)
            clear_btn = gr.Button("Clear Conversation")

    # Event handlers: analyze the upload, then refresh metrics and advice.
    upload_button.click(
        process_code_file,
        inputs=[file_input],
        outputs=[current_session_id, file_status, code_state],
    ).then(
        format_code_metrics,
        inputs=[code_state],
        outputs=[code_metrics],
    ).then(
        generate_recommendations,
        inputs=[code_state],
        outputs=[code_recommendations],
    )

    msg.submit(
        generate_response,
        inputs=[msg, current_session_id, model_dropdown, chatbot],
        outputs=[chatbot],
    ).then(lambda: "", None, [msg])

    send_btn.click(
        generate_response,
        inputs=[msg, current_session_id, model_dropdown, chatbot],
        outputs=[chatbot],
    ).then(lambda: "", None, [msg])

    # Reset the conversation and every panel derived from the uploaded file.
    clear_btn.click(
        lambda: ([], None, "No file uploaded", {}, None, ""),
        None,
        [chatbot, current_session_id, file_status, code_state, code_metrics, code_recommendations],
    )

    # Tech tool handlers
    repo_search_btn.click(
        perform_repo_search,
        inputs=[repo_query, language, sort_by, min_stars],
        outputs=[repo_results],
    )
    so_search_btn.click(
        perform_stack_search,
        inputs=[stack_query, tag, so_sort_by],
        outputs=[stack_results],
    )
    explain_btn.click(
        explain_code,
        inputs=[code_input],
        outputs=[code_explanation],
    )

    # Add footer with attribution
    gr.HTML("""
Created by Calvin Allen Crawford
""")

# Launch the app
if __name__ == "__main__":
    demo.launch()