Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -17,25 +17,28 @@ import re
|
|
17 |
from datetime import datetime, timedelta
|
18 |
from pathlib import Path
|
19 |
import torch
|
|
|
20 |
|
21 |
# Load environment variables
|
22 |
load_dotenv()
|
23 |
client = groq.Client(api_key=os.getenv("GROQ_TECH_API_KEY"))
|
24 |
|
25 |
-
#
|
26 |
try:
|
27 |
-
# Initialize embeddings with a simpler, more reliable model
|
28 |
embeddings = HuggingFaceInstructEmbeddings(
|
29 |
model_name="hkunlp/instructor-base",
|
30 |
model_kwargs={"device": "cuda" if torch.cuda.is_available() else "cpu"}
|
31 |
)
|
32 |
except Exception as e:
|
33 |
print(f"Warning: Failed to load primary embeddings model: {e}")
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
|
|
|
|
|
|
39 |
|
40 |
# Directory to store FAISS indexes
|
41 |
FAISS_INDEX_DIR = "faiss_indexes_tech"
|
@@ -48,44 +51,75 @@ user_vectorstores = {}
|
|
48 |
# Custom CSS for Tech theme
|
49 |
custom_css = """
|
50 |
:root {
|
51 |
-
--primary-color: #4285F4;
|
52 |
-
--secondary-color: #34A853;
|
|
|
53 |
--light-background: #F8F9FA;
|
54 |
--dark-text: #202124;
|
55 |
--white: #FFFFFF;
|
56 |
--border-color: #DADCE0;
|
57 |
--code-bg: #F1F3F4;
|
58 |
-
--code-text: #37474F;
|
59 |
-
--error-color: #EA4335; /* Google Red */
|
60 |
-
--warning-color: #FBBC04; /* Google Yellow */
|
61 |
}
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
.
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
.
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
.
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
"""
|
90 |
|
91 |
# Function to process PDF files
|
@@ -127,68 +161,28 @@ def process_pdf(pdf_file):
|
|
127 |
|
128 |
# Function to generate chatbot responses with Tech theme
|
129 |
def generate_response(message, session_id, model_name, history):
|
|
|
130 |
if not message:
|
131 |
return history
|
|
|
132 |
try:
|
133 |
context = ""
|
134 |
-
if embeddings and session_id and session_id in user_vectorstores:
|
135 |
try:
|
136 |
vectorstore = user_vectorstores[session_id]
|
137 |
docs = vectorstore.similarity_search(message, k=3)
|
138 |
if docs:
|
139 |
-
context = "\n\nRelevant
|
140 |
except Exception as e:
|
141 |
print(f"Warning: Failed to perform similarity search: {e}")
|
142 |
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
repo_results = search_github_repos(query)
|
147 |
-
if repo_results:
|
148 |
-
response = "**GitHub Repository Search Results:**\n\n"
|
149 |
-
for repo in repo_results[:3]: # Limit to top 3 results
|
150 |
-
response += f"**[{repo['name']}]({repo['html_url']})**\n"
|
151 |
-
if repo['description']:
|
152 |
-
response += f"{repo['description']}\n"
|
153 |
-
response += f"⭐ {repo['stargazers_count']} | 🍴 {repo['forks_count']} | Language: {repo['language'] or 'Not specified'}\n"
|
154 |
-
response += f"Updated: {repo['updated_at'][:10]}\n\n"
|
155 |
-
history.append({"role": "assistant", "content": response})
|
156 |
-
return history
|
157 |
-
else:
|
158 |
-
history.append({"role": "assistant", "content": "No GitHub repositories found for your query."})
|
159 |
-
return history
|
160 |
-
|
161 |
-
# Check if it's a Stack Overflow search
|
162 |
-
if re.match(r'^/stack\s+.+', message, re.IGNORECASE):
|
163 |
-
query = re.sub(r'^/stack\s+', '', message, flags=re.IGNORECASE)
|
164 |
-
qa_results = search_stackoverflow(query)
|
165 |
-
if qa_results:
|
166 |
-
response = "**Stack Overflow Search Results:**\n\n"
|
167 |
-
for qa in qa_results[:3]: # Limit to top 3 results
|
168 |
-
response += f"**[{qa['title']}]({qa['link']})**\n"
|
169 |
-
response += f"Score: {qa['score']} | Answers: {qa['answer_count']}\n"
|
170 |
-
if 'tags' in qa and qa['tags']:
|
171 |
-
response += f"Tags: {', '.join(qa['tags'][:5])}\n"
|
172 |
-
response += f"Asked: {qa['creation_date']}\n\n"
|
173 |
-
history.append({"role": "assistant", "content": response})
|
174 |
-
return history
|
175 |
-
else:
|
176 |
-
history.append({"role": "assistant", "content": "No Stack Overflow questions found for your query."})
|
177 |
-
return history
|
178 |
|
179 |
-
# Check if it's a code explanation request
|
180 |
-
code_match = re.search(r'/explain\s+```(?:.+?)?\n(.+?)```', message, re.DOTALL)
|
181 |
-
if code_match:
|
182 |
-
code = code_match.group(1).strip()
|
183 |
-
explanation = explain_code(code)
|
184 |
-
history.append({"role": "assistant", "content": explanation})
|
185 |
-
return history
|
186 |
-
|
187 |
-
system_prompt = "You are a technical assistant specializing in software development, programming, and IT topics."
|
188 |
-
system_prompt += " Format code snippets with proper markdown code blocks with language specified."
|
189 |
-
system_prompt += " For technical explanations, be precise and include examples where helpful."
|
190 |
if context:
|
191 |
-
system_prompt += "
|
|
|
192 |
completion = client.chat.completions.create(
|
193 |
model=model_name,
|
194 |
messages=[
|
@@ -198,11 +192,14 @@ def generate_response(message, session_id, model_name, history):
|
|
198 |
temperature=0.7,
|
199 |
max_tokens=1024
|
200 |
)
|
|
|
201 |
response = completion.choices[0].message.content
|
202 |
history.append({"role": "assistant", "content": response})
|
203 |
return history
|
|
|
204 |
except Exception as e:
|
205 |
-
|
|
|
206 |
return history
|
207 |
|
208 |
# Functions to update PDF viewer
|
@@ -457,6 +454,7 @@ def process_code_file(file_obj):
|
|
457 |
"""Process uploaded code files"""
|
458 |
if file_obj is None:
|
459 |
return None, "No file uploaded", {}
|
|
|
460 |
try:
|
461 |
content = file_obj.read().decode('utf-8')
|
462 |
file_extension = Path(file_obj.name).suffix.lower()
|
@@ -465,7 +463,8 @@ def process_code_file(file_obj):
|
|
465 |
# Calculate metrics
|
466 |
metrics = calculate_complexity_metrics(content, language)
|
467 |
|
468 |
-
#
|
|
|
469 |
if embeddings:
|
470 |
try:
|
471 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
|
@@ -477,9 +476,6 @@ def process_code_file(file_obj):
|
|
477 |
user_vectorstores[session_id] = vectorstore
|
478 |
except Exception as e:
|
479 |
print(f"Warning: Failed to create vectorstore: {e}")
|
480 |
-
session_id = None
|
481 |
-
else:
|
482 |
-
session_id = None
|
483 |
|
484 |
return session_id, f"✅ Successfully analyzed {file_obj.name}", metrics
|
485 |
except Exception as e:
|
@@ -492,8 +488,8 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
|
|
492 |
|
493 |
gr.HTML("""
|
494 |
<div class="header">
|
495 |
-
<div class="header-title">Tech-Vision</div>
|
496 |
-
<div class="header-subtitle">Advanced Code Analysis
|
497 |
</div>
|
498 |
""")
|
499 |
|
@@ -501,21 +497,15 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
|
|
501 |
with gr.Column(scale=1, min_width=300):
|
502 |
file_input = gr.File(
|
503 |
label="Upload Code File",
|
504 |
-
file_types=[
|
505 |
-
".py", ".js", ".java", ".cpp", ".c", ".cs", ".php",
|
506 |
-
".rb", ".go", ".rs", ".swift", ".kt", ".ts", ".html",
|
507 |
-
".css", ".sql", ".r", ".m", ".h", ".hpp", ".jsx",
|
508 |
-
".tsx", ".vue", ".scala", ".pl", ".sh", ".bash",
|
509 |
-
".ps1", ".yaml", ".yml", ".json", ".xml", ".toml", ".ini"
|
510 |
-
],
|
511 |
type="binary"
|
512 |
)
|
513 |
upload_button = gr.Button("Analyze Code", variant="primary")
|
514 |
file_status = gr.Markdown("No file uploaded yet")
|
515 |
model_dropdown = gr.Dropdown(
|
516 |
-
choices=["llama3-70b-8192", "
|
517 |
value="llama3-70b-8192",
|
518 |
-
label="Select
|
519 |
)
|
520 |
|
521 |
# Developer Tools Section
|
@@ -672,7 +662,7 @@ if __name__ == "__main__":
|
|
672 |
|
673 |
# Add new helper functions
|
674 |
def detect_language(extension):
|
675 |
-
"""
|
676 |
extension_map = {
|
677 |
".py": "Python",
|
678 |
".js": "JavaScript",
|
@@ -683,24 +673,7 @@ def detect_language(extension):
|
|
683 |
".php": "PHP",
|
684 |
".rb": "Ruby",
|
685 |
".go": "Go",
|
686 |
-
".
|
687 |
-
".swift": "Swift",
|
688 |
-
".kt": "Kotlin",
|
689 |
-
".ts": "TypeScript",
|
690 |
-
".html": "HTML",
|
691 |
-
".css": "CSS",
|
692 |
-
".sql": "SQL",
|
693 |
-
".r": "R",
|
694 |
-
".scala": "Scala",
|
695 |
-
".pl": "Perl",
|
696 |
-
".sh": "Shell",
|
697 |
-
".yaml": "YAML",
|
698 |
-
".yml": "YAML",
|
699 |
-
".json": "JSON",
|
700 |
-
".xml": "XML",
|
701 |
-
".jsx": "React JSX",
|
702 |
-
".tsx": "React TSX",
|
703 |
-
".vue": "Vue",
|
704 |
}
|
705 |
return extension_map.get(extension.lower(), "Unknown")
|
706 |
|
@@ -711,26 +684,13 @@ def calculate_complexity_metrics(content, language):
|
|
711 |
blank_lines = len([line for line in lines if not line.strip()])
|
712 |
code_lines = total_lines - blank_lines
|
713 |
|
714 |
-
# Get language patterns
|
715 |
-
patterns = LANGUAGE_PATTERNS.get(language.lower(), LANGUAGE_PATTERNS.get("python"))
|
716 |
-
|
717 |
-
# Calculate metrics using patterns
|
718 |
metrics = {
|
719 |
"language": language,
|
720 |
"total_lines": total_lines,
|
721 |
"code_lines": code_lines,
|
722 |
-
"blank_lines": blank_lines
|
723 |
-
"functions": len(re.findall(patterns["function"], content, re.MULTILINE)) if patterns else 0,
|
724 |
-
"classes": len(re.findall(patterns["class"], content, re.MULTILINE)) if patterns else 0,
|
725 |
-
"imports": len(re.findall(patterns["import"], content, re.MULTILINE)) if patterns else 0,
|
726 |
-
"comments": len(re.findall(patterns["comment"], content, re.MULTILINE)) if patterns else 0,
|
727 |
-
"conditionals": len(re.findall(patterns["conditional"], content, re.MULTILINE)) if patterns else 0,
|
728 |
-
"loops": len(re.findall(patterns["loop"], content, re.MULTILINE)) if patterns else 0,
|
729 |
}
|
730 |
|
731 |
-
# Calculate cyclomatic complexity
|
732 |
-
metrics["cyclomatic_complexity"] = 1 + metrics["conditionals"] + metrics["loops"]
|
733 |
-
|
734 |
return metrics
|
735 |
|
736 |
def generate_recommendations(metrics):
|
|
|
17 |
from datetime import datetime, timedelta
|
18 |
from pathlib import Path
|
19 |
import torch
|
20 |
+
import numpy as np
|
21 |
|
22 |
# Load environment variables
|
23 |
load_dotenv()
|
24 |
client = groq.Client(api_key=os.getenv("GROQ_TECH_API_KEY"))
|
25 |
|
26 |
+
# Initialize embeddings with error handling
|
27 |
try:
|
|
|
28 |
embeddings = HuggingFaceInstructEmbeddings(
|
29 |
model_name="hkunlp/instructor-base",
|
30 |
model_kwargs={"device": "cuda" if torch.cuda.is_available() else "cpu"}
|
31 |
)
|
32 |
except Exception as e:
|
33 |
print(f"Warning: Failed to load primary embeddings model: {e}")
|
34 |
+
try:
|
35 |
+
embeddings = HuggingFaceInstructEmbeddings(
|
36 |
+
model_name="all-MiniLM-L6-v2",
|
37 |
+
model_kwargs={"device": "cuda" if torch.cuda.is_available() else "cpu"}
|
38 |
+
)
|
39 |
+
except Exception as e:
|
40 |
+
print(f"Warning: Failed to load fallback embeddings model: {e}")
|
41 |
+
embeddings = None
|
42 |
|
43 |
# Directory to store FAISS indexes
|
44 |
FAISS_INDEX_DIR = "faiss_indexes_tech"
|
|
|
51 |
# Custom CSS for Tech theme
|
52 |
custom_css = """
|
53 |
:root {
|
54 |
+
--primary-color: #4285F4;
|
55 |
+
--secondary-color: #34A853;
|
56 |
+
--accent-color: #EA4335;
|
57 |
--light-background: #F8F9FA;
|
58 |
--dark-text: #202124;
|
59 |
--white: #FFFFFF;
|
60 |
--border-color: #DADCE0;
|
61 |
--code-bg: #F1F3F4;
|
|
|
|
|
|
|
62 |
}
|
63 |
+
|
64 |
+
body {
|
65 |
+
background-color: var(--light-background);
|
66 |
+
font-family: 'Google Sans', 'Roboto', sans-serif;
|
67 |
+
}
|
68 |
+
|
69 |
+
.container {
|
70 |
+
max-width: 1200px !important;
|
71 |
+
margin: 0 auto !important;
|
72 |
+
padding: 10px;
|
73 |
+
}
|
74 |
+
|
75 |
+
.header {
|
76 |
+
background-color: var(--white);
|
77 |
+
border-bottom: 1px solid var(--border-color);
|
78 |
+
padding: 15px 0;
|
79 |
+
margin-bottom: 20px;
|
80 |
+
border-radius: 12px 12px 0 0;
|
81 |
+
box-shadow: 0 2px 4px rgba(0,0,0,0.05);
|
82 |
+
}
|
83 |
+
|
84 |
+
.header-title {
|
85 |
+
color: var(--primary-color);
|
86 |
+
font-size: 1.8rem;
|
87 |
+
font-weight: 700;
|
88 |
+
text-align: center;
|
89 |
+
}
|
90 |
+
|
91 |
+
.header-subtitle {
|
92 |
+
color: var(--dark-text);
|
93 |
+
font-size: 1rem;
|
94 |
+
text-align: center;
|
95 |
+
margin-top: 5px;
|
96 |
+
}
|
97 |
+
|
98 |
+
.chat-container {
|
99 |
+
border-radius: 12px !important;
|
100 |
+
box-shadow: 0 4px 6px rgba(0,0,0,0.1) !important;
|
101 |
+
background-color: var(--white) !important;
|
102 |
+
border: 1px solid var(--border-color) !important;
|
103 |
+
min-height: 500px;
|
104 |
+
}
|
105 |
+
|
106 |
+
.tool-container {
|
107 |
+
background-color: var(--white);
|
108 |
+
border-radius: 12px;
|
109 |
+
box-shadow: 0 4px 6px rgba(0,0,0,0.1);
|
110 |
+
padding: 15px;
|
111 |
+
margin-bottom: 20px;
|
112 |
+
}
|
113 |
+
|
114 |
+
.code-block {
|
115 |
+
background-color: var(--code-bg);
|
116 |
+
padding: 12px;
|
117 |
+
border-radius: 8px;
|
118 |
+
font-family: 'Roboto Mono', monospace;
|
119 |
+
overflow-x: auto;
|
120 |
+
margin: 10px 0;
|
121 |
+
border-left: 3px solid var(--primary-color);
|
122 |
+
}
|
123 |
"""
|
124 |
|
125 |
# Function to process PDF files
|
|
|
161 |
|
162 |
# Function to generate chatbot responses with Tech theme
|
163 |
def generate_response(message, session_id, model_name, history):
|
164 |
+
"""Generate chatbot responses"""
|
165 |
if not message:
|
166 |
return history
|
167 |
+
|
168 |
try:
|
169 |
context = ""
|
170 |
+
if embeddings and session_id and session_id in user_vectorstores:
|
171 |
try:
|
172 |
vectorstore = user_vectorstores[session_id]
|
173 |
docs = vectorstore.similarity_search(message, k=3)
|
174 |
if docs:
|
175 |
+
context = "\n\nRelevant code context:\n" + "\n".join(f"```\n{doc.page_content}\n```" for doc in docs)
|
176 |
except Exception as e:
|
177 |
print(f"Warning: Failed to perform similarity search: {e}")
|
178 |
|
179 |
+
system_prompt = """You are a technical assistant specializing in software development and programming.
|
180 |
+
Provide clear, accurate responses with code examples when relevant.
|
181 |
+
Format code snippets with proper markdown code blocks and specify the language."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
182 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
183 |
if context:
|
184 |
+
system_prompt += f"\nUse this context from the uploaded code when relevant:{context}"
|
185 |
+
|
186 |
completion = client.chat.completions.create(
|
187 |
model=model_name,
|
188 |
messages=[
|
|
|
192 |
temperature=0.7,
|
193 |
max_tokens=1024
|
194 |
)
|
195 |
+
|
196 |
response = completion.choices[0].message.content
|
197 |
history.append({"role": "assistant", "content": response})
|
198 |
return history
|
199 |
+
|
200 |
except Exception as e:
|
201 |
+
error_msg = f"Error generating response: {str(e)}"
|
202 |
+
history.append({"role": "assistant", "content": error_msg})
|
203 |
return history
|
204 |
|
205 |
# Functions to update PDF viewer
|
|
|
454 |
"""Process uploaded code files"""
|
455 |
if file_obj is None:
|
456 |
return None, "No file uploaded", {}
|
457 |
+
|
458 |
try:
|
459 |
content = file_obj.read().decode('utf-8')
|
460 |
file_extension = Path(file_obj.name).suffix.lower()
|
|
|
463 |
# Calculate metrics
|
464 |
metrics = calculate_complexity_metrics(content, language)
|
465 |
|
466 |
+
# Create vectorstore if embeddings are available
|
467 |
+
session_id = None
|
468 |
if embeddings:
|
469 |
try:
|
470 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
|
|
|
476 |
user_vectorstores[session_id] = vectorstore
|
477 |
except Exception as e:
|
478 |
print(f"Warning: Failed to create vectorstore: {e}")
|
|
|
|
|
|
|
479 |
|
480 |
return session_id, f"✅ Successfully analyzed {file_obj.name}", metrics
|
481 |
except Exception as e:
|
|
|
488 |
|
489 |
gr.HTML("""
|
490 |
<div class="header">
|
491 |
+
<div class="header-title">Tech-Vision AI</div>
|
492 |
+
<div class="header-subtitle">Advanced Code Analysis & Technical Assistant</div>
|
493 |
</div>
|
494 |
""")
|
495 |
|
|
|
497 |
with gr.Column(scale=1, min_width=300):
|
498 |
file_input = gr.File(
|
499 |
label="Upload Code File",
|
500 |
+
file_types=[".py", ".js", ".java", ".cpp", ".c", ".cs", ".php", ".rb", ".go", ".ts"],
|
|
|
|
|
|
|
|
|
|
|
|
|
501 |
type="binary"
|
502 |
)
|
503 |
upload_button = gr.Button("Analyze Code", variant="primary")
|
504 |
file_status = gr.Markdown("No file uploaded yet")
|
505 |
model_dropdown = gr.Dropdown(
|
506 |
+
choices=["llama3-70b-8192", "mixtral-8x7b-32768", "gemma-7b-it"],
|
507 |
value="llama3-70b-8192",
|
508 |
+
label="Select Model"
|
509 |
)
|
510 |
|
511 |
# Developer Tools Section
|
|
|
662 |
|
663 |
# Add new helper functions
|
664 |
def detect_language(extension):
|
665 |
+
"""Detect programming language from file extension"""
|
666 |
extension_map = {
|
667 |
".py": "Python",
|
668 |
".js": "JavaScript",
|
|
|
673 |
".php": "PHP",
|
674 |
".rb": "Ruby",
|
675 |
".go": "Go",
|
676 |
+
".ts": "TypeScript"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
677 |
}
|
678 |
return extension_map.get(extension.lower(), "Unknown")
|
679 |
|
|
|
684 |
blank_lines = len([line for line in lines if not line.strip()])
|
685 |
code_lines = total_lines - blank_lines
|
686 |
|
|
|
|
|
|
|
|
|
687 |
metrics = {
|
688 |
"language": language,
|
689 |
"total_lines": total_lines,
|
690 |
"code_lines": code_lines,
|
691 |
+
"blank_lines": blank_lines
|
|
|
|
|
|
|
|
|
|
|
|
|
692 |
}
|
693 |
|
|
|
|
|
|
|
694 |
return metrics
|
695 |
|
696 |
def generate_recommendations(metrics):
|