Spaces:
Runtime error
Runtime error
from src.services.utils import * | |
from src.services.processor import * | |
# Load the technology table and its embeddings once, at import time.
# NOTE(review): this statement runs before the local ``load_technologies``
# definition further down in the file, so the name must be supplied by the
# star-imports above -- confirm that is intended.
global_tech, global_tech_embeddings = load_technologies()
def process_input(data, global_tech, global_tech_embeddings):
    """Select technologies for the problem carried by ``data``.

    The problem text is turned into a prompt and constraints are retrieved
    from that prompt. Everything after that (stemming, similarity scoring,
    combination search and final selection) is delegated to
    ``process_input_from_constraints`` so the two entry points share one
    implementation of the pipeline instead of two copies.

    Args:
        data: Object exposing the problem description as ``data.problem``.
        global_tech: Technology table forwarded to the pipeline.
        global_tech_embeddings: Embeddings forwarded alongside ``global_tech``.

    Returns:
        The value produced by ``get_technologies_by_id`` for the selected
        technology IDs.
    """
    prompt = set_prompt(data.problem)
    constraints = retrieve_constraints(prompt)
    return process_input_from_constraints(
        constraints, global_tech, global_tech_embeddings
    )
def process_input_from_constraints(constraints, global_tech, global_tech_embeddings):
    """Run the technology-selection pipeline starting from raw constraints.

    Steps, in order: stem the constraints, persist the stemmed constraints
    and the technology table, score constraints against technologies,
    persist the similarity results, search for the best combinations and
    resolve the selected IDs to technology records.

    Args:
        constraints: Constraints to stem and match against technologies.
        global_tech: Technology table used for scoring and final lookup.
        global_tech_embeddings: Embeddings passed to the similarity step.

    Returns:
        The value produced by ``get_technologies_by_id`` for the selected
        technology IDs.
    """
    constraints_stemmed = stem(constraints, "constraints")
    save_dataframe(constraints_stemmed, "constraints_stemmed.xlsx")
    save_dataframe(global_tech, "global_tech.xlsx")

    result_similarities, matrix = get_contrastive_similarities(
        constraints_stemmed, global_tech, global_tech_embeddings
    )
    save_to_pickle(result_similarities)

    # Debug trace of the intermediate state fed into the combination search.
    print(f"Matrix : {matrix} \n Constraints : {constraints_stemmed} \n Global tech : {global_tech}")

    best_combinations = find_best_list_combinations(constraints_stemmed, global_tech, matrix)
    best_technologies_id = select_technologies(best_combinations)
    return get_technologies_by_id(best_technologies_id, global_tech)
import gradio as gr | |
import pandas as pd | |
import numpy as np | |
import random | |
import json | |
# --- Dummy Implementations for src.services.utils and src.services.processor --- | |
# These functions simulate the behavior of your actual services for the Gradio interface. | |
def load_technologies():
    """
    Stand-in loader for the technology catalogue and its embeddings.

    Returns:
        A ``(DataFrame, ndarray)`` pair: a ten-row table with the columns
        ``id``, ``name`` and ``description``, and a random array holding one
        128-dimensional vector per row.
    """
    catalogue = [
        ('Machine Learning', 'Algorithms for learning from data.'),
        ('Cloud Computing', 'On-demand computing resources.'),
        ('Blockchain', 'Decentralized ledger technology.'),
        ('Cybersecurity', 'Protecting systems from threats.'),
        ('Data Analytics', 'Analyzing large datasets.'),
        ('Artificial Intelligence', 'Simulating human intelligence.'),
        ('DevOps', 'Software development and operations.'),
        ('Quantum Computing', 'Utilizing quantum mechanics.'),
        ('Edge Computing', 'Processing data near the source.'),
        ('Robotics', 'Automated machines.'),
    ]
    names, descriptions = zip(*catalogue)
    global_tech_df = pd.DataFrame({
        'id': list(range(1, len(catalogue) + 1)),
        'name': list(names),
        'description': list(descriptions),
    })
    # Embeddings are simulated: one random vector per technology row.
    global_tech_embeddings_array = np.random.rand(len(global_tech_df), 128)
    return global_tech_df, global_tech_embeddings_array
def set_prompt(problem_description: str) -> str:
    """
    Stand-in prompt builder: embed the problem description in a fixed question.
    """
    template = "Based on the problem: '{}', what are the key technical challenges and requirements?"
    return template.format(problem_description)
def retrieve_constraints(prompt: str) -> list[str]:
    """
    Stand-in constraint retrieval driven by keywords found in the prompt.

    The prompt is matched case-insensitively against keyword groups in a
    fixed priority order (security, then performance, then data); the first
    group with a hit decides the result. A generic list is returned when
    nothing matches.
    """
    lowered = prompt.lower()
    keyword_rules = (
        (("security", "secure"), ["high security", "data privacy", "authentication"]),
        (("performance", "speed"), ["low latency", "high throughput", "scalability"]),
        (("data", "analyze"), ["data integration", "real-time analytics", "data storage"]),
    )
    for keywords, matched_constraints in keyword_rules:
        if any(keyword in lowered for keyword in keywords):
            return matched_constraints
    return ["cost-efficiency", "ease of integration", "maintainability", "scalability"]
def stem(text_list: list[str], type_of_text: str) -> list[str]:
    """
    Stand-in stemmer.

    Each text is split on whitespace; every word is lower-cased and has at
    most one trailing suffix removed, tried in the order 'ing', 'es', 's'.
    The words are then re-joined with single spaces. ``type_of_text`` is
    accepted but not used.
    """
    def _strip_suffix(word: str) -> str:
        lowered = word.lower()
        for suffix in ("ing", "es", "s"):
            if lowered.endswith(suffix):
                return lowered[:-len(suffix)]
        return lowered

    return [
        " ".join(_strip_suffix(token) for token in text.split())
        for text in text_list
    ]
def save_dataframe(df: pd.DataFrame, filename: str):
    """
    Stand-in for persisting a DataFrame: only reports the target file name.
    """
    # A real implementation might write to Excel: df.to_excel(filename, index=False)
    message = "Simulating saving DataFrame to {}".format(filename)
    print(message)
def save_to_pickle(data):
    """
    Stand-in for pickling: only reports the type of the object it was given.
    """
    message = "Simulating saving data to pickle: {}".format(type(data))
    print(message)
def get_contrastive_similarities(constraints_stemmed: list[str], global_tech_df: pd.DataFrame, global_tech_embeddings: np.ndarray):
    """
    Stand-in similarity scoring between constraints and technologies.

    Returns:
        ``(result_similarities, matrix)``. ``matrix`` is a random array
        rounded to three decimals with one row per constraint and one column
        per technology row. ``result_similarities`` maps each constraint to
        up to three ``(technology name, score)`` pairs, highest score first.
        ``global_tech_embeddings`` is accepted but not used.
    """
    shape = (len(constraints_stemmed), len(global_tech_df))
    # Rounded purely to keep the displayed values short.
    matrix = np.round(np.random.rand(*shape), 3)

    def _top_three(scores):
        # Ascending argsort, reversed, so the best-scoring columns come first.
        ranked_columns = np.argsort(scores)[::-1][:3]
        return [
            (global_tech_df.iloc[col]['name'], scores[col])
            for col in ranked_columns
        ]

    result_similarities = {
        constraint: _top_three(matrix[row])
        for row, constraint in enumerate(constraints_stemmed)
    }
    return result_similarities, matrix
def find_best_list_combinations(constraints_stemmed: list[str], global_tech_df: pd.DataFrame, matrix: np.ndarray) -> list[dict]:
    """
    Stand-in combination search: build up to three random combinations.

    Args:
        constraints_stemmed: Constraints to cover; at most three combinations
            are produced, and none when this list is empty.
        global_tech_df: Technology table; its ``id`` and ``name`` columns
            are read.
        matrix: Accepted for interface parity; not used by this stand-in.

    Returns:
        A list of dicts with the keys ``technologies`` (list of
        ``{"id", "name"}`` dicts), ``score`` (float in [0.7, 0.95], two
        decimals) and ``covered_constraints`` (random non-empty subset of
        ``constraints_stemmed``).
    """
    # Loop-invariant lookups, built once instead of once per combination.
    tech_ids = global_tech_df['id'].tolist()
    name_by_id = {}
    for tech_id, tech_name in zip(tech_ids, global_tech_df['name'].tolist()):
        # setdefault keeps the first name seen for an id.
        name_by_id.setdefault(tech_id, tech_name)

    best_combinations = []
    for _ in range(min(3, len(constraints_stemmed))):
        score = round(random.uniform(0.7, 0.95), 2)

        # random.sample raises ValueError when asked for more items than the
        # population holds, so cap the draw for tables with fewer than four rows.
        num_tech_in_combo = min(random.randint(2, 4), len(tech_ids))
        selected_tech_ids = random.sample(tech_ids, num_tech_in_combo)

        num_covered_constraints = random.randint(1, len(constraints_stemmed))
        covered = random.sample(constraints_stemmed, num_covered_constraints)

        best_combinations.append({
            "technologies": [
                {"id": tech_id, "name": name_by_id[tech_id]}
                for tech_id in selected_tech_ids
            ],
            "score": score,
            "covered_constraints": covered,
        })
    return best_combinations
def select_technologies(best_combinations: list[dict]) -> list[int]:
    """
    Stand-in selection step: collect the distinct technology IDs that appear
    in any of the given combinations.

    The IDs pass through a set, so the order of the returned list is not
    tied to the order of the combinations.
    """
    unique_ids = {
        entry["id"]
        for combination in best_combinations
        for entry in combination["technologies"]
    }
    return list(unique_ids)
def get_technologies_by_id(tech_ids: list[int], global_tech_df: pd.DataFrame) -> list[dict]:
    """
    Stand-in lookup of technology records by ID.

    For each requested ID, in the order given, the first row of
    ``global_tech_df`` whose ``id`` column equals it is converted to a dict.
    IDs with no matching row are silently skipped.
    """
    candidate_frames = (
        global_tech_df[global_tech_df['id'] == wanted_id]
        for wanted_id in tech_ids
    )
    return [
        frame.iloc[0].to_dict()
        for frame in candidate_frames
        if not frame.empty
    ]
# --- Core Logic (Modified for Gradio Interface) ---
# Load the technology table and its embeddings once, when the module is
# executed. process_input_gradio reads both names as module-level globals, so
# every request reuses this single load. At this point load_technologies
# resolves to the dummy implementation defined earlier in this file.
global_tech_df, global_tech_embeddings_array = load_technologies()
def process_input_gradio(problem_description: str):
    """
    Run the full selection pipeline for one problem description and expose
    every intermediate result to the Gradio UI.

    Reads the module-level ``global_tech_df`` and
    ``global_tech_embeddings_array``.

    Returns:
        A 9-tuple, in display order: the generated prompt, the retrieved
        constraints (comma-joined), the stemmed constraints (comma-joined),
        a fixed status message, the per-constraint top technologies (dict
        rendered as a string), the similarity matrix as a DataFrame, the
        best combinations as a JSON string, the selected technology IDs
        (comma-joined) and the final technology records as a JSON string.
    """
    # Steps 1-3: prompt -> constraints -> stemmed constraints.
    generated_prompt = set_prompt(problem_description)
    raw_constraints = retrieve_constraints(generated_prompt)
    stemmed = stem(raw_constraints, "constraints")
    stemmed_frame = pd.DataFrame({"stemmed_constraints": stemmed})
    save_dataframe(stemmed_frame, "constraints_stemmed.xlsx")

    # Step 4 (technology table) needs no work here: it is loaded at module level.

    # Step 5: score every constraint against every technology.
    similarities, similarity_matrix = get_contrastive_similarities(
        stemmed, global_tech_df, global_tech_embeddings_array
    )
    save_to_pickle(similarities)

    # Steps 6-8: combinations -> unique IDs -> full technology records.
    combinations = find_best_list_combinations(stemmed, global_tech_df, similarity_matrix)
    chosen_ids = select_technologies(combinations)
    chosen_technologies = get_technologies_by_id(chosen_ids, global_tech_df)

    # Presentation: plain lists and strings are easier for Gradio to render.
    matrix_rows = similarity_matrix.tolist()

    similarity_summary = {}
    for constraint, ranked in similarities.items():
        rendered = [f"{name} ({score:.3f})" for name, score in ranked]
        similarity_summary[constraint] = ", ".join(rendered)

    combinations_json = json.dumps(combinations, indent=2)
    technologies_json = json.dumps(chosen_technologies, indent=2)

    return (
        generated_prompt,
        ", ".join(raw_constraints),
        ", ".join(stemmed),
        "Global technologies loaded and ready.",
        str(similarity_summary),
        pd.DataFrame(matrix_rows, index=stemmed, columns=global_tech_df['name']),
        combinations_json,
        ", ".join(str(tech_id) for tech_id in chosen_ids),
        technologies_json,
    )
# --- Gradio Interface Setup ---
# Input: free-text problem description.
input_problem = gr.Textbox(
    label="Enter Problem Description",
    placeholder="e.g., Develop a secure and scalable e-commerce platform with real-time analytics."
)

# Outputs: one component per value returned by process_input_gradio, in the
# same order as its return tuple.
output_prompt = gr.Textbox(label="1. Generated Prompt", interactive=False)
output_constraints = gr.Textbox(label="2. Retrieved Constraints", interactive=False)
output_stemmed_constraints = gr.Textbox(label="3. Stemmed Constraints", interactive=False)
output_tech_loaded = gr.Textbox(label="4. Global Technologies Status", interactive=False)
output_similarities = gr.Textbox(label="5. Result Similarities (Constraint -> Top Technologies)", interactive=False)
output_matrix = gr.Dataframe(label="6. Similarity Matrix (Constraints vs. Technologies)", interactive=False)
# gr.JSON is display-only and, in current Gradio releases, its constructor has
# no ``interactive`` parameter; passing one raises TypeError while the module
# loads, so the keyword is omitted for the two JSON outputs.
output_best_combinations = gr.JSON(label="7. Best Technology Combinations Found")
output_selected_ids = gr.Textbox(label="8. Selected Technology IDs", interactive=False)
output_final_technologies = gr.JSON(label="9. Final Best Technologies")

# Build the interface. It is bound to ``demo`` so it can be referenced after
# construction; the launch still happens at module level, as before.
# NOTE(review): newer Gradio versions prefer ``flagging_mode`` over
# ``allow_flagging`` -- confirm against the pinned Gradio version.
demo = gr.Interface(
    fn=process_input_gradio,
    inputs=input_problem,
    outputs=[
        output_prompt,
        output_constraints,
        output_stemmed_constraints,
        output_tech_loaded,
        output_similarities,
        output_matrix,
        output_best_combinations,
        output_selected_ids,
        output_final_technologies,
    ],
    title="Insight Finder: Step-by-Step Technology Selection",
    description="Enter a problem description to see how relevant technologies are identified through various processing steps.",
    allow_flagging="never",
)
demo.launch()