insight-finder / app.py
heymenn's picture
Update app.py
bb03459 verified
raw
history blame
5.89 kB
from fastapi import FastAPI
from pydantic import BaseModel
from typing import Dict, List
import gradio as gr
import pandas as pd
import json
from src.core import *
# FastAPI application object. The REST routes defined below are registered on
# it, and the Gradio UI is mounted onto it at the end of this file.
app = FastAPI(
    title="Insight Finder",
    description="Find relevant technologies from a problem",
)
# Request body accepted by the POST /process endpoint.
# NOTE: documented with "#" comments rather than a class docstring on purpose:
# pydantic copies a model's docstring into the generated schema description,
# which would change the published API docs.
class InputData(BaseModel):
    # Text of the problem for which relevant technologies should be found.
    problem: str
# Request body accepted by the POST /process-constraints endpoint.
# NOTE: documented with "#" comments rather than a class docstring on purpose:
# pydantic copies a model's docstring into the generated schema description,
# which would change the published API docs.
class InputConstraints(BaseModel):
    # String-to-string mapping forwarded as-is to
    # process_input_from_constraints.
    # NOTE(review): presumably constraint title -> constraint description;
    # confirm against that function's expectations.
    constraints: Dict[str, str]
# Schema of one technology object inside an API response.
# Field order is kept as-is because it determines the order of the keys in the
# serialized output and in the generated schema.
class Technology(BaseModel):
    """Represents a single technology entry with its details."""
    # Descriptive text fields of the technology.
    title: str
    purpose: str
    key_components: str
    advantages: str
    limitations: str
    # Integer identifier of the entry. The name shadows the `id` builtin inside
    # the class body, but it is part of the response schema and must not be
    # renamed.
    id: int
# Root schema of the JSON returned by both REST endpoints (it is their
# `response_model`).
class TechnologyData(BaseModel):
    """Represents the top-level object containing a list of technologies."""
    # The selected technologies, each validated against `Technology`.
    technologies: List[Technology]
@app.post("/process", response_model=TechnologyData)
async def process(data: InputData):
    # Endpoint: find technologies for a problem statement.
    # NOTE: "#" comments are used instead of a docstring because FastAPI would
    # publish a docstring as this operation's OpenAPI description.
    #
    # The whole request model (not only `data.problem`) is handed to
    # process_input, together with `global_tech` / `global_tech_embeddings`
    # (not defined in the visible part of this file; presumably provided by
    # the star import from src.core).
    technologies = process_input(data, global_tech, global_tech_embeddings)

    # The payload is validated and serialized through TechnologyData.
    return {"technologies": technologies}
@app.post("/process-constraints", response_model=TechnologyData)
async def process_constraints(constraints: InputConstraints):
    # Endpoint: find technologies for an already-known set of constraints.
    # NOTE: "#" comments are used instead of a docstring because FastAPI would
    # publish a docstring as this operation's OpenAPI description.
    #
    # Only the inner mapping is forwarded, not the request model itself.
    # `global_tech` / `global_tech_embeddings` are not defined in the visible
    # part of this file; presumably provided by the star import from src.core.
    constraint_map = constraints.constraints
    technologies = process_input_from_constraints(
        constraint_map, global_tech, global_tech_embeddings
    )

    # The payload is validated and serialized through TechnologyData.
    return {"technologies": technologies}
def process_input_gradio(problem_description: str):
    """Run the technology-selection pipeline step by step for the Gradio UI.

    Every intermediate result is returned so that each step can be shown in
    its own output component.

    Args:
        problem_description: Problem text entered by the user.

    Returns:
        A 9-tuple, in the order the Gradio outputs are wired:
        the generated prompt, the comma-joined constraints, the comma-joined
        stemmed constraints, a static status message, the stringified
        similarity summary, the similarity matrix as a ``pandas.DataFrame``
        (rows: stemmed constraints, columns: ``global_tech['name']``), the
        best combinations as JSON text, the comma-joined selected technology
        ids, and the selected technologies as JSON text.

    Side effects:
        On every call the stemmed constraints are passed to ``save_dataframe``
        with the file name "constraints_stemmed.xlsx", and the similarity
        results are passed to ``save_to_pickle``.
    """
    # Step 1: build the prompt from the raw problem text.
    prompt = set_prompt(problem_description)

    # Step 2: retrieve the constraints for that prompt.
    constraints = retrieve_constraints(prompt)

    # Step 3: stem the constraints and hand them to save_dataframe.
    constraints_stemmed = stem(constraints, "constraints")
    save_dataframe(
        pd.DataFrame({"stemmed_constraints": constraints_stemmed}),
        "constraints_stemmed.xlsx",
    )

    # Step 4: nothing to compute. `global_tech` and `global_tech_embeddings`
    # are not created here; they are looked up as globals (presumably loaded
    # once by src.core - confirm).

    # Step 5: similarities between the stemmed constraints and the technologies.
    result_similarities, matrix = get_contrastive_similarities(
        constraints_stemmed, global_tech, global_tech_embeddings
    )
    save_to_pickle(result_similarities)

    # Step 6: find the best technology combinations from the similarity matrix.
    best_combinations = find_best_list_combinations(constraints_stemmed, global_tech, matrix)

    # Step 7: reduce the combinations to a set of technology ids.
    best_technologies_id = select_technologies(best_combinations)

    # Step 8: resolve the ids back to full technology entries.
    best_technologies = get_technologies_by_id(best_technologies_id, global_tech)

    # Format outputs for Gradio.
    # `.item()` turns the similarity into a plain Python number for formatting.
    # NOTE(review): the summary is keyed by item['id2'], so entries sharing the
    # same id2 overwrite each other - confirm that this is intended.
    result_similarities_display = {
        item['id2']: f"{item['constraint']['title']} ({item['similarity'].item():.3f})"
        for item in result_similarities
    }

    best_combinations_display = json.dumps(best_combinations, indent=2)
    best_technologies_display = json.dumps(best_technologies, indent=2)

    return (
        prompt,
        ", ".join(constraints),
        ", ".join(constraints_stemmed),
        "Global technologies loaded and ready.",  # Acknowledge tech loading
        str(result_similarities_display),  # Convert dict to string for display
        # Display matrix as DataFrame
        pd.DataFrame(matrix, index=constraints_stemmed, columns=global_tech['name']),
        best_combinations_display,
        ", ".join(map(str, best_technologies_id)),
        best_technologies_display,
    )
# --- Gradio Interface Setup ---
# The components are created here, outside any layout, and are placed on the
# page later through their .render() method. Creation order is kept identical
# to the display order.


def _readonly_textbox(label: str) -> gr.Textbox:
    """Create a non-editable textbox used to display one pipeline step."""
    return gr.Textbox(label=label, interactive=False)


# Input component
input_problem = gr.Textbox(
    label="Enter Problem Description",
    placeholder="e.g., Develop a secure and scalable e-commerce platform with real-time analytics.",
)

# Output components, one per value returned by process_input_gradio
output_prompt = _readonly_textbox("1. Generated Prompt")
output_constraints = _readonly_textbox("2. Retrieved Constraints")
output_stemmed_constraints = _readonly_textbox("3. Stemmed Constraints")
output_tech_loaded = _readonly_textbox("4. Global Technologies Status")
output_similarities = _readonly_textbox(
    "5. Result Similarities (Constraint -> Top Technologies)"
)
output_matrix = gr.Dataframe(
    label="6. Similarity Matrix (Constraints vs. Technologies)",
    interactive=False,
)
output_best_combinations = gr.JSON(label="7. Best Technology Combinations Found")
output_selected_ids = _readonly_textbox("8. Selected Technology IDs")
output_final_technologies = gr.JSON(label="9. Final Best Technologies")
# Create the Gradio Blocks demo and expose it on the FastAPI app.

# Result components in display order. The same order maps the tuple returned
# by process_input_gradio onto the components, so it must match that tuple.
_step_outputs = [
    output_prompt,
    output_constraints,
    output_stemmed_constraints,
    output_tech_loaded,
    output_similarities,
    output_matrix,
    output_best_combinations,
    output_selected_ids,
    output_final_technologies,
]

with gr.Blocks() as gradio_app_blocks:
    gr.Markdown("# Insight Finder: Step-by-Step Technology Selection")
    gr.Markdown("Enter a problem description to see how relevant technologies are identified through various processing steps.")

    # The pre-built components are attached to this layout via render().
    input_problem.render()
    process_button = gr.Button("Process Problem")

    with gr.Column():
        for _component in _step_outputs:
            _component.render()

    # The event listener is declared inside the Blocks context.
    process_button.click(
        fn=process_input_gradio,
        inputs=input_problem,
        outputs=_step_outputs,
    )

# Serve the demo from the FastAPI application under /gradio.
gr.mount_gradio_app(app, gradio_app_blocks, path="/gradio")