from fastapi import FastAPI
from pydantic import BaseModel
from typing import Dict, List
import gradio as gr
import pandas as pd
import json
import re
from src.core import *
from src.ressources.main_css import *
# FastAPI application object; the @app.post handlers in this module register
# their routes on it, and the Gradio UI is mounted onto it at the end.
app = FastAPI(
    title="Insight Finder",
    description="Find relevant technologies from a problem",
)
# Request body for POST /process: a single free-text problem statement.
# (Documented with comments rather than a docstring so the class __doc__
# stays unchanged.)
class InputProblem(BaseModel):
    problem: str
# Request body for POST /process-constraints: a string-to-string mapping of
# constraints (format_constraints_html iterates such a mapping as
# title -> description pairs).
class InputConstraints(BaseModel):
    constraints: Dict[str, str]
# Schema for one technology record; used as the element type of every
# `technologies` list in the request/response models of this module.
class Technology(BaseModel):
    """Represents a single technology entry with its details."""

    # Descriptive text fields.
    title: str
    purpose: str
    key_components: str
    advantages: str
    limitations: str
    # Integer identifier of the technology.
    id: int
# Response model of the two /prior-art-* endpoints.
class OutputPriorArt(BaseModel):
    """Represents the search of prior art using the technology combinations"""

    # Text of the prior-art result.
    content: str
    # Unparameterised list, so element types are not validated.
    uris: List
# Request body for POST /prior-art-constraints: the technologies to search
# with, plus the constraints mapping they were selected for.
class InputPriorArtConstraints(BaseModel):
    technologies: List[Technology]
    constraints: Dict[str, str]
# Request body for POST /prior-art-problems: the technologies to search with,
# plus the problem statement. Note the field is `problem` (singular).
class InputPriorArtProblem(BaseModel):
    technologies: List[Technology]
    problem: str
# Root response schema for /process and /process-constraints: one
# `technologies` key holding a list of Technology objects.
class TechnologyData(BaseModel):
    """Represents the top-level object containing a list of technologies."""

    technologies: List[Technology]
# POST /process: hand the request to process_input in "problem" mode and wrap
# what it returns under the `technologies` key expected by TechnologyData.
# NOTE(review): the whole InputProblem model is forwarded here, whereas
# /process-constraints forwards the inner dict; confirm process_input expects
# the model object (not the bare string) in "problem" mode.
@app.post("/process", response_model=TechnologyData)
async def process(data: InputProblem):
    technologies = process_input(
        data,
        global_tech,
        global_tech_embeddings,
        "problem",
    )
    return dict(technologies=technologies)
# POST /process-constraints: hand the constraints mapping to process_input in
# "constraints" mode and wrap what it returns under `technologies`.
@app.post("/process-constraints", response_model=TechnologyData)
async def process_constraints(constraints: InputConstraints):
    constraint_map = constraints.constraints
    matched = process_input(
        constraint_map,
        global_tech,
        global_tech_embeddings,
        "constraints",
    )
    return {"technologies": matched}
# POST /prior-art-constraints: run process_prior_art in "constraints" mode
# with the "pydantic" input flavour and return its result unchanged.
@app.post("/prior-art-constraints", response_model=OutputPriorArt)
async def prior_art_constraints(data: InputPriorArtConstraints):
    findings = process_prior_art(
        data.technologies,
        data.constraints,
        "constraints",
        "pydantic",
    )
    # The raw result is echoed to stdout before being returned.
    print(findings)
    return findings
# POST /prior-art-problems: run process_prior_art in "problem" mode with the
# "pydantic" input flavour and return its result unchanged.
@app.post("/prior-art-problems", response_model=OutputPriorArt)
async def prior_art_problems(data: InputPriorArtProblem):
    # InputPriorArtProblem declares the field `problem` (singular). Reading
    # `data.problems` raised AttributeError on every request.
    prior_art = process_prior_art(
        data.technologies,
        data.problem,
        "problem",
        "pydantic",
    )
    return prior_art
def make_json_serializable(data):
    """Recursively rebuild *data* out of plain containers and floats.

    Dicts, lists and tuples are walked recursively (keys are kept as-is,
    tuples come back as plain tuples). Any other object exposing an ``item``
    attribute is replaced by ``float(obj.item())``. Everything else is
    returned untouched.
    """
    if isinstance(data, dict):
        converted = {}
        for key, value in data.items():
            converted[key] = make_json_serializable(value)
        return converted

    if isinstance(data, (list, tuple)):
        members = [make_json_serializable(member) for member in data]
        return members if isinstance(data, list) else tuple(members)

    # Scalar wrappers exposing .item() are always coerced to float.
    if hasattr(data, 'item'):
        return float(data.item())

    return data
# Build the display string for the retrieved constraints.
#
# `constraints` is iterated with .items() as (title, description) pairs; one
# fragment is appended to `html_content` per pair, and the result is returned
# prefixed with the "Retrieved Constraints" heading text.
#
# NOTE(review): in this copy of the file the string literals below span
# physical lines and contain neither markup nor the {title}/{description}
# placeholders. The HTML tags appear to have been stripped; restore them from
# the original source before relying on this function.
def format_constraints_html(constraints: dict) -> str:
html_content = "
"
# One fragment per constraint entry.
for title, description in constraints.items():
html_content += f"""
"""
html_content += "
"
return "Retrieved Constraints
" + html_content
# Build the display string for the best technology combinations.
#
# `combinations_data` is a list of dicts. For each one the title is read from
# combination["problem"]["title"] (falling back to "Problem <n>", 1-based) and
# the entries of combination["technologies"] are rendered beneath it.
#
# NOTE(review): the string literals below span physical lines and carry no
# markup in this copy of the file. The HTML tags appear to have been stripped;
# restore them from the original source.
def format_best_combinations_html(combinations_data: list) -> str:
html_content = ""
for i, combination in enumerate(combinations_data):
problem_title = combination.get("problem", {}).get("title", f"Problem {i+1}")
technologies = combination.get("technologies", [])
html_content += f"""
{problem_title}
"""
for tech_info_score in technologies:
# Only element [0] of each entry is used, and only when it is a dict.
# Presumably entries are (technology, score) pairs - verify against the
# producer of `combinations_data`.
tech_info = tech_info_score[0]
if isinstance(tech_info, dict):
html_content += f"""
{tech_info.get('title', 'N/A')}
Purpose: {tech_info.get('purpose', 'N/A')}
Components: {tech_info.get('key_components', 'N/A')}
Advantages: {tech_info.get('advantages', 'N/A')}
Limitations: {tech_info.get('limitations', 'N/A')}
"""
html_content += """
"""
html_content += "
"
# The heading text says "5", but nothing in this function limits or checks
# how many combinations are rendered.
return "The 5 Best Technology Combinations per constraint
" + html_content
# Build the display string for the final list of selected technologies.
#
# Every dict in `technologies_list` contributes its title, purpose,
# key_components, advantages and limitations ('N/A' when a key is missing).
# Non-dict entries are skipped.
#
# NOTE(review): the string literals below span physical lines and carry no
# markup in this copy of the file. The HTML tags appear to have been stripped;
# restore them from the original source.
def format_final_technologies_html(technologies_list: list) -> str:
html_content = ""
for tech_info in technologies_list:
if isinstance(tech_info, dict):
html_content += f"""
{tech_info.get('title', 'N/A')}
Purpose: {tech_info.get('purpose', 'N/A')}
Components: {tech_info.get('key_components', 'N/A')}
Advantages: {tech_info.get('advantages', 'N/A')}
Limitations: {tech_info.get('limitations', 'N/A')}
"""
html_content += "
"
return "The best technologies combinations
" + html_content
# Turn a prior-art result dict into a display string made of three parts:
# a summary, a per-document section and a list of referenced URIs.
#
# `prior_art_data` must provide 'content' (markdown-like text); 'uris' is
# optional and defaults to an empty list. A falsy argument or a missing
# 'content' key yields the "No prior art data available." message instead.
#
# NOTE(review): in this copy of the file the string literals span physical
# lines and contain no markup, and the regex replacements are reduced to the
# bare group reference. The HTML tags appear to have been stripped; restore
# them from the original source before relying on this function.
def format_prior_art_html(prior_art_data: dict) -> str:
if not prior_art_data or 'content' not in prior_art_data:
return "No prior art data available.
"
content = prior_art_data['content']
uris = prior_art_data.get('uris', [])
# 1. Convert **text** to text
# (as written here the replacement keeps only the inner text)
processed_content = re.sub(r'\*\*(.*?)\*\*', r'\1', content)
# 2. Convert [x](uri) to clickable links
# This regex handles cases where [x] is followed by (uri)
# It captures the number (group 1) and the URI (group 2)
# NOTE(review): the replacement visible here uses group 1 only, so the URI
# is dropped - the anchor markup is presumably part of what was stripped.
processed_content = re.sub(r'\[(\d+)\]\((https?:\/\/[^\s\)]+)\)', r'\1', processed_content)
# Split content into initial summary and then document sections
# The separator must match exactly, including its two trailing newlines;
# if it is absent, everything ends up in sections[0].
sections = processed_content.split("Here are the documents found and the technologies used within them:\n\n")
summary_html = ""
documents_html = ""
# Process summary part (the first part of the split)
# str.split always returns at least one element, so this test always holds.
if len(sections) > 0:
summary_lines = sections[0].strip().split('\n')
summary_html += " \n"
for line in summary_lines:
if line.strip().startswith('*'):
# For bullet points, specially format bold text
# The bolding for **text** is already handled by re.sub
# The line is split once on the first ':' into label and remainder.
parts = line.split(':', 1)
if len(parts) > 1:
summary_html += f"
{parts[0].replace('*', '').strip()}: {parts[1].strip()}
\n"
else:
summary_html += f"
{line.replace('*', '').strip()}
\n"
elif line.strip():
summary_html += f"
{line.strip()}
\n"
summary_html += "
\n"
# Process documents part (the second part of the split)
if len(sections) > 1:
documents_raw = sections[1].strip()
# Split by "number. **" to get individual document entries reliably
# Because the pattern is a capturing group, re.split keeps the matched title
# lines at the odd indices of the result.
# NOTE(review): paired ** markers were already rewritten by the first re.sub
# above, so this pattern can only match markers that survived it. Verify
# that document entries are actually found on real input.
document_entries = re.split(r'(\d+\.\s*\*\*.*?\*\*)', documents_raw)
parsed_docs = []
# Walk the odd indices (titles); the following element is the entry's body.
for i in range(1, len(document_entries), 2):
title_line = document_entries[i].strip()
content_block = document_entries[i+1].strip() if i+1 < len(document_entries) else ""
parsed_docs.append({'title_line': title_line, 'content_block': content_block})
documents_html += " \n"
for doc in parsed_docs:
doc_number_title = doc['title_line']
doc_content_lines = [l.strip() for l in doc['content_block'].split('\n') if l.strip()]
doc_description = ""
tech_used_section = []
# -1 means "marker line not found".
desc_start_idx = -1
tech_start_idx = -1
# If a marker occurs more than once, the last occurrence wins.
for idx, line in enumerate(doc_content_lines):
if line.startswith("Description:"):
desc_start_idx = idx
elif line.startswith("Technologies Used:"):
tech_start_idx = idx
if desc_start_idx != -1:
# The description runs up to the "Technologies Used:" line, or to the end
# of the entry when that marker is missing.
desc_end_idx = tech_start_idx if tech_start_idx != -1 else len(doc_content_lines)
doc_description = " ".join(doc_content_lines[desc_start_idx:desc_end_idx]).replace("Description:", "").strip()
if tech_start_idx != -1:
# Only bullet lines (starting with '*') after the marker are kept.
tech_used_section = [l.replace('*', '').strip() for l in doc_content_lines[tech_start_idx:] if l.strip().startswith('*')]
documents_html += f"""\
{doc_number_title}
Description: {doc_description}
\n"""
if tech_used_section:
documents_html += "
\n"
documents_html += "
Technologies Used:
\n
\n"
for tech_item in tech_used_section:
if tech_item.strip():
# Split once on the first ':' into name and details.
tech_parts = tech_item.split(':', 1)
if len(tech_parts) > 1:
documents_html += f" - {tech_parts[0].strip()}: {tech_parts[1].strip()}
\n"
else:
documents_html += f" - {tech_item.strip()}
\n"
documents_html += "
\n
\n"
documents_html += "
\n"
documents_html += "
\n"
# Grouped URLs at the end
grouped_uris_html = ""
if uris:
grouped_uris_html += " \n"
grouped_uris_html += "
\n" # Disruptive line
grouped_uris_html += "
Referenced Documents (URIs):
\n"
grouped_uris_html += "
\n"
# Entries are numbered from 1.
# NOTE(review): `uri` itself does not appear in the text visible here.
for idx, uri in enumerate(uris):
grouped_uris_html += f" - {idx + 1}. Document {idx + 1} Link
\n"
grouped_uris_html += "
\n
\n"
return f"\n{summary_html}{documents_html}{grouped_uris_html}
"
def gradio_prior_art(best_technologies, constraints):
    """Gradio callback: run the prior-art search and return it as display text.

    Calls ``process_prior_art`` in "constraints" mode with the "dict" input
    flavour, formats the result with ``format_prior_art_html``, echoes the
    formatted string to stdout and returns it.

    NOTE(review): the click handler feeds this from the hidden JSON
    components, so ``best_technologies`` presumably arrives as the
    ``{"technologies": [...]}`` wrapper rather than a bare list - confirm
    that ``process_prior_art`` expects that shape.
    """
    raw_result = process_prior_art(
        best_technologies,
        constraints,
        "constraints",
        "dict",
    )
    rendered = format_prior_art_html(raw_result)
    print(rendered)
    return rendered
def process_input_gradio(problem_description: str):
    """Run the full technology-selection pipeline for the Gradio UI.

    The helpers are called in a fixed order: prompt -> constraints ->
    stemmed constraints -> similarities -> best combinations -> selected
    ids -> technologies. Along the way the stemmed constraints are saved to
    ``constraints_stemmed.xlsx`` and printed, and the similarity result is
    pickled.

    Returns a 7-tuple, in the order the click handler's outputs expect:
    ``(prompt, constraints_html, best_combinations_html, selected_ids_text,
    final_technologies_html, {"technologies": [...]}, constraints)``.
    """
    # Prompt and constraints derived from the user's description.
    prompt = set_prompt(problem_description)
    constraints = retrieve_constraints(prompt)

    # Stem the constraints, persist them, and echo them to stdout.
    stemmed = stem(constraints, "constraints")
    stemmed_frame = pd.DataFrame({"stemmed_constraints": stemmed})
    save_dataframe(stemmed_frame, "constraints_stemmed.xlsx")
    print(stemmed)

    # Compare against the preloaded technology set (global_tech and
    # global_tech_embeddings are defined outside this function).
    similarities, similarity_matrix = get_contrastive_similarities(
        stemmed,
        global_tech,
        global_tech_embeddings,
    )
    save_to_pickle(similarities)

    # Narrow down to the winning combinations, then to concrete technologies.
    combos = find_best_list_combinations(stemmed, global_tech, similarity_matrix)
    selected_ids = select_technologies(combos)
    selected_technologies = get_technologies_by_id(selected_ids, global_tech)

    # Display strings for the three HTML panels.
    constraints_panel = format_constraints_html(constraints)
    combinations_panel = format_best_combinations_html(combos)
    technologies_panel = format_final_technologies_html(selected_technologies)

    # The ids are shown as a plain comma-separated list.
    ids_text = ", ".join(str(tech_id) for tech_id in selected_ids)

    return (
        prompt,
        constraints_panel,
        combinations_panel,
        ids_text,
        technologies_panel,
        {"technologies": selected_technologies},
        constraints,
    )
# Return a gr.update object to change the value and visibility in one step
# return gr.update(value=html_prior_art, visible=True)
# --- Gradio Interface Setup ---
# Components are created up front, outside any layout, and placed later by
# calling .render() on them inside the gr.Blocks context.

# Free-text input for the problem statement.
input_problem = gr.Textbox(
    label="Enter Problem Description",
    placeholder="e.g., Develop a secure and scalable e-commerce platform with real-time analytics.",
)

# Read-only text outputs.
output_prompt = gr.Textbox(label="1. Generated Prompt", interactive=False)
output_selected_ids = gr.Textbox(label="8. Selected Technology IDs", interactive=False)

# HTML panels, filled with the strings built by the format_*_html helpers.
output_constraints = gr.HTML(label="2. Retrieved Constraints")
output_best_combinations = gr.HTML(label="7. Best Technology Combinations Found")
output_final_technologies = gr.HTML(label="9. Final Best Technologies")
output_prior_art = gr.HTML(label="10. Prior Art Analysis")

# Hidden JSON holders that carry the pipeline results from the first button
# click to the prior-art button click.
stock_technologies = gr.JSON(visible=False)
stock_constraints = gr.JSON(visible=False)
# Assemble the Gradio UI: place the components defined above via .render(),
# create the two buttons, and wire their click handlers.
#
# NOTE(review): indentation is missing in this copy of the file, so the exact
# nesting of the Row/Column contexts (which component sits in which column)
# cannot be read off the text below. Check the layout against the original
# source before re-indenting.
# `custom_css` is not defined in this file; presumably it comes from the
# `src.ressources.main_css` wildcard import - confirm.
with gr.Blocks(
theme=gr.themes.Soft(),
css=custom_css
) as gradio_app_blocks:
gr.Markdown("# Insight Finder: Step-by-Step Technology Selection")
gr.Markdown("## Enter a problem description to see how relevant technologies are identified through various processing steps.")
# Input area: problem text box next to the button that starts the pipeline.
with gr.Row():
with gr.Column(scale=2):
input_problem.render()
with gr.Column(scale=1):
# The trailing comma turns this statement into a one-element tuple
# expression; the Markdown component is still constructed.
gr.Markdown("Click to start the analysis:"),
process_button = gr.Button("Process Problem", elem_id="process_button")
gr.Markdown("---")
gr.Markdown("### Processing Steps & Results:")
# Result area for the pipeline outputs.
with gr.Row():
with gr.Column():
output_prompt.render()
output_constraints.render()
with gr.Column():
output_selected_ids.render()
output_best_combinations.render()
output_final_technologies.render()
gr.Markdown("---")
gr.Markdown("### Prior Art Analysis")
prior_art_button = gr.Button("Find Prior Art", elem_id="prior_art_button")
output_prior_art.render()
# The two hidden JSON components are rendered too, so they can serve as
# outputs and inputs of the click handlers below.
stock_technologies.render()
stock_constraints.render()
# The seven outputs are listed in the same order as the 7-tuple returned by
# process_input_gradio; the last two stash the technologies wrapper and the
# constraints in the hidden JSON components.
process_button.click(
fn=process_input_gradio,
inputs=input_problem,
outputs=[
output_prompt,
output_constraints,
output_best_combinations,
output_selected_ids,
output_final_technologies,
stock_technologies,
stock_constraints
]
)
# The prior-art search reads back the values stashed by the first handler.
prior_art_button.click(
fn=gradio_prior_art,
inputs=[stock_technologies, stock_constraints],
outputs=output_prior_art
)
# Attach the Blocks UI to the FastAPI application under the /gradio path.
gr.mount_gradio_app(app=app, blocks=gradio_app_blocks, path="/gradio")
#if __name__ == "__main__":
# gradio_app_blocks.launch()