|
import html
import json
import logging
import re
from typing import Dict, List

import gradio as gr
import pandas as pd
from fastapi import FastAPI
from pydantic import BaseModel

from src.core import *
from src.ressources.main_css import *
|
|
|
|
|
# ASGI application object. The REST endpoints defined in this module are
# registered on it through the ``@app.post`` decorators.
app = FastAPI(
    title="Insight Finder",
    description="Find relevant technologies from a problem",
)
|
|
|
class InputProblem(BaseModel):
    """Request body of the ``/process`` endpoint."""

    # Problem statement as a single string.
    problem: str
|
|
|
class InputConstraints(BaseModel):
    """Request body of the ``/process-constraints`` endpoint."""

    # String-to-string mapping of constraints; presumably
    # constraint title -> description -- TODO confirm against callers.
    constraints: Dict[str, str]
|
|
|
|
|
class Technology(BaseModel):
    """Represents a single technology entry with its details."""

    # The five text fields below use the same keys that the HTML formatters
    # in this module read from technology dicts when rendering cards.
    title: str
    purpose: str
    key_components: str
    advantages: str
    limitations: str
    # Integer identifier of the technology; presumably the id used to look
    # technologies up in the global catalogue -- TODO confirm.
    id: int
|
|
|
class OutputPriorArt(BaseModel):
    """Result of a prior-art search over the technology combinations."""

    # Text body of the prior-art analysis.
    content: str
    # Untyped list; presumably the URIs of the referenced documents
    # (strings) -- TODO confirm element type.
    uris: List
|
|
|
class InputPriorArtConstraints(BaseModel):
    """Request body of the ``/prior-art-constraints`` endpoint."""

    # Technologies to search prior art for.
    technologies: List[Technology]
    # String-to-string mapping of constraints; presumably
    # constraint title -> description -- TODO confirm.
    constraints: Dict[str, str]
|
|
|
class InputPriorArtProblem(BaseModel):
    """Request body of the ``/prior-art-problems`` endpoint."""

    # Technologies to search prior art for.
    technologies: List[Technology]
    # Problem statement as a single string (note: the field name is singular).
    problem: str
|
|
|
|
|
|
|
class TechnologyData(BaseModel):
    """Represents the top-level object containing a list of technologies.

    Used as the response model of the ``/process`` and
    ``/process-constraints`` endpoints.
    """

    technologies: List[Technology]
|
|
|
@app.post("/process", response_model=TechnologyData)
async def process(data: InputProblem):
    """Return the technologies selected for the submitted problem.

    Delegates to ``process_input`` in ``"problem"`` mode and wraps its result
    in the ``TechnologyData`` response shape.
    """
    # NOTE(review): the whole request model is forwarded here, whereas the
    # constraints endpoint forwards the inner field -- confirm that
    # ``process_input`` expects the model itself in "problem" mode.
    technologies = process_input(data, global_tech, global_tech_embeddings, "problem")
    return {"technologies": technologies}
|
|
|
@app.post("/process-constraints", response_model=TechnologyData)
async def process_constraints(constraints: InputConstraints):
    """Return the technologies selected for an explicit set of constraints.

    Delegates to ``process_input`` in ``"constraints"`` mode with the
    constraint mapping carried by the request body.
    """
    constraint_map = constraints.constraints
    technologies = process_input(
        constraint_map, global_tech, global_tech_embeddings, "constraints"
    )
    return {"technologies": technologies}
|
|
|
@app.post("/prior-art-constraints", response_model=OutputPriorArt)
async def prior_art_constraints(data: InputPriorArtConstraints):
    """Search prior art for the given technologies, driven by constraints.

    Delegates to ``process_prior_art`` in ``"constraints"`` mode with the
    ``"pydantic"`` input format and returns its result unchanged.
    """
    prior_art = process_prior_art(data.technologies, data.constraints, "constraints", "pydantic")
    # Debug-level logging instead of an unconditional print of the whole
    # response on every request.
    logging.getLogger(__name__).debug("Prior art (constraints) result: %s", prior_art)
    return prior_art
|
|
|
@app.post("/prior-art-problems", response_model=OutputPriorArt)
async def prior_art_problems(data: InputPriorArtProblem):
    """Search prior art for the given technologies, driven by a problem.

    Delegates to ``process_prior_art`` in ``"problem"`` mode with the
    ``"pydantic"`` input format and returns its result unchanged.
    """
    # The request model declares the field as ``problem`` (singular); reading
    # ``data.problems`` raised AttributeError on every request.
    prior_art = process_prior_art(data.technologies, data.problem, "problem", "pydantic")
    return prior_art
|
|
|
def make_json_serializable(data):
    """Recursively convert *data* into plain, JSON-friendly Python values.

    Dicts, lists and tuples are rebuilt with their members converted; any
    other object exposing an ``item`` attribute is replaced by
    ``float(obj.item())``; everything else is returned untouched.
    """
    if isinstance(data, dict):
        return {key: make_json_serializable(value) for key, value in data.items()}

    if isinstance(data, (list, tuple)):
        converted = [make_json_serializable(element) for element in data]
        return converted if isinstance(data, list) else tuple(converted)

    if hasattr(data, 'item'):
        # NOTE(review): every scalar is coerced to float, so integer scalars
        # come back as floats -- confirm this is intended.
        return float(data.item())

    return data
|
|
|
def format_constraints_html(constraints: dict) -> str:
    """Render the retrieved constraints as an HTML fragment.

    Args:
        constraints: Mapping of constraint title to description. ``None`` or
            an empty mapping yields an empty container.

    Returns:
        An ``<h1>`` heading followed by a ``constraints-container`` div that
        holds one ``constraint-item`` block per entry.
    """
    items = []
    for title, description in (constraints or {}).items():
        # Titles and descriptions are free text: a stray "<" or "&" would
        # otherwise corrupt (or inject) markup in the rendered output.
        safe_title = html.escape(str(title))
        safe_description = html.escape(str(description))
        items.append(f"""
        <div class='constraint-item'>
            <p><span class='constraint-title'>{safe_title}:</span> <span class='constraint-description'>{safe_description}</span></p>
        </div>
        """)

    html_content = "<div class='constraints-container'>" + "".join(items) + "</div>"
    return "<h1>Retrieved Constraints</h1>" + html_content
|
|
|
def format_best_combinations_html(combinations_data: list) -> str:
    """Render the best technology combinations as nested HTML cards.

    Args:
        combinations_data: List of dicts. Each dict may carry a ``"problem"``
            dict (its ``"title"`` is used as the card heading, defaulting to
            ``"Problem <n>"``) and a ``"technologies"`` list. Every
            technologies entry is indexed at ``[0]`` to obtain the technology
            dict (presumably ``(technology, score)`` pairs -- confirm against
            the producer); entries whose first element is not a dict are
            skipped.

    Returns:
        An ``<h1>`` heading followed by one ``problem-card`` per combination.
    """

    def field(tech: dict, key: str) -> str:
        # Escaped lookup with the same 'N/A' fallback for missing keys.
        return html.escape(str(tech.get(key, 'N/A')))

    parts = ["<div class='combinations-outer-container'>"]
    for number, combination in enumerate(combinations_data or [], start=1):
        problem = combination.get("problem") or {}
        problem_title = html.escape(str(problem.get("title", f"Problem {number}")))

        parts.append(f"""
        <div class='problem-card'>
            <h3 class='problem-card-title'>{problem_title}</h3>
            <div class='technologies-inner-container'>
        """)
        for tech_info_score in combination.get("technologies") or []:
            tech_info = tech_info_score[0]
            if not isinstance(tech_info, dict):
                continue
            parts.append(f"""
                <div class='technology-card'>
                    <h4 class='tech-card-title'>{field(tech_info, 'title')}</h4>
                    <p><strong>Purpose:</strong> {field(tech_info, 'purpose')}</p>
                    <p><strong>Components:</strong> {field(tech_info, 'key_components')}</p>
                    <p><strong>Advantages:</strong> {field(tech_info, 'advantages')}</p>
                    <p><strong>Limitations:</strong> {field(tech_info, 'limitations')}</p>
                </div>
            """)
        parts.append("""
            </div>
        </div>
        """)
    parts.append("</div>")

    return "<h1>The 5 Best Technology Combinations per constraint</h1>" + "".join(parts)
|
|
|
def format_final_technologies_html(technologies_list: list) -> str:
    """Render the finally selected technologies as HTML cards.

    Args:
        technologies_list: List of technology dicts with the keys ``title``,
            ``purpose``, ``key_components``, ``advantages`` and
            ``limitations`` (missing keys render as ``N/A``). Non-dict
            entries are skipped; ``None`` is treated as an empty list.

    Returns:
        An ``<h1>`` heading followed by a ``final-tech-container`` div with
        one ``final-tech-card`` per technology.
    """

    def field(tech: dict, key: str) -> str:
        # Escape so that "<", ">" or "&" in the data cannot break the markup.
        return html.escape(str(tech.get(key, 'N/A')))

    cards = [
        f"""
        <div class='final-tech-card'>
            <h4 class='final-tech-title'>{field(tech_info, 'title')}</h4>
            <p><strong>Purpose:</strong> {field(tech_info, 'purpose')}</p>
            <p><strong>Components:</strong> {field(tech_info, 'key_components')}</p>
            <p><strong>Advantages:</strong> {field(tech_info, 'advantages')}</p>
            <p><strong>Limitations:</strong> {field(tech_info, 'limitations')}</p>
        </div>
        """
        for tech_info in (technologies_list or [])
        if isinstance(tech_info, dict)
    ]

    html_content = "<div class='final-tech-container'>" + "".join(cards) + "</div>"
    return "<h1>The best technologies combinations </h1>" + html_content
|
|
|
def _render_prior_art_summary(summary_text: str) -> str:
    """Render the text preceding the documents marker as summary paragraphs."""
    rendered = [" <div class='prior-art-summary'>\n"]
    for line in summary_text.strip().split('\n'):
        stripped = line.strip()
        if stripped.startswith('*'):
            # Bullet line: emphasise the part before the first colon, if any.
            key, colon, value = line.partition(':')
            if colon:
                rendered.append(
                    f" <p class='summary-bullet'><strong>{key.replace('*', '').strip()}:</strong> {value.strip()}</p>\n"
                )
            else:
                rendered.append(f" <p class='summary-bullet'>{line.replace('*', '').strip()}</p>\n")
        elif stripped:
            rendered.append(f" <p>{stripped}</p>\n")
    rendered.append(" </div>\n")
    return "".join(rendered)


def _render_prior_art_documents(documents_text: str) -> str:
    """Render the numbered document entries that follow the marker as cards."""
    # Entries start with a numbered bold title ("1. **Title**" in the raw
    # text). Bold markers have already been rewritten to <strong> tags when
    # this runs, so the split has to look for the tag form; splitting on the
    # literal "**" never matched and produced no cards at all.
    entries = re.split(r'(\d+\.\s*<strong>.*?</strong>)', documents_text.strip())

    rendered = [" <div class='prior-art-documents'>\n"]
    # With a capturing group, re.split yields
    # [prefix, title, body, title, body, ...].
    for title_line, content_block in zip(entries[1::2], entries[2::2]):
        lines = [raw.strip() for raw in content_block.split('\n') if raw.strip()]

        # The last occurrence of each section header wins.
        desc_start_idx = -1
        tech_start_idx = -1
        for idx, line in enumerate(lines):
            if line.startswith("Description:"):
                desc_start_idx = idx
            elif line.startswith("Technologies Used:"):
                tech_start_idx = idx

        doc_description = ""
        if desc_start_idx != -1:
            # The description runs up to the technologies section (or the end).
            desc_end_idx = tech_start_idx if tech_start_idx != -1 else len(lines)
            doc_description = " ".join(lines[desc_start_idx:desc_end_idx]).replace("Description:", "").strip()

        tech_used_section = []
        if tech_start_idx != -1:
            tech_used_section = [
                line.replace('*', '').strip()
                for line in lines[tech_start_idx:]
                if line.startswith('*')
            ]

        rendered.append(f"""\
        <div class='prior-art-document-card'>
            <h4 class='document-title'>{title_line.strip()}</h4>
            <p class='document-description'><strong>Description:</strong> {doc_description}</p>\n""")
        if tech_used_section:
            rendered.append(" <div class='document-technologies'>\n")
            rendered.append(" <h5>Technologies Used:</h5>\n <ul>\n")
            for tech_item in tech_used_section:
                if not tech_item.strip():
                    continue
                name, colon, detail = tech_item.partition(':')
                if colon:
                    rendered.append(f" <li><strong>{name.strip()}:</strong> {detail.strip()}</li>\n")
                else:
                    rendered.append(f" <li>{tech_item.strip()}</li>\n")
            rendered.append(" </ul>\n </div>\n")
        rendered.append(" </div>\n")
    rendered.append(" </div>\n")
    return "".join(rendered)


def _render_prior_art_uris(uris) -> str:
    """Render the list of referenced URIs as a numbered list of links."""
    if not uris:
        return ""
    rendered = [
        " <div class='grouped-uris-section'>\n",
        " <hr class='disruptive-line'>\n",
        " <h3>Referenced Documents (URIs):</h3>\n",
        " <ul>\n",
    ]
    for number, uri in enumerate(uris, start=1):
        # The href is single-quoted, so quotes must be escaped as well.
        safe_uri = html.escape(str(uri), quote=True)
        rendered.append(
            f" <li>{number}. <a href='{safe_uri}' target='_blank' class='prior-art-grouped-link'>Document {number} Link</a></li>\n"
        )
    rendered.append(" </ul>\n </div>\n")
    return "".join(rendered)


def format_prior_art_html(prior_art_data: dict) -> str:
    """Convert a prior-art result into an HTML fragment.

    The ``content`` text is treated as lightweight markdown: ``**bold**``
    spans and ``[n](http...)`` links are converted to tags, the part before
    the sentence "Here are the documents found and the technologies used
    within them:" becomes the summary, and the numbered entries after it
    become document cards. ``uris`` (optional) is rendered as a link list.

    Args:
        prior_art_data: Dict with a ``"content"`` string and an optional
            ``"uris"`` list.

    Returns:
        A ``prior-art-container`` div; a "no data" placeholder when the input
        is empty, lacks ``content``, or ``content`` is ``None``.
    """
    if (
        not prior_art_data
        or 'content' not in prior_art_data
        or prior_art_data['content'] is None
    ):
        return "<div class='prior-art-container'><p>No prior art data available.</p></div>"

    # Escape first so that only the tags generated below end up as markup.
    content = html.escape(str(prior_art_data['content']))
    uris = prior_art_data.get('uris', [])

    # Inline markdown -> HTML: bold spans, then numbered links.
    processed_content = re.sub(r'\*\*(.*?)\*\*', r'<strong>\1</strong>', content)
    processed_content = re.sub(
        r'\[(\d+)\]\((https?:\/\/[^\s\)]+)\)',
        r'<a href="\2" target="_blank" class="prior-art-inline-link">\1</a>',
        processed_content,
    )

    sections = processed_content.split("Here are the documents found and the technologies used within them:\n\n")

    # str.split always returns at least one element, so the summary section
    # is always present; the documents section only when the marker was found.
    summary_html = _render_prior_art_summary(sections[0])
    documents_html = _render_prior_art_documents(sections[1]) if len(sections) > 1 else ""
    grouped_uris_html = _render_prior_art_uris(uris)

    return f"<div class='prior-art-container'>\n{summary_html}{documents_html}{grouped_uris_html}</div>"
|
|
|
|
|
def gradio_prior_art(best_technologies, constraints):
    """Gradio callback: run the prior-art search and return it as HTML.

    Args:
        best_technologies: Value of the hidden technologies JSON component.
        constraints: Value of the hidden constraints JSON component.

    Returns:
        The HTML fragment produced by ``format_prior_art_html``.
    """
    if not best_technologies:
        # The button can be clicked before any problem has been processed, in
        # which case the hidden state is still empty: show the placeholder
        # instead of running a search on nothing.
        return format_prior_art_html(None)

    prior_art = process_prior_art(best_technologies, constraints, "constraints", "dict")
    html_prior_art = format_prior_art_html(prior_art)
    # Debug-level logging instead of printing the whole page on every click.
    logging.getLogger(__name__).debug("Prior art HTML: %s", html_prior_art)
    return html_prior_art
|
|
|
def process_input_gradio(problem_description: str):
    """Process the input problem description step-by-step for Gradio.

    Runs the whole selection pipeline and returns every intermediate result
    so that each one can be shown in its own UI component.

    Args:
        problem_description: Problem statement typed by the user.

    Returns:
        A 7-tuple, in the order expected by the ``process_button`` outputs:
        the generated prompt, the constraints HTML, the best-combinations
        HTML, the selected technology ids joined with ``", "``, the final
        technologies HTML, ``{"technologies": best_technologies}`` and the
        raw constraints.

    Side effects:
        Calls ``save_dataframe`` (``constraints_stemmed.xlsx``) and
        ``save_to_pickle`` with intermediate results.
    """
    logger = logging.getLogger(__name__)

    # Build the prompt and retrieve the constraints from it.
    prompt = set_prompt(problem_description)
    constraints = retrieve_constraints(prompt)

    # Stem the constraints and persist them for inspection.
    constraints_stemmed = stem(constraints, "constraints")
    save_dataframe(pd.DataFrame({"stemmed_constraints": constraints_stemmed}), "constraints_stemmed.xlsx")
    # Debug-level logging instead of an unconditional print on every request.
    logger.debug("Stemmed constraints: %s", constraints_stemmed)

    # Score the stemmed constraints against the global technology catalogue.
    result_similarities, matrix = get_contrastive_similarities(
        constraints_stemmed, global_tech, global_tech_embeddings
    )
    save_to_pickle(result_similarities)

    # Select the best combinations, then resolve the chosen ids to entries.
    best_combinations = find_best_list_combinations(constraints_stemmed, global_tech, matrix)
    best_technologies_id = select_technologies(best_combinations)
    best_technologies = get_technologies_by_id(best_technologies_id, global_tech)

    # Render the intermediate results for the HTML components.
    constraints_html = format_constraints_html(constraints)
    best_combinations_html = format_best_combinations_html(best_combinations)
    final_technologies_html = format_final_technologies_html(best_technologies)

    return (
        prompt,
        constraints_html,
        best_combinations_html,
        ", ".join(map(str, best_technologies_id)),
        final_technologies_html,
        {"technologies": best_technologies},
        constraints
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# UI components are created up front at module level and placed into the
# layout later through their ``.render()`` method.

# Free-text input for the problem description.
input_problem = gr.Textbox(
    label="Enter Problem Description",
    placeholder="e.g., Develop a secure and scalable e-commerce platform with real-time analytics."
)

# Output components, one per value returned by ``process_input_gradio``
# (plus the prior-art result).
output_prompt = gr.Textbox(label="1. Generated Prompt", interactive=False)
output_constraints = gr.HTML(label="2. Retrieved Constraints")
output_best_combinations = gr.HTML(label="7. Best Technology Combinations Found")
output_selected_ids = gr.Textbox(label="8. Selected Technology IDs", interactive=False)
output_final_technologies = gr.HTML(label="9. Final Best Technologies")
output_prior_art = gr.HTML(label="10. Prior Art Analysis")

# Hidden JSON components: they carry the selected technologies and the
# constraints from the processing step over to the prior-art step.
stock_technologies = gr.JSON(visible=False)
stock_constraints = gr.JSON(visible=False)
|
|
|
# Page layout and event wiring of the Gradio UI.
with gr.Blocks(
    theme=gr.themes.Soft(),
    css=custom_css
) as gradio_app_blocks:
    gr.Markdown("# Insight Finder: Step-by-Step Technology Selection")
    gr.Markdown("## Enter a problem description to see how relevant technologies are identified through various processing steps.")

    # Input row: problem description on the left, start button on the right.
    with gr.Row():
        with gr.Column(scale=2):
            input_problem.render()
        with gr.Column(scale=1):
            # (A stray trailing comma used to turn this call into a
            # throwaway one-element tuple expression.)
            gr.Markdown("Click to start the analysis:")
            process_button = gr.Button("Process Problem", elem_id="process_button")

    gr.Markdown("---")
    gr.Markdown("### Processing Steps & Results:")

    # Results row: prompt and constraints on the left, selection on the right.
    with gr.Row():
        with gr.Column():
            output_prompt.render()
            output_constraints.render()
        with gr.Column():
            output_selected_ids.render()
            output_best_combinations.render()
            output_final_technologies.render()

    gr.Markdown("---")
    gr.Markdown("### Prior Art Analysis")
    prior_art_button = gr.Button("Find Prior Art", elem_id="prior_art_button")
    output_prior_art.render()
    # Hidden state components still have to be rendered to be usable as
    # event inputs/outputs.
    stock_technologies.render()
    stock_constraints.render()

    # The outputs list must stay in the same order as the tuple returned by
    # ``process_input_gradio``.
    process_button.click(
        fn=process_input_gradio,
        inputs=input_problem,
        outputs=[
            output_prompt,
            output_constraints,
            output_best_combinations,
            output_selected_ids,
            output_final_technologies,
            stock_technologies,
            stock_constraints
        ]
    )

    # The prior-art step reuses the state stored by the processing step.
    prior_art_button.click(
        fn=gradio_prior_art,
        inputs=[stock_technologies, stock_constraints],
        outputs=output_prior_art
    )
|
|
|
|
|
# Serve the Gradio UI from the FastAPI application under the /gradio path.
gr.mount_gradio_app(app, gradio_app_blocks, path="/gradio")
|
|
|
|