from fastapi import FastAPI
from pydantic import BaseModel
from typing import Dict, List
import gradio as gr
import pandas as pd
import json
import re

from src.core import *
from src.ressources.main_css import *

app = FastAPI(
    title="Insight Finder",
    description="Find relevant technologies from a problem",
)


class InputProblem(BaseModel):
    problem: str


class InputConstraints(BaseModel):
    constraints: Dict[str, str]


# This schema defines the structure for a single technology object
class Technology(BaseModel):
    """Represents a single technology entry with its details."""
    title: str
    purpose: str
    key_components: str
    advantages: str
    limitations: str
    id: int


class OutputPriorArt(BaseModel):
    """Represents the search of prior art using the technology combinations."""
    content: str
    uris: List


class InputPriorArtConstraints(BaseModel):
    technologies: List[Technology]
    constraints: Dict[str, str]


class InputPriorArtProblem(BaseModel):
    technologies: List[Technology]
    problem: str


# This schema defines the root structure of the JSON
class TechnologyData(BaseModel):
    """Represents the top-level object containing a list of technologies."""
    technologies: List[Technology]


@app.post("/process", response_model=TechnologyData)
async def process(data: InputProblem):
    result = process_input(data, global_tech, global_tech_embeddings, "problem")
    return {"technologies": result}


@app.post("/process-constraints", response_model=TechnologyData)
async def process_constraints(constraints: InputConstraints):
    result = process_input(constraints.constraints, global_tech, global_tech_embeddings, "constraints")
    return {"technologies": result}


@app.post("/prior-art-constraints", response_model=OutputPriorArt)
async def prior_art_constraints(data: InputPriorArtConstraints):
    prior_art = process_prior_art(data.technologies, data.constraints, "constraints", "pydantic")
    print(prior_art)
    return prior_art


@app.post("/prior-art-problems", response_model=OutputPriorArt)
async def prior_art_problems(data: InputPriorArtProblem):
    # InputPriorArtProblem exposes a single `problem` field (not `problems`).
    prior_art = process_prior_art(data.technologies, data.problem, "problem", "pydantic")
    return prior_art


def make_json_serializable(data):
    """Recursively convert nested containers and numpy scalars into JSON-serializable types."""
    if isinstance(data, dict):
        return {k: make_json_serializable(v) for k, v in data.items()}
    elif isinstance(data, list):
        return [make_json_serializable(item) for item in data]
    elif isinstance(data, tuple):
        return tuple(make_json_serializable(item) for item in data)
    elif hasattr(data, 'item'):  # numpy scalar types expose .item()
        return float(data.item())
    else:
        return data
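
# Illustrative only: a minimal client-side sketch of how the /process endpoint
# defined above could be exercised once the app is running. The host/port, the
# example problem text, and the use of the `requests` package are assumptions,
# not part of this application.
def _example_process_request():
    import requests  # assumed to be installed in the calling environment

    response = requests.post(
        "http://localhost:8000/process",  # assumed local deployment URL
        json={"problem": "Design a low-power wireless sensor network for crop monitoring"},
    )
    data = response.json()  # shaped like the TechnologyData schema
    for tech in data["technologies"]:
        print(tech["id"], tech["title"])
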
" for title, description in constraints.items(): html_content += f"""

{title}: {description}

""" html_content += "
" return "

Retrieved Constraints

" + html_content def format_best_combinations_html(combinations_data: list) -> str: html_content = "
" for i, combination in enumerate(combinations_data): problem_title = combination.get("problem", {}).get("title", f"Problem {i+1}") technologies = combination.get("technologies", []) html_content += f"""

{problem_title}

""" for tech_info_score in technologies: tech_info = tech_info_score[0] if isinstance(tech_info, dict): html_content += f"""

{tech_info.get('title', 'N/A')}

Purpose: {tech_info.get('purpose', 'N/A')}

Components: {tech_info.get('key_components', 'N/A')}

Advantages: {tech_info.get('advantages', 'N/A')}

Limitations: {tech_info.get('limitations', 'N/A')}

""" html_content += """
""" html_content += "
" return "

The 5 Best Technology Combinations per constraint

" + html_content def format_final_technologies_html(technologies_list: list) -> str: html_content = "
" for tech_info in technologies_list: if isinstance(tech_info, dict): html_content += f"""

{tech_info.get('title', 'N/A')}

Purpose: {tech_info.get('purpose', 'N/A')}

Components: {tech_info.get('key_components', 'N/A')}

Advantages: {tech_info.get('advantages', 'N/A')}

Limitations: {tech_info.get('limitations', 'N/A')}

""" html_content += "
" return "

The best technologies combinations

" + html_content def format_prior_art_html(prior_art_data: dict) -> str: if not prior_art_data or 'content' not in prior_art_data: return "

def format_prior_art_html(prior_art_data: dict) -> str:
    """Render the prior-art search result (content text plus referenced URIs) as HTML."""
    if not prior_art_data or 'content' not in prior_art_data:
        return "<div><p>No prior art data available.</p></div>"

    content = prior_art_data['content']
    uris = prior_art_data.get('uris', [])

    # 1. Convert **text** to <b>text</b>
    processed_content = re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', content)

    # 2. Convert [x](uri) to clickable links
    # This regex handles cases where [x] is followed by (uri)
    # It captures the number (group 1) and the URI (group 2)
    processed_content = re.sub(
        r'\[(\d+)\]\((https?:\/\/[^\s\)]+)\)',
        r'<a href="\2" target="_blank">[\1]</a>',
        processed_content,
    )

    # Split content into initial summary and then document sections
    sections = processed_content.split(
        "Here are the documents found and the technologies used within them:\n\n"
    )

    summary_html = ""
    documents_html = ""

    # Process summary part (the first part of the split)
    if len(sections) > 0:
        summary_lines = sections[0].strip().split('\n')
        summary_html += "<div>\n"
        for line in summary_lines:
            if line.strip().startswith('*'):
                # For bullet points, specially format bold text
                # The bolding for **text** is already handled by re.sub
                parts = line.split(':', 1)
                if len(parts) > 1:
                    summary_html += f"<p><strong>{parts[0].replace('*', '').strip()}</strong>: {parts[1].strip()}</p>\n"
                else:
                    summary_html += f"<p>{line.replace('*', '').strip()}</p>\n"
            elif line.strip():
                summary_html += f"<p>{line.strip()}</p>\n"
        summary_html += "</div>\n"

    # Process documents part (the second part of the split)
    if len(sections) > 1:
        documents_raw = sections[1].strip()

        # Split by "number. **" to get individual document entries reliably
        document_entries = re.split(r'(\d+\.\s*\*\*.*?\*\*)', documents_raw)

        parsed_docs = []
        for i in range(1, len(document_entries), 2):
            title_line = document_entries[i].strip()
            content_block = document_entries[i+1].strip() if i+1 < len(document_entries) else ""
            parsed_docs.append({'title_line': title_line, 'content_block': content_block})

        documents_html += "<div>\n"
        for doc in parsed_docs:
            doc_number_title = doc['title_line']
            doc_content_lines = [l.strip() for l in doc['content_block'].split('\n') if l.strip()]

            doc_description = ""
            tech_used_section = []

            desc_start_idx = -1
            tech_start_idx = -1
            for idx, line in enumerate(doc_content_lines):
                if line.startswith("Description:"):
                    desc_start_idx = idx
                elif line.startswith("Technologies Used:"):
                    tech_start_idx = idx

            if desc_start_idx != -1:
                desc_end_idx = tech_start_idx if tech_start_idx != -1 else len(doc_content_lines)
                doc_description = " ".join(doc_content_lines[desc_start_idx:desc_end_idx]).replace("Description:", "").strip()

            if tech_start_idx != -1:
                tech_used_section = [
                    l.replace('*', '').strip()
                    for l in doc_content_lines[tech_start_idx:]
                    if l.strip().startswith('*')
                ]

            documents_html += f"""\
            <div>
                <h4>{doc_number_title}</h4>
                <p><strong>Description:</strong> {doc_description}</p>
            </div>\n"""

            if tech_used_section:
                documents_html += "<div>\n"
                documents_html += "<p><strong>Technologies Used:</strong></p>\n<ul>\n"
                for tech_item in tech_used_section:
                    if tech_item.strip():
                        tech_parts = tech_item.split(':', 1)
                        if len(tech_parts) > 1:
                            documents_html += f"<li><strong>{tech_parts[0].strip()}</strong>: {tech_parts[1].strip()}</li>\n"
                        else:
                            documents_html += f"<li>{tech_item.strip()}</li>\n"
                documents_html += "</ul>\n</div>\n"
        documents_html += "</div>\n"

    # Grouped URLs at the end
    grouped_uris_html = ""
    if uris:
        grouped_uris_html += "<div>\n"
        grouped_uris_html += "<hr>\n"  # dividing line before the reference list
        grouped_uris_html += "<p><strong>Referenced Documents (URIs):</strong></p>\n"
        grouped_uris_html += "<ul>\n"
        for uri in uris:
            grouped_uris_html += f'<li><a href="{uri}" target="_blank">{uri}</a></li>\n'
        grouped_uris_html += "</ul>\n</div>\n"

    return f"<div>\n{summary_html}{documents_html}{grouped_uris_html}</div>"
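
# Illustrative only: an invented prior_art payload in the shape consumed by
# format_prior_art_html() -- markdown-style **bold**, numbered [n](uri)
# citations, and the sentinel sentence used to split the summary from the
# per-document section. Real content comes from process_prior_art().
def _example_prior_art_payload():
    content = (
        "The combination of **energy harvesting** and **mesh networking** "
        "appears in earlier work [1](https://example.org/doc1).\n\n"
        "Here are the documents found and the technologies used within them:\n\n"
        "1. **Self-powered sensor mesh**\n"
        "Description: A network of battery-free sensor nodes.\n"
        "Technologies Used:\n"
        "* Energy harvesting: piezoelectric transducers\n"
    )
    return {"content": content, "uris": ["https://example.org/doc1"]}
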
" def gradio_prior_art(best_technologies, constraints): prior_art = process_prior_art(best_technologies, constraints, "constraints", "dict") html_prior_art = format_prior_art_html(prior_art) print(html_prior_art) return html_prior_art def process_input_gradio(problem_description: str): """ Processes the input problem description step-by-step for Gradio. Returns all intermediate results. """ # Step 1: Set Prompt prompt = set_prompt(problem_description) # Step 2: Retrieve Constraints constraints = retrieve_constraints(prompt) # Step 3: Stem Constraints constraints_stemmed = stem(constraints, "constraints") save_dataframe(pd.DataFrame({"stemmed_constraints": constraints_stemmed}), "constraints_stemmed.xlsx") print(constraints_stemmed) # Step 4: Global Tech (already loaded, just acknowledge) # save_dataframe(global_tech_df, "global_tech.xlsx") # This is already done implicitly by loading # Step 5: Get Contrastive Similarities result_similarities, matrix = get_contrastive_similarities( constraints_stemmed, global_tech, global_tech_embeddings ) save_to_pickle(result_similarities) # Step 6: Find Best List Combinations best_combinations = find_best_list_combinations(constraints_stemmed, global_tech, matrix) # Step 7: Select Technologies best_technologies_id = select_technologies(best_combinations) # Step 8: Get Technologies by ID best_technologies = get_technologies_by_id(best_technologies_id, global_tech) # Format outputs for Gradio # For Constraints: constraints_html = format_constraints_html(constraints) # For Best Combinations: best_combinations_html = format_best_combinations_html(best_combinations) # For Final Technologies: final_technologies_html = format_final_technologies_html(best_technologies) return ( prompt, constraints_html, # Output HTML for constraints best_combinations_html, # Output HTML for best combinations ", ".join(map(str, best_technologies_id)), # Still a simple text list final_technologies_html, # Output HTML for final technologies {"technologies": best_technologies}, # `best_technologies` is the actual list of dicts constraints ) # Return a gr.update object to change the value and visibility in one step # return gr.update(value=html_prior_art, visible=True) # --- Gradio Interface Setup --- input_problem = gr.Textbox( label="Enter Problem Description", placeholder="e.g., Develop a secure and scalable e-commerce platform with real-time analytics." ) output_prompt = gr.Textbox(label="1. Generated Prompt", interactive=False) output_constraints = gr.HTML(label="2. Retrieved Constraints") # Changed to HTML output_best_combinations = gr.HTML(label="7. Best Technology Combinations Found") # Changed to HTML output_selected_ids = gr.Textbox(label="8. Selected Technology IDs", interactive=False) output_final_technologies = gr.HTML(label="9. Final Best Technologies") # Changed to HTML output_prior_art = gr.HTML(label="10. 
Prior Art Analysis") # Initially hidden stock_technologies = gr.JSON(visible=False) stock_constraints = gr.JSON(visible=False) with gr.Blocks( theme=gr.themes.Soft(), css=custom_css ) as gradio_app_blocks: gr.Markdown("# Insight Finder: Step-by-Step Technology Selection") gr.Markdown("## Enter a problem description to see how relevant technologies are identified through various processing steps.") with gr.Row(): with gr.Column(scale=2): input_problem.render() with gr.Column(scale=1): gr.Markdown("Click to start the analysis:"), process_button = gr.Button("Process Problem", elem_id="process_button") gr.Markdown("---") gr.Markdown("### Processing Steps & Results:") with gr.Row(): with gr.Column(): output_prompt.render() output_constraints.render() with gr.Column(): output_selected_ids.render() output_best_combinations.render() output_final_technologies.render() gr.Markdown("---") gr.Markdown("### Prior Art Analysis") prior_art_button = gr.Button("Find Prior Art", elem_id="prior_art_button") output_prior_art.render() stock_technologies.render() stock_constraints.render() process_button.click( fn=process_input_gradio, inputs=input_problem, outputs=[ output_prompt, output_constraints, output_best_combinations, output_selected_ids, output_final_technologies, stock_technologies, stock_constraints ] ) prior_art_button.click( fn=gradio_prior_art, inputs=[stock_technologies, stock_constraints], outputs=output_prior_art ) gr.mount_gradio_app(app, gradio_app_blocks, path="/gradio") #if __name__ == "__main__": # gradio_app_blocks.launch()