File size: 12,918 Bytes
1f05644
 
 
8a495ab
 
 
 
1f05644
 
 
 
 
 
8a495ab
1f05644
 
 
 
 
 
 
 
b0355df
 
6117e08
1f05644
 
 
4c02ba8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22e1752
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
from src.services.utils import *
from src.services.processor import *

# Load the technology collection and its embeddings once, when this module is
# first executed, so the pipeline functions below can receive them as arguments.
# NOTE(review): `load_technologies` is not defined above this line, so at this
# point it is presumably supplied by one of the star imports — confirm.
global_tech, global_tech_embeddings = load_technologies()


def process_input(data, global_tech, global_tech_embeddings):
    """Run the technology-selection pipeline for a problem statement.

    Builds a prompt from ``data.problem``, retrieves the constraints for that
    prompt, and hands them to ``process_input_from_constraints``, which runs
    the remaining steps (stemming, similarity scoring, combination search and
    technology lookup). Delegating keeps the two entry points from drifting
    apart; the steps performed and their order are unchanged.

    Args:
        data: Object exposing a ``problem`` attribute, which is passed to
            ``set_prompt``.
        global_tech: Technology collection forwarded to the pipeline.
        global_tech_embeddings: Embeddings forwarded to the similarity step.

    Returns:
        The value produced by ``get_technologies_by_id`` for the selected
        technology IDs.
    """
    prompt = set_prompt(data.problem)
    constraints = retrieve_constraints(prompt)

    # Everything after constraint retrieval is shared with the
    # constraints-only entry point.
    return process_input_from_constraints(
        constraints, global_tech, global_tech_embeddings
    )


def process_input_from_constraints(constraints, global_tech, global_tech_embeddings):
    """Select technologies for constraints that were already extracted.

    The constraints are stemmed, scored against the technologies, and the
    resulting matrix is used to pick combinations and, from those, the final
    technologies. Intermediate values are passed to ``save_dataframe`` and
    ``save_to_pickle`` and a debug summary is printed along the way.

    Args:
        constraints: Constraints handed to ``stem``.
        global_tech: Technology collection used for scoring and lookup.
        global_tech_embeddings: Embeddings passed to the similarity step.

    Returns:
        The value produced by ``get_technologies_by_id`` for the selected
        technology IDs.
    """
    constraints_stemmed = stem(constraints, "constraints")

    # Hand both pipeline inputs to save_dataframe before scoring.
    save_dataframe(constraints_stemmed, "constraints_stemmed.xlsx")
    save_dataframe(global_tech, "global_tech.xlsx")

    similarities, matrix = get_contrastive_similarities(
        constraints_stemmed, global_tech, global_tech_embeddings
    )
    save_to_pickle(similarities)

    print(f"Matrix : {matrix} \n Constraints : {constraints_stemmed} \n Gloabl tech : {global_tech}")

    combinations = find_best_list_combinations(constraints_stemmed, global_tech, matrix)
    chosen_ids = select_technologies(combinations)
    return get_technologies_by_id(chosen_ids, global_tech)


import gradio as gr
import pandas as pd
import numpy as np
import random
import json

# --- Dummy Implementations for src.services.utils and src.services.processor ---
# These functions simulate the behavior of your actual services for the Gradio interface.

def load_technologies():
    """
    Stand-in loader for the technology catalogue and its embeddings.

    Returns:
        A tuple ``(dataframe, embeddings)``: a DataFrame with the columns
        ``id``, ``name`` and ``description`` holding ten sample technologies,
        and a random array with one 128-wide row per technology.
    """
    # One (id, name, description) record per technology.
    catalogue = [
        (1, 'Machine Learning', 'Algorithms for learning from data.'),
        (2, 'Cloud Computing', 'On-demand computing resources.'),
        (3, 'Blockchain', 'Decentralized ledger technology.'),
        (4, 'Cybersecurity', 'Protecting systems from threats.'),
        (5, 'Data Analytics', 'Analyzing large datasets.'),
        (6, 'Artificial Intelligence', 'Simulating human intelligence.'),
        (7, 'DevOps', 'Software development and operations.'),
        (8, 'Quantum Computing', 'Utilizing quantum mechanics.'),
        (9, 'Edge Computing', 'Processing data near the source.'),
        (10, 'Robotics', 'Automated machines.'),
    ]
    technologies = pd.DataFrame(catalogue, columns=['id', 'name', 'description'])

    # Random vectors take the place of real embeddings.
    embeddings = np.random.rand(len(technologies), 128)
    return technologies, embeddings

def set_prompt(problem_description: str) -> str:
    """
    Stand-in prompt builder.

    Wraps the given problem description in a fixed question about its key
    technical challenges and requirements and returns the resulting text.
    """
    prompt = f"Based on the problem: '{problem_description}', what are the key technical challenges and requirements?"
    return prompt

def retrieve_constraints(prompt: str) -> list[str]:
    """
    Stand-in constraint extractor driven by keyword matching.

    The prompt is lower-cased and checked against keyword groups in a fixed
    order (security, then performance, then data). The first group with a
    keyword occurring anywhere in the prompt decides the returned constraints;
    if none matches, a generic list is returned.
    """
    lowered = prompt.lower()

    # Ordered (keywords, constraints) rules; earlier rules take precedence.
    # Built on every call so each caller receives its own list.
    keyword_rules = (
        (("security", "secure"), ["high security", "data privacy", "authentication"]),
        (("performance", "speed"), ["low latency", "high throughput", "scalability"]),
        (("data", "analyze"), ["data integration", "real-time analytics", "data storage"]),
    )
    for keywords, matched_constraints in keyword_rules:
        if any(keyword in lowered for keyword in keywords):
            return matched_constraints

    return ["cost-efficiency", "ease of integration", "maintainability", "scalability"]

def stem(text_list: list[str], type_of_text: str) -> list[str]:
    """
    Stand-in stemmer.

    Each text is split on whitespace; every word is lower-cased and has at
    most one trailing suffix removed, trying 'ing', then 'es', then 's'. The
    words are re-joined with single spaces. ``type_of_text`` is accepted but
    not used by this implementation.
    """
    def strip_suffix(word):
        lowered = word.lower()
        # Only the first matching suffix is removed.
        for suffix in ("ing", "es", "s"):
            if lowered.endswith(suffix):
                return lowered[:-len(suffix)]
        return lowered

    return [
        " ".join(strip_suffix(word) for word in text.split())
        for text in text_list
    ]

def save_dataframe(df: pd.DataFrame, filename: str):
    """
    Stand-in for persisting a DataFrame.

    Nothing is written; the function only prints which file name the frame
    would have been saved under.
    """
    # A real implementation could call, e.g., df.to_excel(filename, index=False).
    message = f"Simulating saving DataFrame to {filename}"
    print(message)

def save_to_pickle(data):
    """
    Stand-in for pickling data.

    Nothing is written; the function only prints the type of the object it
    was given.
    """
    message = f"Simulating saving data to pickle: {type(data)}"
    print(message)

def get_contrastive_similarities(constraints_stemmed: list[str], global_tech_df: pd.DataFrame, global_tech_embeddings: np.ndarray):
    """
    Stand-in similarity scorer.

    Draws a random matrix with one row per constraint and one column per
    technology, rounded to three decimals, and records for every constraint
    the (name, score) pairs of its three highest-scoring technologies.
    ``global_tech_embeddings`` is accepted but not used here.

    Returns:
        A tuple ``(result_similarities, matrix)`` where ``result_similarities``
        maps each constraint to its list of (technology name, score) pairs.
    """
    shape = (len(constraints_stemmed), len(global_tech_df))
    # Rounded for nicer display.
    matrix = np.round(np.random.rand(*shape), 3)

    result_similarities = {}
    for row, constraint in enumerate(constraints_stemmed):
        # Column indices sorted by descending score, limited to the best three.
        best_columns = np.argsort(matrix[row])[::-1][:3]
        result_similarities[constraint] = [
            (global_tech_df.iloc[col]['name'], matrix[row, col])
            for col in best_columns
        ]

    return result_similarities, matrix

def find_best_list_combinations(constraints_stemmed: list[str], global_tech_df: pd.DataFrame, matrix: np.ndarray) -> list[dict]:
    """
    Stand-in for the search of technology combinations.

    Produces up to three randomly generated combinations (one per constraint,
    capped at three). Each combination is a dict with the keys
    ``technologies`` (list of ``{"id", "name"}`` dicts), ``score`` (random
    value between 0.7 and 0.95, rounded to two decimals) and
    ``covered_constraints`` (random non-empty subset of the constraints).
    ``matrix`` is accepted but not used by this implementation.

    The number of technologies per combination is drawn from 2..4 and then
    capped at the number of available technologies, so tables with fewer
    than four rows no longer make ``random.sample`` raise ``ValueError``.

    Args:
        constraints_stemmed: Constraints to draw covered subsets from.
        global_tech_df: DataFrame with at least ``id`` and ``name`` columns.
        matrix: Unused similarity matrix.

    Returns:
        A list of combination dicts; empty when there are no constraints.
    """
    available_ids = global_tech_df['id'].tolist()
    best_combinations = []

    for _ in range(min(3, len(constraints_stemmed))):
        # The order of the random draws is kept as in the original so seeded
        # runs on sufficiently large tables give the same results.
        score = round(random.uniform(0.7, 0.95), 2)

        # Never ask for more technologies than exist.
        num_tech_in_combo = min(random.randint(2, 4), len(available_ids))
        selected_tech_ids = random.sample(available_ids, num_tech_in_combo)
        technologies = [
            {
                "id": tech_id,
                "name": global_tech_df[global_tech_df['id'] == tech_id]['name'].iloc[0],
            }
            for tech_id in selected_tech_ids
        ]

        # The loop only runs when at least one constraint exists, so the
        # range below is never empty.
        num_covered_constraints = random.randint(1, len(constraints_stemmed))
        covered_constraints = random.sample(constraints_stemmed, num_covered_constraints)

        best_combinations.append({
            "technologies": technologies,
            "score": score,
            "covered_constraints": covered_constraints,
        })

    return best_combinations

def select_technologies(best_combinations: list[dict]) -> list[int]:
    """
    Stand-in technology selector.

    Collects the ``id`` of every entry under ``technologies`` across all
    combinations and returns the distinct IDs as a list. The IDs pass through
    a set, so the result follows set iteration order rather than input order.
    """
    unique_ids = {
        tech["id"]
        for combination in best_combinations
        for tech in combination["technologies"]
    }
    return list(unique_ids)

def get_technologies_by_id(tech_ids: list[int], global_tech_df: pd.DataFrame) -> list[dict]:
    """
    Stand-in lookup of technology records by ID.

    For each requested ID, in the given order, the first row of the frame
    whose ``id`` column equals it is converted to a dict. IDs with no matching
    row are skipped.
    """
    # Lazily filter the frame once per requested ID.
    matches = (
        global_tech_df[global_tech_df['id'] == tech_id]
        for tech_id in tech_ids
    )
    return [rows.iloc[0].to_dict() for rows in matches if not rows.empty]

# --- Core Logic (Modified for Gradio Interface) ---

# Load the technology table and its embeddings once, when the module is
# executed. process_input_gradio reads these two module-level names on each
# call instead of reloading them.
global_tech_df, global_tech_embeddings_array = load_technologies()

def process_input_gradio(problem_description: str):
    """
    Run the whole selection pipeline and expose every intermediate result.

    Uses the module-level ``global_tech_df`` and
    ``global_tech_embeddings_array``.

    Returns:
        A 9-tuple, in this order: the generated prompt, the comma-joined
        constraints, the comma-joined stemmed constraints, a status message,
        the per-constraint similarity summary (a dict rendered with ``str``),
        the similarity matrix as a DataFrame (rows are stemmed constraints,
        columns are technology names), the combinations as a JSON string, the
        comma-joined selected technology IDs, and the selected technologies as
        a JSON string.
    """
    # Steps 1-3: prompt, raw constraints, stemmed constraints.
    prompt = set_prompt(problem_description)
    constraints = retrieve_constraints(prompt)
    constraints_stemmed = stem(constraints, "constraints")
    stemmed_frame = pd.DataFrame({"stemmed_constraints": constraints_stemmed})
    save_dataframe(stemmed_frame, "constraints_stemmed.xlsx")

    # Step 4 needs no work here: the technologies were loaded at module level.

    # Step 5: score constraints against technologies.
    result_similarities, matrix = get_contrastive_similarities(
        constraints_stemmed, global_tech_df, global_tech_embeddings_array
    )
    save_to_pickle(result_similarities)

    # Steps 6-8: combinations, then IDs, then full technology records.
    best_combinations = find_best_list_combinations(constraints_stemmed, global_tech_df, matrix)
    best_technologies_id = select_technologies(best_combinations)
    best_technologies = get_technologies_by_id(best_technologies_id, global_tech_df)

    # Display formatting: nested lists instead of a numpy array for the matrix.
    matrix_rows = matrix.tolist()

    # One readable "name (score)" line per constraint.
    similarity_summary = {}
    for constraint, ranked in result_similarities.items():
        similarity_summary[constraint] = ", ".join(
            f"{name} ({score:.3f})" for name, score in ranked
        )

    combinations_json = json.dumps(best_combinations, indent=2)
    technologies_json = json.dumps(best_technologies, indent=2)

    constraints_text = ", ".join(constraints)
    stemmed_text = ", ".join(constraints_stemmed)
    status_text = "Global technologies loaded and ready."
    summary_text = str(similarity_summary)
    matrix_frame = pd.DataFrame(
        matrix_rows, index=constraints_stemmed, columns=global_tech_df['name']
    )
    ids_text = ", ".join(map(str, best_technologies_id))

    return (
        prompt,
        constraints_text,
        stemmed_text,
        status_text,
        summary_text,
        matrix_frame,
        combinations_json,
        ids_text,
        technologies_json,
    )

# --- Gradio Interface Setup ---

# Define the input and output components
# Single free-text input: the problem description passed to process_input_gradio.
input_problem = gr.Textbox(
    label="Enter Problem Description",
    placeholder="e.g., Develop a secure and scalable e-commerce platform with real-time analytics."
)

# One non-interactive output component per element of the tuple returned by
# process_input_gradio; the numbers in the labels follow that tuple's order.
output_prompt = gr.Textbox(label="1. Generated Prompt", interactive=False)
output_constraints = gr.Textbox(label="2. Retrieved Constraints", interactive=False)
output_stemmed_constraints = gr.Textbox(label="3. Stemmed Constraints", interactive=False)
output_tech_loaded = gr.Textbox(label="4. Global Technologies Status", interactive=False)
output_similarities = gr.Textbox(label="5. Result Similarities (Constraint -> Top Technologies)", interactive=False)
output_matrix = gr.Dataframe(label="6. Similarity Matrix (Constraints vs. Technologies)", interactive=False)
# The two JSON components receive strings produced by json.dumps.
output_best_combinations = gr.JSON(label="7. Best Technology Combinations Found", interactive=False)
output_selected_ids = gr.Textbox(label="8. Selected Technology IDs", interactive=False)
output_final_technologies = gr.JSON(label="9. Final Best Technologies", interactive=False)


# Create the Gradio Interface and launch it. This runs at module level, so the
# app starts as soon as the file is executed.
gr.Interface(
    fn=process_input_gradio,
    inputs=input_problem,
    # Must stay in the same order as the values returned by process_input_gradio.
    outputs=[
        output_prompt,
        output_constraints,
        output_stemmed_constraints,
        output_tech_loaded,
        output_similarities,
        output_matrix,
        output_best_combinations,
        output_selected_ids,
        output_final_technologies
    ],
    title="Insight Finder: Step-by-Step Technology Selection",
    description="Enter a problem description to see how relevant technologies are identified through various processing steps.",
    allow_flagging="never"
).launch()