import pickle
import numpy as np
import pandas as pd
import nltk
from nltk.stem import SnowballStemmer
nltk.download("punkt_tab")  # tokenizer models required by nltk.word_tokenize
from nltk.tokenize import word_tokenize
from pathlib import Path
import os
import json
from google.genai import Client, types

BASE_DIR = Path(__file__).resolve().parent.parent
FILE_PATH = BASE_DIR / 'ressources' / 'global_tech_embeddings.pkl'
def set_prompt(problem):
    prompt = """
# ROLE
You are a meticulous senior technical analyst and constraints scout. Your task is to read a short description of a technical problem and identify the distinct constraints it contains, each grounded in the problem itself and together covering the whole problem, returned as a single JSON object.
# OBJECTIVE
Find all the constraints in this technical problem, making sure each one is premised on the problem alone.
Consider different technical domains so that the whole problem is covered.
Output the constraints in a JSON object such as: {"title of constraint 1": "description 1", "title of constraint N": "description N"}
# INSTRUCTIONS & RULES
1. **JSON Output**: Your entire response MUST be a single JSON code block. Do not include any explanatory text before or after the JSON.
2. **Discover and Iterate**: Your primary task is to scan the technical problem, find each constraint, and create a separate entry for it in the output JSON.
3. **Descriptive Sentences**: You MUST write clear, full sentences that describe each constraint's issues. Do not use single keywords. These descriptions should be based on the information in the technical problem.
4. **Infer Where Necessary**: The technical problem may not contain all details. Infer plausible information based on the context.
# JSON SCHEMA & EXAMPLE
{
"Exposing Compute Resources": "The 6G network shall provide suitable APIs to allow authorized third parties and/or UEs to retrieve availability information about computational resources inside the Service Hosting Environment (SHE) and to utilize these computational resources for running workloads on demand.",
"Providing AI Compute": "The 6G network shall be able to provide computing resources in the Service Hosting Environment for AI services and provide AI services to UEs.",
...
}
---
***NOW, BEGIN THE TASK.***
# TECHNICAL PROBLEM
""" + problem
    return prompt
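
# The prompt above asks the model for a single JSON object. Below is a minimal
# parsing sketch, not part of the original pipeline: it assumes (this is an
# assumption, not guaranteed by the source) that the model may wrap its answer
# in ```json fences, so those are stripped before json.loads().
def parse_constraints_response(raw_response):
    """Illustrative helper: turn the model's JSON reply into a
    {title: description} dict."""
    cleaned = raw_response.strip()
    if cleaned.startswith("```"):
        # Strip surrounding backticks and an optional leading "json" tag.
        cleaned = cleaned.strip("`").removeprefix("json").strip()
    return json.loads(cleaned)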
def load_technologies_excel(path=FILE_PATH):
    # Note: FILE_PATH above points to a pickle file; pass the path of the
    # actual Excel export when calling this helper.
    df = pd.read_excel(path)
    return df
def load_technologies():
    try:
        with open(FILE_PATH, 'rb') as f:
            loaded_data = pickle.load(f)
        global_tech = loaded_data['global_tech']
        global_tech_embedding = loaded_data['global_tech_embeddings']
        return global_tech, global_tech_embedding
    except Exception as e:
        print(f"Error loading technologies from {FILE_PATH}: {e}")
        # Return an explicit pair so callers that unpack two values get
        # (None, None) instead of a TypeError on an implicit None.
        return None, None
def tech_to_dict(technologies):
    """Parse raw technology text blocks into dictionaries of labelled fields."""
    tech_dict = []
    for index, tech in enumerate(technologies):
        # Keep blocks whose "<title>" tag is at the very start (index <= 1)
        # or absent (find() returns -1); skip blocks where it appears later.
        if not tech.find("<title>") > 1:
            tab = tech.split("\n")
            tab.pop(0)  # drop the opening tag line
            tab.pop()   # drop the closing tag line
            # Each remaining line is "key: value"; keep the value part.
            tech_dict.append({"title": tab[0][tab[0].find(": ") + 2:],
                              "purpose": tab[1][tab[1].find(": ") + 2:],
                              "key_components": tab[2][tab[2].find(": ") + 2:],
                              "advantages": tab[3][tab[3].find(": ") + 2:],
                              "limitations": tab[4][tab[4].find(": ") + 2:],
                              "id": index})
    return tech_dict
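
# Illustrative only: a made-up block in the format tech_to_dict appears to
# expect, inferred from the parsing logic above (not the real dataset format).
_EXAMPLE_TECH_BLOCK = """<title>
title: Edge Caching
purpose: Serve frequently used content closer to users
key_components: Cache nodes, eviction policy
advantages: Lower latency and backhaul load
limitations: Cache coherence across sites
</title>"""
# tech_to_dict([_EXAMPLE_TECH_BLOCK]) -> [{"title": "Edge Caching", ..., "id": 0}]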
def save_dataframe(df, title):
    pd.DataFrame(df).to_excel(title)
    return title
def stem(data, data_type):
    """Stem every word of each text field. SnowballStemmer operates on single
    words, so each text is tokenized first and rejoined afterwards (this is
    what the punkt_tab download at the top of the file supports)."""
    stemmer = SnowballStemmer("english")

    def stem_text(text):
        return " ".join(stemmer.stem(word) for word in word_tokenize(text))

    processed_data = []
    if data_type == "technologies":
        for t_item in data:
            processed_data.append({
                "title": stem_text(t_item["title"]),
                "purpose": stem_text(t_item["purpose"]),
                "key_components": stem_text(t_item["key_components"]),
                "advantages": stem_text(t_item["advantages"]),
                "limitations": stem_text(t_item["limitations"]),
                "id": t_item["id"]
            })
    else:
        # Here `data` is a mapping of constraint title -> description.
        for t_item in data:
            processed_data.append({
                "title": stem_text(t_item),
                "description": stem_text(data[t_item])
            })
    return processed_data
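
# Illustrative usage with a made-up constraint mapping (title -> description),
# mirroring the JSON object the LLM prompt asks for:
#
#   constraints = {"Latency Budget": "Responses must arrive within 50 ms."}
#   stem(constraints, "constraints")
#   # -> each title/description tokenized and every token stemmed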
def get_technologies_by_id(id_list, technologies):
    id_set = set(id_list)
    return [tech for tech in technologies if tech.get('id') in id_set]
def save_to_pickle(result_similarites):
    """Pivot the flat (constraint, technology, similarity) records into a
    labelled matrix and pickle it together with its row/column labels."""
    constraint_titles = sorted({item['constraint']['title'] for item in result_similarites})
    max_id2 = max(item['id2'] for item in result_similarites)
    row_label_to_index = {title: i for i, title in enumerate(constraint_titles)}
    col_labels = list(range(1, max_id2 + 1))
    num_rows = len(constraint_titles)
    num_cols = max_id2
    matrix = np.full((num_rows, num_cols), np.nan, dtype=np.float32)
    for item in result_similarites:
        row_idx = row_label_to_index[item['constraint']['title']]
        col_idx = item['id2'] - 1  # id2 labels are 1-based
        matrix[row_idx, col_idx] = item['similarity'].item()
    print(f"Successfully created matrix with shape: {matrix.shape}")
    print(f"Number of rows (unique constraints): {num_rows}")
    print(f"Number of columns (max id2): {num_cols}")
    print("\nExample 5x5 block of the created matrix (NaN for missing values):")
    print(matrix[:5, :5])
    output_filename = "cosine_similarity_matrix_with_labels.pkl"
    data_to_save = {
        'matrix': matrix,
        'row_labels': constraint_titles,
        'col_labels': col_labels
    }
    with open(output_filename, 'wb') as f:
        pickle.dump(data_to_save, f)
    print(f"\nMatrix and labels saved to {output_filename}")
    return output_filename
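
# Illustrative read-back helper, not in the original pipeline: loads the
# matrix written by save_to_pickle together with its labels. A cell is then
# matrix[row_labels.index(title), id2 - 1] (id2 column labels are 1-based,
# matching the offset used above).
def load_similarity_matrix(path="cosine_similarity_matrix_with_labels.pkl"):
    with open(path, "rb") as f:
        data = pickle.load(f)
    return data["matrix"], data["row_labels"], data["col_labels"]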
def set_gemini():
    gemini_api = os.getenv("GEMINI_API")
    client = Client(api_key=gemini_api)
    # Define the grounding tool
    grounding_tool = types.Tool(
        google_search=types.GoogleSearch()
    )
    # Configure generation settings
    config = types.GenerateContentConfig(
        tools=[grounding_tool]
    )
    return client, config
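
# Minimal end-to-end usage sketch. Assumptions flagged: the GEMINI_API
# environment variable must be set, and "gemini-2.0-flash" is a placeholder
# model name, not necessarily what this Space actually uses.
if __name__ == "__main__":
    client, config = set_gemini()
    response = client.models.generate_content(
        model="gemini-2.0-flash",  # placeholder model name (assumption)
        contents=set_prompt("Describe a 6G edge compute offloading problem."),
        config=config,
    )
    print(parse_constraints_response(response.text))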