import pickle
import numpy as np
import pandas as pd

import nltk
from nltk.stem import SnowballStemmer

nltk.download("punkt_tab")

from pathlib import Path
import os
import json
from google.genai import Client, types

BASE_DIR = Path(__file__).resolve().parent.parent

FILE_PATH = BASE_DIR / 'ressources' / 'global_tech_embeddings.pkl'

def set_prompt(problem):
    prompt = """
# ROLE

You are a meticulous senior technical analyst and constraint scout. Your task is to read a short description of a technical problem, identify the distinct constraints it involves, and return them as a JSON object whose entries together cover the whole problem.

# OBJECTIVE

Find all the constraints in this technical problem, making sure each one is premised on the problem alone.
Consider different technical domains so that the whole problem is covered.
Output the constraints as a JSON object such as: {"title of constraint 1": "description 1", "title of constraint N": "description N"}

# INSTRUCTIONS & RULES

1.  **JSON Output**: Your entire response MUST be a single JSON object. Do not include any explanatory text before or after the JSON.
2.  **Discover and Iterate**: Your primary task is to scan the technical problem, find each constraint, and create a separate entry for it in the output JSON.
3.  **Descriptive Sentences**: You MUST write clear, full sentences that describe each constraint's issues. Do not use single keywords. These descriptions should be based on the information in the technical problem.
4.  **Infer Where Necessary**: The technical problem may not contain all details. Infer plausible information based on the context.

# JSON SCHEMA & EXAMPLE

{
  "Exposing Compute Resources": "The 6G network shall provide suitable APIs to allow authorized third parties and/or UEs to retrieve availability information about computational resources inside the Service Hosting Environment (SHE) and to utilize these computational resources for running workloads on demand.",
  "Providing AI Compute": "The 6G network shall be able to provide computing resources in the Service Hosting Environment for AI services and provide AI services to UEs.",
  ...
}

---
***NOW, BEGIN THE TASK.***

# TECHNICAL PROBLEM

""" + problem
    return prompt

def load_technologies_excel():
    # Note: FILE_PATH above points to a .pkl file; this helper only works if
    # an Excel export of the same data actually lives at that path.
    df = pd.read_excel(FILE_PATH)
    return df

def load_technologies():
    try:
        with open(FILE_PATH, 'rb') as f:
            loaded_data = pickle.load(f)
        global_tech = loaded_data['global_tech']
        global_tech_embedding = loaded_data['global_tech_embeddings']
        return global_tech, global_tech_embedding
    except Exception as e:
        print(f"Error: {e}")
        return None, None

def tech_to_dict(technologies):
    tech_dict = []
    for index, tech in enumerate(technologies):
        # Keep entries whose "<title>" tag is absent or sits at the very start
        # (str.find returns -1, 0 or 1 in those cases).
        if tech.find("<title>") <= 1:
            tab = tech.split("\n")
            tab.pop(0)   # drop the opening tag line
            tab.pop()    # drop the closing tag line
            tech_dict.append({"title": tab[0][tab[0].find(": ") + 2:],
                              "purpose": tab[1][tab[1].find(": ") + 2:],
                              "key_components": tab[2][tab[2].find(": ") + 2:],
                              "advantages": tab[3][tab[3].find(": ") + 2:],
                              "limitations": tab[4][tab[4].find(": ") + 2:],
                              "id": index})
    return tech_dict
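
# Example of a raw technology block that tech_to_dict can parse, reconstructed
# from its parsing logic (the exact tag/field layout is an assumption):
#
#   <title>
#   title: Edge Caching
#   purpose: Reduce latency by serving content close to users
#   key_components: Cache nodes, eviction policy
#   advantages: Lower backhaul traffic
#   limitations: Cache consistency overhead
#   </title>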

def save_dataframe(df, title):
    pd.DataFrame(df).to_excel(title)
    return title

def stem(data, data_type):
    # Note: SnowballStemmer.stem() treats its argument as a single token, so
    # applying it to full sentences mainly lowercases them; per-token stemming
    # may be what was intended.
    stemmer = SnowballStemmer("english")
    processed_data = []
    if data_type == "technologies":
        # data is a list of technology dicts (see tech_to_dict)
        for t_item in data:
            processed_data.append({
                "title": stemmer.stem(t_item["title"]),
                "purpose": stemmer.stem(t_item["purpose"]),
                "key_components": stemmer.stem(t_item["key_components"]),
                "advantages": stemmer.stem(t_item["advantages"]),
                "limitations": stemmer.stem(t_item["limitations"]),
                "id": t_item["id"]
            })
    else:
        # data is a {title: description} mapping (e.g. constraints)
        for title, description in data.items():
            processed_data.append({
                "title": stemmer.stem(title),
                "description": stemmer.stem(description)
            })

    return processed_data


def get_technologies_by_id(id_list, technologies):
    result = []
    id_set = set(id_list)
    for tech in technologies:
        if tech.get('id') in id_set:
            result.append(tech)
    return result
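
# Illustrative shape of one item consumed by save_to_pickle below, reconstructed
# from the keys it reads (field names beyond these are assumptions):
#   {'constraint': {'title': 'Providing AI Compute', ...},
#    'id2': 3,                          # 1-based technology id
#    'similarity': np.float32(0.87)}    # numpy scalar; .item() -> Python float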

def save_to_pickle(result_similarities):

    constraint_titles = sorted({item['constraint']['title'] for item in result_similarities})
    max_id2 = max(item['id2'] for item in result_similarities)

    row_label_to_index = {title: i for i, title in enumerate(constraint_titles)}
    col_labels = list(range(1, max_id2 + 1))

    num_rows = len(constraint_titles)
    num_cols = max_id2

    # NaN marks constraint/technology pairs with no similarity score
    matrix = np.full((num_rows, num_cols), np.nan, dtype=np.float32)

    for item in result_similarities:
        row_idx = row_label_to_index[item['constraint']['title']]
        col_idx = item['id2'] - 1  # id2 is 1-based; matrix columns are 0-based
        matrix[row_idx, col_idx] = item['similarity'].item()

    print(f"Successfully created matrix with shape: {matrix.shape}")
    print(f"Number of rows (unique constraints): {num_rows}")
    print(f"Number of columns (max id2): {num_cols}")
    print("\nExample 5x5 block of the created matrix (NaN for missing values):")
    print(matrix[:5, :5])

    output_filename = "cosine_similarity_matrix_with_labels.pkl"
    data_to_save = {
        'matrix': matrix,
        'row_labels': constraint_titles,
        'col_labels': col_labels
    }

    with open(output_filename, 'wb') as f:
        pickle.dump(data_to_save, f)

    print(f"\nMatrix and labels saved to {output_filename}")
    return output_filename

    
def set_gemini():
    gemini_api = os.getenv("GEMINI_API")
    if not gemini_api:
        raise RuntimeError("GEMINI_API environment variable is not set")
    client = Client(api_key=gemini_api)

    # Define the grounding tool
    grounding_tool = types.Tool(
        google_search=types.GoogleSearch()
    )

    # Configure generation settings
    config = types.GenerateContentConfig(
        tools=[grounding_tool]
    )

    return client, config
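

# --- Usage sketch (illustrative only) ---
# A minimal end-to-end example of how these helpers fit together: build the
# prompt, send it through the grounded Gemini client, and parse the returned
# constraints JSON. The model name "gemini-2.0-flash" and the bare-JSON
# response handling are assumptions, not something this module guarantees.
if __name__ == "__main__":
    client, config = set_gemini()
    prompt = set_prompt("Design a 6G service hosting environment with strict latency bounds.")
    response = client.models.generate_content(
        model="gemini-2.0-flash",  # assumed model name
        contents=prompt,
        config=config,
    )
    # The prompt demands a single JSON object, so the text should parse directly;
    # real responses may first need code fences stripped.
    constraints = json.loads(response.text)
    print(constraints)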