Spaces:
Paused
Paused
Commit
·
5c96576
1
Parent(s):
11a4a60
Update app.py
Browse files
app.py
CHANGED
@@ -16,26 +16,46 @@ collection = chroma_client.create_collection(name="my_collection")
|
|
16 |
def generate_hash(text):
    """Return the hexadecimal MD5 digest of *text* encoded as UTF-8."""
    encoded = text.encode('utf-8')
    digest = hashlib.md5(encoded)
    return digest.hexdigest()
|
18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
# Función para obtener embeddings del modelo
|
20 |
def get_embeddings(sentences):
|
|
|
21 |
embeddings = []
|
|
|
22 |
for sentence in sentences:
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
embeddings.append(np.array(results[0]['embedding']))
|
28 |
else:
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
|
|
39 |
|
40 |
# Función para comparar las sentencias
|
41 |
def calculate_similarity(args):
|
@@ -69,3 +89,5 @@ iface = gr.Interface(
|
|
69 |
|
70 |
# Inicia la interfaz de Gradio
|
71 |
iface.launch()
|
|
|
|
|
|
16 |
def generate_hash(text):
    """Return the hexadecimal MD5 digest of *text* encoded as UTF-8."""
    encoded = text.encode('utf-8')
    digest = hashlib.md5(encoded)
    return digest.hexdigest()
|
18 |
|
19 |
+
# Función para obtener embeddings del modelo
|
20 |
+
import sqlite3
|
21 |
+
import gradio as gr
|
22 |
+
from sentence_transformers import SentenceTransformer
|
23 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
24 |
+
import numpy as np
|
25 |
+
import multiprocessing
|
26 |
+
|
27 |
+
# Inicializa la base de datos y crea la tabla si no existe
|
28 |
+
conn = sqlite3.connect('embeddings.db')
|
29 |
+
c = conn.cursor()
|
30 |
+
c.execute('''CREATE TABLE IF NOT EXISTS embeddings
|
31 |
+
(sentence TEXT PRIMARY KEY, embedding BLOB)''')
|
32 |
+
conn.commit()
|
33 |
+
|
34 |
+
# Carga el modelo
|
35 |
+
model = SentenceTransformer('Maite89/Roberta_finetuning_semantic_similarity_stsb_multi_mt')
|
36 |
+
|
37 |
# Función para obtener embeddings del modelo
|
38 |
def get_embeddings(sentences):
    """Return one embedding per input sentence, in input order.

    Each sentence is first looked up in the ``embeddings`` SQLite table
    through the module-level cursor ``c``. Sentences that are not stored
    yet are encoded in a single ``model.encode`` call and then written to
    the table so later calls can reuse them.

    Args:
        sentences: Iterable of sentence strings.

    Returns:
        A list where item ``i`` is the embedding of the ``i``-th input
        sentence. Cached items come from ``np.frombuffer`` (float32);
        fresh items are the rows returned by ``model.encode``.
    """
    sentences = list(sentences)
    # Pre-sized so every result lands at the position of its sentence; the
    # previous append/extend approach put all cached hits before all misses.
    embeddings = [None] * len(sentences)

    # Uncached sentence -> every position where it occurs. A repeated
    # sentence is encoded and inserted once, so the PRIMARY KEY on
    # ``sentence`` is never violated by a duplicate within one call.
    pending = {}
    for index, sentence in enumerate(sentences):
        if sentence in pending:
            pending[sentence].append(index)
            continue
        c.execute('SELECT embedding FROM embeddings WHERE sentence=?', (sentence,))
        row = c.fetchone()
        if row:
            embeddings[index] = np.frombuffer(row[0], dtype=np.float32)
        else:
            pending[sentence] = [index]

    # Encode the misses in one batch and persist them.
    if pending:
        new_sentences = list(pending)
        new_embeddings = model.encode(new_sentences, show_progress_bar=False)
        for sentence, embedding in zip(new_sentences, new_embeddings):
            for index in pending[sentence]:
                embeddings[index] = embedding
        # Stored as float32 bytes to match the dtype used when reading back.
        rows = [
            (sentence, np.asarray(embedding, dtype=np.float32).tobytes())
            for sentence, embedding in zip(new_sentences, new_embeddings)
        ]
        c.executemany('INSERT INTO embeddings VALUES (?,?)', rows)
        conn.commit()

    return embeddings
|
59 |
|
60 |
# Función para comparar las sentencias
|
61 |
def calculate_similarity(args):
|
|
|
89 |
|
90 |
# Inicia la interfaz de Gradio
|
91 |
iface.launch()
|
92 |
+
conn.close()
|
93 |
+
|