antagonico committed on
Commit
cb8f228
1 Parent(s): 7c17d6b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -66
app.py CHANGED
@@ -1,93 +1,50 @@
1
- import gradio as gr
2
  from sentence_transformers import SentenceTransformer
3
  from sklearn.metrics.pairwise import cosine_similarity
4
  import numpy as np
5
- import multiprocessing
6
- import chromadb
7
- import hashlib
8
-
9
- # Load the model
10
- model = SentenceTransformer('Maite89/Roberta_finetuning_semantic_similarity_stsb_multi_mt')
11
-
12
- # Create the ChromaDB client
13
- chroma_client = chromadb.Client()
14
- collection = chroma_client.create_collection(name="my_collection")
15
-
16
def generate_hash(text: str) -> str:
    """Return the hexadecimal MD5 digest of ``text`` encoded as UTF-8."""
    return hashlib.md5(text.encode('utf-8')).hexdigest()
18
-
19
- # Función para obtener embeddings del modelo
20
- import sqlite3
21
  import gradio as gr
22
- from sentence_transformers import SentenceTransformer
23
- from sklearn.metrics.pairwise import cosine_similarity
24
- import numpy as np
25
- import multiprocessing
26
 
27
# Initialise the embedding-cache database and create its table if it does not
# exist yet.
#
# check_same_thread=False: the connection is created here, at import time, but
# the request handler that uses it may be invoked from a different thread; with
# the default setting sqlite3 raises ProgrammingError in that case.
# NOTE(review): the connection and cursor are shared module-wide, so callers
# must not use them from several threads at the same time.
conn = sqlite3.connect('embeddings.db', check_same_thread=False)
c = conn.cursor()
c.execute('''CREATE TABLE IF NOT EXISTS embeddings
             (sentence TEXT PRIMARY KEY, embedding BLOB)''')
conn.commit()
33
 
34
- # Load the model
35
  model = SentenceTransformer('Maite89/Roberta_finetuning_semantic_similarity_stsb_multi_mt')
 
36
 
# Get embeddings from the model, using the SQLite table as a cache.
def get_embeddings(sentences):
    """Return one embedding per sentence, in the same order as ``sentences``.

    Each distinct sentence is looked up in the ``embeddings`` table first.
    Sentences that are not cached are encoded with ``model`` in a single
    batch and then stored in the table.

    Args:
        sentences: list of sentence strings.

    Returns:
        A list with one embedding per input sentence; position ``i`` holds
        the embedding of ``sentences[i]``.
    """
    # De-duplicate while keeping first-seen order: a sentence repeated within
    # one call must be encoded and inserted only once (it is the PRIMARY KEY).
    unique_sentences = list(dict.fromkeys(sentences))

    # Try to fetch each embedding from the database.
    by_sentence = {}
    missing = []
    for sentence in unique_sentences:
        c.execute('SELECT embedding FROM embeddings WHERE sentence=?', (sentence,))
        row = c.fetchone()
        if row:
            by_sentence[sentence] = np.frombuffer(row[0], dtype=np.float32)
        else:
            missing.append(sentence)

    # Encode the sentences that were not cached and store them.
    if missing:
        new_embeddings = model.encode(missing, show_progress_bar=False)
        by_sentence.update(zip(missing, new_embeddings))
        c.executemany('INSERT INTO embeddings VALUES (?,?)',
                      [(sent, emb.tobytes()) for sent, emb in zip(missing, new_embeddings)])
        conn.commit()

    # Rebuild the result in input order so callers can rely on positions
    # (cached and freshly encoded entries are no longer grouped separately).
    return [by_sentence[sentence] for sentence in sentences]
59
 
60
# Compare one pair of sentence embeddings.
def calculate_similarity(args):
    """Return the cosine similarity of a ``(source, candidate)`` embedding pair.

    Args:
        args: two-item tuple ``(source_embedding, compare_embedding)``; each
            item must support ``reshape``.

    Returns:
        The single entry of the 1x1 matrix produced by ``cosine_similarity``.
    """
    reference, candidate = args
    # cosine_similarity works on 2-D inputs, so each vector becomes one row.
    reference_row = reference.reshape(1, -1)
    candidate_row = candidate.reshape(1, -1)
    similarity_matrix = cosine_similarity(reference_row, candidate_row)
    return similarity_matrix[0][0]
64
 
 
def compare(source_sentence, compare_sentences):
    """Compare a source sentence with a list of "--"-separated sentences.

    Args:
        source_sentence: the reference sentence.
        compare_sentences: candidate sentences joined with ``"--"``.

    Returns:
        The similarity of each candidate to the source, converted with
        ``str`` and joined with ``", "``, in candidate order.
    """
    compare_list = compare_sentences.split("--")

    # Fetch all embeddings in one call to speed things up; index 0 is the
    # source sentence.
    all_sentences = [source_sentence] + compare_list
    all_embeddings = get_embeddings(all_sentences)

    source_embedding = all_embeddings[0]

    # Compute the similarities in-process. Starting a process pool on every
    # request costs far more than these few small computations, and on
    # platforms that spawn workers each one would re-import this module.
    similarities = [
        calculate_similarity((source_embedding, emb))
        for emb in all_embeddings[1:]
    ]

    return ', '.join([str(sim) for sim in similarities])
81
 
82
# Define the Gradio interface: two text inputs and one text output.
iface = gr.Interface(
    fn=compare,
    inputs=["text", "text"],
    outputs="text",
    live=False,
)

# Start the Gradio interface. The database connection is closed once launch()
# returns, and also when it raises, so the connection is never left open.
try:
    iface.launch()
finally:
    conn.close()
93
 
 
 
1
  from sentence_transformers import SentenceTransformer
2
  from sklearn.metrics.pairwise import cosine_similarity
3
  import numpy as np
4
+ from accelerate import Accelerator
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  import gradio as gr
 
 
 
 
6
 
7
# Initialise the Accelerator.
accelerator = Accelerator()

# Load the model and let Accelerate place it on the appropriate device.
# prepare() returns the prepared object itself when it receives a single
# argument, so the model does not need to be passed twice to unpack a tuple.
model = SentenceTransformer('Maite89/Roberta_finetuning_semantic_similarity_stsb_multi_mt')
model = accelerator.prepare(model)
13
 
# Get embeddings from the model.
def get_embeddings(sentences):
    """Encode ``sentences`` with the module-level ``model``.

    Args:
        sentences: list of sentence strings.

    Returns:
        The value returned by ``model.encode`` with
        ``convert_to_tensor=True``.
    """
    # The sentences are plain strings and go straight to encode().
    # NOTE(review): Accelerator.prepare() is documented for models,
    # optimizers, data loaders and schedulers; calling it on a list of
    # strings is expected to hand the list back unchanged, so that call was
    # dropped.
    return model.encode(sentences, show_progress_bar=False, convert_to_tensor=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
# Compute the similarity of one embedding pair.
def _to_numpy_vector(embedding):
    """Return ``embedding`` as a flat NumPy array held in host memory."""
    # Tensors are detached and copied to the CPU first; NumPy cannot read a
    # tensor that requires grad or lives on another device.
    if hasattr(embedding, "detach"):
        embedding = embedding.detach().cpu().numpy()
    return np.asarray(embedding).reshape(-1)


def calculate_similarity(arguments):
    """Return the cosine similarity of a ``(source, candidate)`` embedding pair.

    Args:
        arguments: two-item sequence ``(source_embedding, compare_embedding)``.
            Each item is a 1-D vector given as a NumPy array, a sequence of
            numbers, or a tensor exposing ``detach().cpu().numpy()``.

    Returns:
        The cosine similarity as a NumPy float scalar, or 0.0 when either
        vector has zero norm.
    """
    source_embedding, compare_embedding = arguments
    source_vec = _to_numpy_vector(source_embedding)
    compare_vec = _to_numpy_vector(compare_embedding)

    norm_product = np.linalg.norm(source_vec) * np.linalg.norm(compare_vec)
    if norm_product == 0:
        # A zero vector has no direction; report no similarity instead of
        # dividing by zero.
        return np.float64(0.0)
    return np.dot(source_vec, compare_vec) / norm_product
24
 
25
# Compare a source sentence against "--"-separated candidate sentences.
def compare(source_sentence, compare_sentences):
    """Score each candidate sentence against the source sentence.

    Args:
        source_sentence: the reference sentence.
        compare_sentences: candidate sentences joined with ``"--"``.

    Returns:
        The similarity of each candidate to the source, converted with
        ``str`` and joined with ``", "``, in candidate order.
    """
    candidates = compare_sentences.split("--")

    # Encode the source and every candidate in a single call; index 0 of the
    # result belongs to the source sentence.
    embeddings = get_embeddings([source_sentence, *candidates])
    reference = embeddings[0]

    # Similarities are computed one after another in this process; no
    # multiprocessing is involved.
    scores = []
    for candidate_embedding in embeddings[1:]:
        scores.append(str(calculate_similarity((reference, candidate_embedding))))

    return ', '.join(scores)
38
 
39
# Define the Gradio input and output components.
# gr.Textbox is used directly: the gr.inputs / gr.outputs namespaces are
# deprecated and are no longer available in Gradio 4.
iface = gr.Interface(
    fn=compare,
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter source sentence here..."),
        gr.Textbox(lines=10, placeholder="Enter sentences to compare, separated by '--'..."),
    ],
    outputs=gr.Textbox(),
    live=False,
)

# Start the Gradio interface.
iface.launch()
 
50