|
import os
import subprocess
import sys

# Install runtime dependencies with the *current* interpreter's pip.
# Using a subprocess argument list instead of os.system avoids shell
# parsing and guarantees the packages land in the interpreter running
# this script. check=False mirrors the original behavior of ignoring a
# failed install (the import block below will fail loudly if needed).
# NOTE(review): installing at import time is notebook-style; prefer a
# requirements.txt for real deployments.
subprocess.run(
    [sys.executable, "-m", "pip", "install",
     "curl_cffi", "tqdm", "bitsandbytes", "tiktoken", "g4f",
     "pinecone-client", "pandas", "datasets", "sentence-transformers"],
    check=False,
)
|
|
|
|
|
import os |
|
from g4f import ChatCompletion |
|
|
|
from pinecone import Pinecone |
|
import pandas as pd |
|
from datasets import Dataset |
|
from sentence_transformers import SentenceTransformer |
|
import gradio as gr |
|
|
|
# SentenceTransformer checkpoint id (Hugging Face hub) used for embeddings.
model_name = "BAAI/bge-m3"

# SECURITY(review): the Pinecone API key was hard-coded here. Read it from
# the environment when available; the old literal is kept only as a
# backward-compatible fallback so existing runs keep working. This key has
# been committed to source control and should be rotated.
PINECONE_API_KEY = os.environ.get(
    "PINECONE_API_KEY", "3a3e9022-381d-436e-84cb-ba93464d283e"
)
PINECONE_ENVIRONMENT = os.environ.get("PINECONE_ENVIRONMENT", "us-east-1")

# Module-level Pinecone client (connect_to_pinecone below creates its own
# client as well; this one is kept for parity with the original setup).
pc = Pinecone(api_key=PINECONE_API_KEY)

# Label and dimensionality of the embedding space; BGE-M3 dense vectors
# are 1024-dimensional, which must match the index's dimension.
EMBED_MODEL = 'BGE_M3-1024'
DIMENSIONS = 1024

print(f"Model selected: {EMBED_MODEL}")
print(f"Dimensions set as: {DIMENSIONS}")
|
|
|
|
|
def print_current_selection():
    """Report the embedding model and dimensionality currently configured."""
    summary = (
        f"Currently selected model: {EMBED_MODEL}\n"
        f"Dimensions: {DIMENSIONS}"
    )
    print(summary)
|
|
|
|
|
# Default Pinecone index holding the dress ("vestidos") catalog vectors;
# connect_to_pinecone() may rebind this global after a successful connect.
INDEX_NAME = 'vestidos'
|
|
|
|
|
|
|
|
|
def connect_to_pinecone(index_name):
    """Open the given Pinecone index and record it in the global INDEX_NAME.

    On success, prints the index stats and updates INDEX_NAME; on any
    failure, prints the error and leaves the global untouched.
    """
    global INDEX_NAME
    try:
        client = Pinecone(api_key=PINECONE_API_KEY)
        target = client.Index(index_name)

        # Force a round trip so a bad key or index name fails inside the try.
        stats = target.describe_index_stats()
        print(f"Successfully connected to Pinecone index '{index_name}'!")
        print("Index Stats:", stats)

        # Only commit the new name once the connection is proven good.
        INDEX_NAME = index_name
        print(f"Global INDEX_NAME updated to: {INDEX_NAME}")

    except Exception as e:
        print(f"Failed to connect to Pinecone index '{index_name}':", str(e))
|
|
|
|
|
# Eagerly connect at import time so a bad index name or key surfaces
# immediately rather than on the first chat message.
connect_to_pinecone(INDEX_NAME)
|
|
|
|
|
def print_current_index():
    """Report which Pinecone index the module is currently bound to."""
    message = f"Current index name: {INDEX_NAME}"
    print(message)
|
|
|
|
|
# --- Sanity checks (notebook-style): fail fast if the setup cells above
# --- did not run and leave the required globals in place.
if 'INDEX_NAME' not in globals() or INDEX_NAME is None:
    raise ValueError("INDEX_NAME is not set. Please set the index name first.")

if 'EMBED_MODEL' not in globals() or EMBED_MODEL is None:
    raise ValueError("EMBED_MODEL is not set. Please select an embedding model first.")

# Re-create a client and open the configured index for the stats probe below.
pc = Pinecone(api_key=PINECONE_API_KEY)

index = pc.Index(INDEX_NAME)

# 'dimension' is the vector size the index was created with; it is expected
# to match DIMENSIONS (1024) above — verify manually from this printout.
index_stats = index.describe_index_stats()
vector_dim = index_stats['dimension']
print(f"Index dimension: {vector_dim}")
|
|
|
|
|
# Metadata fields that vector_search() concatenates into the retrieval
# context (note: 'Etiqueta' is listed here but explicitly excluded from the
# rendered context text inside vector_search).
CONTEXT_FIELDS = ['Etiqueta', 'Pregunta 1', 'Pregunta 2', 'Pregunta 3', 'Respuesta Combinada']

# Metadata fields whose values chat_function() surfaces to the user as
# links / product tags.
LINK_FIELDS = ['Etiqueta', 'Respuesta Combinada']

print(f"Context fields set to: {CONTEXT_FIELDS}")
print(f"Link fields set to: {LINK_FIELDS}")
|
|
|
|
|
def get_field_selections():
    """Expose the currently configured context/link field lists as a dict."""
    return dict(
        CONTEXT_FIELDS=CONTEXT_FIELDS,
        LINK_FIELDS=LINK_FIELDS,
    )
|
|
|
|
|
|
|
|
|
# --- Sanity checks: the RAG pipeline below needs all of these globals to
# --- have been configured by the earlier cells.
if 'EMBED_MODEL' not in globals() or EMBED_MODEL is None:
    raise ValueError("EMBED_MODEL is not set. Please select an embedding model first.")
if 'INDEX_NAME' not in globals() or INDEX_NAME is None:
    raise ValueError("INDEX_NAME is not set. Please create or select an index first.")
if 'CONTEXT_FIELDS' not in globals() or 'LINK_FIELDS' not in globals():
    raise ValueError("CONTEXT_FIELDS and LINK_FIELDS are not set. Please run the field selection cell first.")

# Load the sentence-embedding model (BAAI/bge-m3 via model_name above);
# downloads the weights on first run.
embedding_model = SentenceTransformer(model_name)

# Client/index handle used by vector_search() below.
pinecone_client = Pinecone(api_key=PINECONE_API_KEY)
index = pinecone_client.Index(INDEX_NAME)

# Maximum prompt length, in characters, enforced by create_prompt().
LIMIT = 3750
|
|
|
def vector_search(query):
    """Embed *query* and return the top-3 Pinecone matches as context dicts.

    Each dict has 'content' (space-joined "field: value" pairs for the
    configured CONTEXT_FIELDS, excluding 'Etiqueta') and the match's raw
    'metadata'. Returns an empty list when nothing matches.
    """
    query_vector = embedding_model.encode(query)

    response = index.query(vector=query_vector.tolist(), top_k=3, include_metadata=True)

    results = []
    for match in response['matches']:
        if 'metadata' not in match:
            continue
        metadata = match['metadata']
        content = ' '.join(
            f"{field}: {value}"
            for field, value in metadata.items()
            if field in CONTEXT_FIELDS and field != 'Etiqueta'
        )
        results.append({'content': content, 'metadata': metadata})
    return results
|
|
|
def create_prompt(query, contexts, limit=None):
    """Assemble the completion prompt: context passages, then the question.

    Parameters:
        query: the user's question, interpolated verbatim into the prompt.
        contexts: sequence of dicts with a 'content' key (as produced by
            vector_search); joined with "---" separators.
        limit: maximum total prompt length in characters; defaults to the
            module-level LIMIT. (New optional parameter — previously the
            global was hard-wired; default behavior is unchanged.)

    Returns the prompt string; when the assembled prompt would reach the
    limit, the context section is truncated (the header and the question
    are always kept intact).
    """
    if limit is None:
        limit = LIMIT

    header = "\n\nContexto:\n"
    footer = f"\n\nPregunta: {query}\nRespuesta:"

    body = "\n\n---\n\n".join(context['content'] for context in contexts)

    if len(header) + len(body) + len(footer) >= limit:
        # Truncate only the context so the question always survives.
        available_space = limit - len(header) - len(footer)
        body = body[:available_space]

    return header + body + footer
|
|
|
def complete(prompt):
    """Placeholder LLM completion: ignores *prompt* and returns a canned reply.

    Returns a single-element list, mimicking a list of completion choices
    as chat_function expects (it reads element 0).
    Fixed: the original returned [f"Hola"] — an f-string with no
    placeholders; a plain literal is equivalent.
    TODO(review): wire this to g4f.ChatCompletion (imported above) for
    real answers.
    """
    return ["Hola"]
|
|
|
def check_image_exists(filepath):
    """Return True when *filepath* exists on disk."""
    found = os.path.exists(filepath)
    return found
|
|
|
# Tags whose product photos are stored as .png; every other tag uses .jpeg.
_PNG_TAGS = {
    "loro_27", "sirenita_01", "rosa_86", "rosa_82", "rosa_80", "rosa_81",
    "rosa_76", "blanco_58",
}

# Every known product tag, in the original priority order (the first tag
# that matches a link decides that link's image).
_PRODUCT_TAGS = [
    "lila_61", "lila_63", "lila_62", "lila_64",
    "fuxia_70", "fuxia_71", "fuxia_72", "fuxia_73", "fuxia_74",
    "melon_68", "melon_66", "melon_67", "melon_65",
    "vino_19", "vino_20", "barney_69", "loro_27", "lacre_02",
    "amarillo_03", "amarillo_04",
    "azulino_11", "azulino_14", "azulino_12", "azulino_13",
    "beigs_09", "beigs_10", "beigs_07", "beigs_06", "beigs_08", "beigs_05",
    "marina_32", "marina_29", "marina_28", "marina_31", "marina_30",
    "rojo_26", "rojo_23", "rojo_21", "rojo_22", "rojo_25", "rojo_24",
    "celeste_40", "celeste_38", "celeste_39", "celeste_33", "celeste_35",
    "celeste_37", "celeste_41", "celeste_42", "celeste_34", "celeste_36",
    "sirenita_01",
    "marino_18", "marino_17", "marino_16", "marino_15",
    "rosa_87", "rosa_86", "rosa_79", "rosa_82", "rosa_83", "rosa_78",
    "rosa_84", "rosa_85", "rosa_75", "rosa_80", "rosa_81", "rosa_77",
    "rosa_76",
    "blanco_55", "blanco_56", "blanco_53", "blanco_52", "blanco_57",
    "blanco_49", "blanco_51", "blanco_60", "blanco_47", "blanco_44",
    "blanco_50", "blanco_48", "blanco_59", "blanco_43", "blanco_58",
    "blanco_46", "blanco_45", "blanco_54",
]

# Single source of truth mapping tag -> image path. Previously the tag
# list (for detection) and the tag->image dict were maintained separately
# — 87 entries each — and could silently drift apart; the dict was also
# rebuilt on every chat turn. Now both are derived from _PRODUCT_TAGS once
# at import time.
TAGS_TO_IMAGES = {
    tag: f"images/{tag}.{'png' if tag in _PNG_TAGS else 'jpeg'}"
    for tag in _PRODUCT_TAGS
}


def _extract_links(search_results):
    """Collect LINK_FIELDS metadata values from the search results, in order."""
    return [
        result['metadata'].get(field)
        for result in search_results
        for field in LINK_FIELDS
        if field in result['metadata']
    ]


def _split_links(links):
    """Partition links into (tagged, untagged).

    A link is "tagged" when it mentions any known product tag; untagged
    links are plain content shown to the user verbatim.
    Assumes each link value is a string — TODO confirm against the index
    metadata schema.
    """
    tagged, untagged = [], []
    for link in links:
        if any(tag in link for tag in TAGS_TO_IMAGES):
            tagged.append(link)
        else:
            untagged.append(link)
    return tagged, untagged


def _find_image(tagged_links):
    """Return the image path to display for the tagged links, or None.

    For each link the first matching tag with an existing image file wins;
    across links, a later link overrides an earlier one (this preserves the
    original loop semantics).
    """
    image_url = None
    for link in tagged_links:
        for tag, path in TAGS_TO_IMAGES.items():
            if tag in link and check_image_exists(path):
                image_url = path
                break  # first matching tag decides this link's image
    return image_url


def chat_function(message, history):
    """Run one RAG chat turn: retrieve, prompt, complete, attach links/image.

    Parameters:
        message: the user's message.
        history: prior chat history (currently unused; kept for the Gradio
            callback signature).

    Returns (full_response, image_url) where image_url may be None.
    """
    search_results = vector_search(message)

    # Build the prompt from the retrieved contexts and get a completion.
    query_with_contexts = create_prompt(message, search_results)
    response = complete(query_with_contexts)

    # Keep only the first line of the first completion choice.
    full_response = response[0].split("\n")[0]

    tagged_links, plain_links = _split_links(_extract_links(search_results))

    if plain_links:
        # Fixed mojibake in the user-facing text: "continuaci贸n" -> "continuación".
        full_response += ".\n\nTe detallamos nuestro contenido a continuación:\n" + plain_links[0]

    return full_response, _find_image(tagged_links)
|
|
|
|
|
def update_image(image_url):
    """Pass a truthy image path through to the Image component, else None."""
    return image_url if image_url else None
|
|
|
|
|
# --- Gradio UI: two-column layout — chat controls on the left, the
# --- associated product image on the right. gr.State holds the running
# --- chat history and the last image path between turns.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column(scale=1):
            chatbot_input = gr.Textbox(label="Tu mensaje")
            chatbot_output = gr.Chatbot(label="ChatBot")
            chatbot_history = gr.State(value=[])
            image_url = gr.State(value=None)
            submit_button = gr.Button("Enviar")
        with gr.Column(scale=1):
            image_output = gr.Image(label="Imagen asociada")

    def process_input(message, history):
        # Run one RAG turn, append the (user, bot) pair to the history, and
        # forward the (possibly None) image path to the image_url State.
        full_response, image = chat_function(message, history)
        history.append((message, full_response))
        # History is returned twice: once for the Chatbot display and once
        # for the State feeding the next turn.
        return history, history, image

    submit_button.click(process_input, inputs=[chatbot_input, chatbot_history], outputs=[chatbot_output, chatbot_history, image_url])
    # Writing to the image_url State triggers this change handler, which
    # pushes the new path into the Image widget.
    image_url.change(fn=update_image, inputs=image_url, outputs=image_output)

# debug=True blocks and streams errors to the console (notebook-style run).
demo.launch(debug=True)
|
|
|
|