Spaces:
Runtime error
Runtime error
File size: 3,639 Bytes
e20d7fd dd05a16 e20d7fd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
# Commented out IPython magic to ensure Python compatibility.
# %%capture
# !pip install -U sentence-transformers
# !pip install gradio chromadb
import pandas as pd
from sentence_transformers import SentenceTransformer, util
import ast
from ast import literal_eval
import chromadb
from chromadb.utils import embedding_functions
import gdown
# Download the IMDb CSV from Google Drive (base URL + file id) into the
# working directory, then load it into the module-level DataFrame `df`.
url = 'https://drive.google.com/uc?id='
file_id = '1MgM3iObIAdqA-SvI-pXeUeXEiEAuMzXw'
output = '25k IMDb movie Dataset.csv'
gdown.download(url=url + file_id, output=output, quiet=False)
df = pd.read_csv(filepath_or_buffer=output)
def concatenar_lista(lista):
    """Turn a stringified Python list into one space-separated string.

    Args:
        lista: Text holding a Python list literal, e.g. ``"['a', 'b']"``.
            ``None``, NaN and blank/whitespace-only strings are treated as
            "no items".

    Returns:
        The list items joined with single spaces; ``''`` when there is
        nothing to join.

    Raises:
        ValueError, SyntaxError: If a non-blank string is not a valid literal.
    """
    # Missing cells arrive as None/NaN, or as the ' ' placeholder that the
    # script's fillna(' ') writes; literal_eval would raise on all of them.
    if lista is None or (isinstance(lista, float) and lista != lista):
        return ''
    if isinstance(lista, str) and not lista.strip():
        return ''
    items = literal_eval(lista)
    # str() keeps the join from failing on non-string items (e.g. numbers).
    return ' '.join(str(item) for item in items)
def string_to_list(lista):
    """Parse a stringified Python list into a real list.

    Args:
        lista: Text holding a Python list literal, e.g. ``"['Action', 'Drama']"``.
            ``None``, NaN and blank/whitespace-only strings are treated as an
            empty list.

    Returns:
        The value produced by ``ast.literal_eval``, or ``[]`` for missing input.

    Raises:
        ValueError, SyntaxError: If a non-blank string is not a valid literal.
    """
    # Missing cells arrive as None/NaN, or as the ' ' placeholder that the
    # script's fillna(' ') writes; literal_eval would raise on all of them.
    if lista is None or (isinstance(lista, float) and lista != lista):
        return []
    if isinstance(lista, str) and not lista.strip():
        return []
    return literal_eval(lista)
# ---------------------------------------------------------------------------
# Pre-processing, embedding and indexing of the movie dataset.
# ---------------------------------------------------------------------------

# Missing cells become a single-space placeholder so later string
# concatenation never meets NaN.
df = df.fillna(' ')

# Column names are used exactly as spelled here ('Plot Kyeword', 'Generes').
df['Keywords'] = df['Plot Kyeword'].apply(concatenar_lista)
df['Stars'] = df['Top 5 Casts'].apply(concatenar_lista)
df['Generes'] = df['Generes'].apply(string_to_list)
# Unparseable ratings become 0 so that every row has a float Rating.
df['Rating'] = pd.to_numeric(df['Rating'], errors="coerce").fillna(0).astype("float")

# One entry per distinct genre (feeds the UI dropdown). explode() yields NaN
# for rows whose genre list is empty, hence the dropna().
unique_generes = df['Generes'].explode().dropna().unique()
df.drop(['Plot Kyeword', 'Top 5 Casts'], axis=1, inplace=True)

# Text that gets embedded: overview + keywords + cast.
df['text'] = df.apply(lambda x: str(x['Overview']) + ' ' + x['Keywords'] + ' ' + x['Stars'], axis=1)

model = SentenceTransformer('all-MiniLM-L6-v2')
# Pass a plain list: encode() indexes its input positionally, which a pandas
# Series only supports by accident of having a default RangeIndex.
embeddings = model.encode(df['text'].tolist(), batch_size=64, show_progress_bar=True)
df['embeddings'] = embeddings.tolist()
df['ids'] = df.index
df['ids'] = df['ids'].astype('str')

client_persistent = chromadb.PersistentClient(path='data_embeddings')
# get_or_create: the store is persistent, so on a second start the collection
# already exists and create_collection() would raise.
db = client_persistent.get_or_create_collection(name='movies_db')

# Chroma metadata values must be scalars, so the genre list is flattened to a
# ', '-separated string.
df['Generes'] = df['Generes'].apply(lambda x: ', '.join(x))
from torch import embedding  # NOTE(review): appears unused; kept to avoid changing imports

_ids = df['ids'].tolist()
_vectors = df['embeddings'].tolist()
_metadatas = df.drop(['ids', 'embeddings', 'text'], axis=1).to_dict('records')

# Chroma rejects a single add/upsert call above its maximum batch size (a few
# thousand records; the exact limit depends on the SQLite build), so the rows
# are written in chunks. upsert() keeps re-runs idempotent: existing ids are
# overwritten rather than duplicated or rejected.
_BATCH_SIZE = 5000
for _start in range(0, len(_ids), _BATCH_SIZE):
    _end = _start + _BATCH_SIZE
    db.upsert(
        ids=_ids[_start:_end],
        embeddings=_vectors[_start:_end],
        metadatas=_metadatas[_start:_end],
    )
from chromadb.api.types import Metadatas
# Candidates requested from Chroma per wanted row when a genre filter has to
# be applied afterwards in Python.
_GENRE_OVERFETCH = 20
# Column order of the result table.
_RESULT_COLUMNS = ['Title', 'Overview', 'Director', 'Stars', 'Genre', 'year', 'Rating']


def search(query, genre, rating, num):
    """Return the movies most similar to *query* as a DataFrame.

    Args:
        query: Free-text description of what the user is looking for.
        genre: Genre name (or list of names) a movie must have; falsy means
            "any genre".
        rating: Minimum rating; falsy means no lower bound (0).
        num: Maximum number of rows to return; values below 1 (or missing)
            are treated as 1.

    Returns:
        pandas.DataFrame with the columns Title, Overview, Director, Stars,
        Genre, year and Rating, ordered as returned by the vector search.
        With a genre filter the frame may hold fewer than *num* rows if too
        few of the nearest candidates carry that genre.
    """
    num = max(1, int(num or 1))
    # Rating is stored as float, so compare against a float.
    min_rating = float(rating) if rating else 0.0

    if not genre:
        wanted = set()
    elif isinstance(genre, (list, tuple, set)):
        wanted = set(genre)
    else:
        wanted = {genre}

    # The stored 'Generes' value is a ', '-joined string such as
    # "Action, Drama". An equality filter on it only matches movies whose
    # *entire* genre string equals the selection, so the genre test is done
    # in Python on an over-fetched candidate list instead.
    # NOTE(review): query_texts relies on the collection's own embedding
    # function - confirm it matches the model used to build the index.
    responses = db.query(
        query_texts=[query],
        n_results=num * _GENRE_OVERFETCH if wanted else num,
        where={"Rating": {"$gte": min_rating}},
        include=['metadatas'],
    )

    # Exactly one query text was sent, so there is at most one result list.
    candidates = responses['metadatas'][0] if responses['metadatas'] else []

    response_data = []
    for metadata in candidates:
        if wanted:
            movie_genres = {g.strip() for g in str(metadata['Generes']).split(',')}
            if not wanted & movie_genres:
                continue
        response_data.append({
            'Title': metadata['movie title'],
            'Overview': metadata['Overview'],
            'Director': metadata['Director'],
            'Stars': metadata['Stars'],
            'Genre': metadata['Generes'],
            'year': metadata['year'],
            'Rating': metadata['Rating'],
        })
        if len(response_data) >= num:
            break

    # Explicit columns keep the table header stable even with zero matches.
    return pd.DataFrame(response_data, columns=_RESULT_COLUMNS)
import gradio as gr
# Build and launch the Gradio front end for `search`.
# Only real, non-blank genre names are offered; NaN or placeholder entries
# would show up as meaningless dropdown options.
genres = [g for g in unique_generes.tolist() if isinstance(g, str) and g.strip()]
iface = gr.Interface(
    fn=search,
    inputs=[
        gr.Textbox(lines=5, placeholder="Escribe aquí tu consulta...", label="Consulta"),
        gr.Dropdown(choices=genres, label="Género de la película"),
        gr.Slider(minimum=1, maximum=10, value=5, label="Puntuación mínima"),
        # precision=0 makes the component deliver an integer result count.
        gr.Number(minimum=1, maximum=10, value=3, precision=0, label="Número de resultados"),
    ],
    outputs=gr.Dataframe(type="pandas", label="Resultados"),
    title="Buscador de películas",
    description="Introduce tu consulta, selecciona un género y define una puntuación mínima para buscar películas.",
)
# The stray trailing "|" after this call was an extraction artifact that made
# the statement a syntax error.
iface.launch(share=False)