# Extracted from a Hugging Face Spaces file view (Space status: Running).
# File size: 1,755 bytes; commits shown: 6f51580, c4841c7.
import solara
# Remove the local ./data directory (the same path the LanceDB connection
# below uses) so every start of the script begins from an empty database.
# ignore_errors=True keeps this from failing when the directory does not
# exist yet, e.g. on the very first run.
import shutil
shutil.rmtree("./data", ignore_errors=True)
import polars as pl
import string

# Fetch the song/lyrics dataset, published as a CSV behind a Google Drive
# direct-download link.
df = pl.read_csv(
    "https://drive.google.com/uc?export=download&id=1uD3h7xYxr9EoZ0Ggoh99JtQXa3AxtxyU"
)

# Normalise the capitalisation of album and song titles with string.capwords.
# Both replacement series are computed from the frame as loaded, so they can
# be swapped in together in a single with_columns call.
df = df.with_columns(
    pl.Series("Album", list(map(string.capwords, df["Album"]))),
    pl.Series("Song", list(map(string.capwords, df["Song"]))),
)

# Rows without lyrics get the literal string "None" so the column holds no
# nulls before it is concatenated into the text column below.
df = df.with_columns(pl.col("Lyrics").fill_null("None"))

# Build the document that is embedded for each song: a markdown-style heading
# "# <Album>: <Song>", a blank line, then the lyrics.
df = df.with_columns(
    text=pl.concat_str(
        [
            pl.lit("# "),
            pl.col("Album"),
            pl.lit(": "),
            pl.col("Song"),
            pl.lit("\n\n"),
            pl.col("Lyrics"),
        ]
    )
)
import lancedb
from lancedb.embeddings import get_registry

# Connect to the local LanceDB database stored under the data/ directory.
db = lancedb.connect("data/")

# Embedding function taken from LanceDB's registry: the sentence-transformers
# provider, configured with the TaylorAI/gte-tiny model and pinned to the CPU.
embeddings = get_registry().get("sentence-transformers").create(
    name="TaylorAI/gte-tiny", device="cpu"
)
from lancedb.pydantic import LanceModel, Vector
class Songs(LanceModel):
    """Schema of the "Songs" LanceDB table: one row per song.

    The plain string fields mirror columns of the prepared DataFrame. The
    ``text`` and ``vector`` fields are tied to the module-level ``embeddings``
    function through its ``SourceField()`` / ``VectorField()`` markers.
    """

    # Song title.
    Song: str
    # Song lyrics (missing lyrics are stored as the string "None" upstream).
    Lyrics: str
    # Album title.
    Album: str
    # Performing artist.
    Artist: str
    # Combined "# Album: Song" heading plus lyrics; declared as the source
    # field of the embedding function, i.e. the text that gets embedded.
    text: str = embeddings.SourceField()
    # Embedding vector, sized by the model's dimensionality
    # (embeddings.ndims()); declared as the embedding function's vector field.
    # NOTE(review): presumably filled in by LanceDB when rows are added -
    # confirm against the LanceDB embedding-function docs.
    vector: Vector(embeddings.ndims()) = embeddings.VectorField()
# Create the "Songs" table from the Songs schema, then load the prepared
# DataFrame into it.
table = db.create_table("Songs", schema=Songs)
table.add(df)

# Reactive state holding the current search text; it starts out with an
# example question so the page shows results immediately.
query = solara.reactive("Which song is about a boy who is having nightmares?")
@solara.component
def Page():
    """Render the song-finder page: a query box plus a table of matches.

    Reads the module-level ``query`` reactive. For a non-blank query it runs
    a search against the module-level ``table`` and displays the ten closest
    rows (song, album, distance, lyrics, artist). Blank input shows no table.
    """
    with solara.Column(margin=10):
        solara.Markdown("# Metallica Song Finder")
        # continuous_update=True pushes every keystroke into `query`, so the
        # results below refresh while the user is still typing.
        solara.InputText("Enter some query:", query, continuous_update=True)

        # Treat whitespace-only input like an empty box: searching for a
        # blank string would only surface ten arbitrary neighbours.
        search_text = query.value.strip()
        if search_text:
            df_results = table.search(search_text).limit(10).to_polars()
            df_results = df_results.select(
                ["Song", "Album", "_distance", "Lyrics", "Artist"]
            )
            solara.DataFrame(df_results, items_per_page=10)
|