alonsosilva committed on
Commit
6f51580
·
1 Parent(s): df65356
Files changed (3) hide show
  1. Dockerfile +21 -0
  2. app.py +67 -0
  3. requirements.txt +4 -0
Dockerfile ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.11
2
+
3
+ # Set up a new user named "user" with user ID 1000 for permission
4
+ RUN useradd -m -u 1000 user
5
+ # Switch to the "user" user
6
+ USER user
7
+ # Set home to the user's home directory
8
+ ENV HOME=/home/user \
9
+ PATH=/home/user/.local/bin:$PATH
10
+
11
+ # Upgrade pip
12
+ RUN pip install --no-cache-dir --upgrade pip
13
+
14
+ COPY --chown=user requirements.txt requirements.txt
15
+
16
+ # Install requirements
17
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
18
+
19
+ COPY --chown=user app.py app.py
20
+
21
+ ENTRYPOINT ["solara", "run", "app.py", "--host=0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import solara
2
+
3
+ # Remove the ./data directory (where the LanceDB database is stored) so the app starts from a clean state
4
+ import shutil
5
+
6
+ shutil.rmtree("./data", ignore_errors=True)
7
+
8
+ import polars as pl
9
+
10
+ df = pl.read_csv(
11
+ "https://drive.google.com/uc?export=download&id=1uD3h7xYxr9EoZ0Ggoh99JtQXa3AxtxyU"
12
+ )
13
+
14
+ import string
15
+
16
+ df = df.with_columns(
17
+ pl.Series("Album", [string.capwords(album) for album in df["Album"]])
18
+ )
19
+ df = df.with_columns(pl.Series("Song", [string.capwords(song) for song in df["Song"]]))
20
+ df = df.with_columns(pl.col("Lyrics").fill_null("None"))
21
+
22
+ df = df.with_columns(
23
+ text=pl.lit("# ")
24
+ + pl.col("Album")
25
+ + pl.lit(": ")
26
+ + pl.col("Song")
27
+ + pl.lit("\n\n")
28
+ + pl.col("Lyrics")
29
+ # text = pl.col("Lyrics")
30
+ )
31
+
32
+ import lancedb
33
+
34
+ db = lancedb.connect("data/")
35
+
36
+ from lancedb.embeddings import get_registry
37
+
38
+ embeddings = (
39
+ get_registry()
40
+ .get("sentence-transformers")
41
+ .create(name="TaylorAI/gte-tiny", device="cuda")
42
+ )
43
+
44
+ from lancedb.pydantic import LanceModel, Vector
45
+
46
+
47
+ class Songs(LanceModel):
48
+ Song: str
49
+ Lyrics: str
50
+ Album: str
51
+ Artist: str
52
+ text: str = embeddings.SourceField()
53
+ vector: Vector(embeddings.ndims()) = embeddings.VectorField()
54
+
55
+ table = db.create_table("Songs", schema=Songs)
56
+ table.add(data=df)
57
+
58
+ query = solara.reactive("Which song is about a boy who is having nightmares?")
59
+ @solara.component
60
+ def Page():
61
+ with solara.Column(margin=10):
62
+ solara.Markdown("# Metallica Song Finder")
63
+ solara.InputText("Enter some query:", query, continuous_update=True)
64
+ if query.value != "":
65
+ df_results = table.search(query.value).limit(10).to_polars()
66
+ df_results = df_results.select(['Song', 'Album', '_distance', 'Lyrics', 'Artist'])
67
+ solara.DataFrame(df_results, items_per_page=10)
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ solara==1.39.0
2
+ polars==1.7.1
3
+ lancedb==0.13.0
4
+ sentence-transformers==3.1.1