Commit · 9d1ad33
Parent(s): 5396fab

App test

- Dockerfile +28 -0
- app.py +54 -0
- requirements.txt +3 -0
Dockerfile
ADDED
@@ -0,0 +1,28 @@
FROM python:3.11

# Set up a new user named "user" with user ID 1000
RUN useradd -m -u 1000 user

# Switch to the "user" user
USER user

# Set home to the user's home directory
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Set the working directory to the user's home directory
WORKDIR $HOME/app

# Try and run pip command after setting the user with `USER user` to avoid permission issues with Python
RUN pip install --no-cache-dir --upgrade pip

# Copy the current directory contents into the container at $HOME/app setting the owner to the user
COPY --chown=user . $HOME/app

COPY --chown=user requirements.txt .

RUN pip install --no-cache-dir --upgrade -r requirements.txt

COPY --chown=user app.py app.py

ENTRYPOINT ["solara", "run", "app.py", "--host=0.0.0.0", "--port", "7860"]
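The Space builds this image automatically, but it can also be sanity-checked locally. A minimal sketch, assuming Docker is installed and this is run from the repo root; the `token-viz` tag and the 30-second startup wait are my own arbitrary choices, not part of this commit:

import subprocess
import time
import urllib.request

# Build the image from the directory containing this Dockerfile
# ("token-viz" is just an example tag).
subprocess.run(["docker", "build", "-t", "token-viz", "."], check=True)

# Start the container, exposing the port the ENTRYPOINT passes to solara.
proc = subprocess.Popen(["docker", "run", "--rm", "-p", "7860:7860", "token-viz"])
try:
    time.sleep(30)  # give the GPT-2 download and server startup some time
    with urllib.request.urlopen("http://localhost:7860") as resp:
        print("Space responded with HTTP", resp.status)
finally:
    proc.terminate()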
app.py
ADDED
@@ -0,0 +1,54 @@
import solara
import random
import torch
import torch.nn.functional as F
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained('gpt2')
model = AutoModelForCausalLM.from_pretrained('gpt2')
text1 = solara.reactive("Never gonna give you up, never gonna let you")
@solara.component
def Page():
    with solara.Column(margin=10):
        solara.Markdown("#Next token prediction visualization")
        solara.Markdown("I built this tool to help me understand autoregressive language models. For any given text, it gives the top 10 candidates to be the next token with their respective probabilities. The language model I'm using is the smallest version of GPT-2, with 124M parameters.")
        def on_action_cell(column, row_index):
            text1.value += tokenizer.decode(top_10.indices[0][row_index])
        cell_actions = [solara.CellAction(icon="mdi-thumb-up", name="Select", on_click=on_action_cell)]
        solara.InputText("Enter text:", value=text1, continuous_update=True)
        if text1.value != "":
            tokens = tokenizer.encode(text1.value, return_tensors="pt")
            spans1 = ""
            spans2 = ""
            for i, token in enumerate(tokens[0]):
                random.seed(i)
                random_color = ''.join([random.choice('0123456789ABCDEF') for k in range(6)])
                spans1 += " " + f"<span style='font-family: helvetica; color: #{random_color}'>{token}</span>"
                spans2 += " " + f"""<span style="
                    padding: 6px;
                    border-right: 3px solid white;
                    line-height: 3em;
                    font-family: courier;
                    background-color: #{random_color};
                    color: white;
                    position: relative;
                    "><span style="
                    position: absolute;
                    top: 5.5ch;
                    line-height: 1em;
                    left: -0.5px;
                    font-size: 0.45em"> {token}</span>{tokenizer.decode([token])}</span>"""
            solara.Markdown(f'{spans2}')
            solara.Markdown(f'{spans1}')
            outputs = model.generate(tokens, max_new_tokens=1, output_scores=True, return_dict_in_generate=True, pad_token_id=tokenizer.eos_token_id)
            scores = F.softmax(outputs.scores[0], dim=-1)
            top_10 = torch.topk(scores, 10)
            df = pd.DataFrame()
            df["probs"] = top_10.values[0]
            df["probs"] = [f"{value:.2%}" for value in df["probs"].values]
            df["next token ID"] = [top_10.indices[0][i].numpy() for i in range(10)]
            df["predicted next token"] = [tokenizer.decode(top_10.indices[0][i]) for i in range(10)]
            solara.Markdown("###Prediction")
            solara.DataFrame(df, items_per_page=10, cell_actions=cell_actions)
Page()
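Stripped of the Solara UI, the core of the app is a single greedy generation step followed by a softmax over the returned scores. A minimal console sketch of that same lookup, using the same gpt2 checkpoint and the app's default prompt:

import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

text = "Never gonna give you up, never gonna let you"  # the app's default prompt
tokens = tokenizer.encode(text, return_tensors="pt")

# One generation step; output_scores returns the logits for the new token.
outputs = model.generate(
    tokens,
    max_new_tokens=1,
    output_scores=True,
    return_dict_in_generate=True,
    pad_token_id=tokenizer.eos_token_id,
)
probs = F.softmax(outputs.scores[0], dim=-1)
top_10 = torch.topk(probs, 10)

# Same three columns as the app's DataFrame: probability, token ID, decoded token.
for prob, token_id in zip(top_10.values[0], top_10.indices[0]):
    print(f"{prob.item():7.2%}  {int(token_id):>6}  {tokenizer.decode([int(token_id)])!r}")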
requirements.txt
ADDED
@@ -0,0 +1,3 @@
solara
pandas
transformers[torch]
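None of these dependencies are version-pinned, so each Space build installs whatever pip resolves at that moment. If reproducibility matters later, one way to record what a given build actually used is to log the resolved versions at startup; a small sketch using only the standard library (torch arrives via the transformers[torch] extra rather than being listed directly):

from importlib.metadata import version

for pkg in ("solara", "pandas", "transformers", "torch"):
    print(f"{pkg}=={version(pkg)}")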