Spaces:
Runtime error
Runtime error
feat(project): update project
Browse files- app.py +80 -0
- requirements.txt +5 -0
app.py
ADDED
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import pandas as pd
|
3 |
+
from typing import Any
|
4 |
+
import datasets
|
5 |
+
from tqdm import tqdm
|
6 |
+
from huggingface_hub import login
|
7 |
+
import os
|
8 |
+
# Authenticate against the Hugging Face Hub only when a token is configured.
# NOTE(review): calling login() without a token presumably falls back to an
# interactive prompt, which a hosted app cannot answer - confirm.
hf_token = os.environ.get("HF_Token")
if hf_token:
    login(hf_token)

# Stream the test split instead of downloading it up front.
test = datasets.load_dataset(
    "minskiter/weibo",
    split=datasets.Split.TEST,
    streaming=True,
)
# Converter from the integer ids stored in the "labels" feature to tag names.
int2str = test.features["labels"].feature.int2str

# Number of examples shown per page of the table.
page_size = 10
# One DataFrame per page, in dataset order.
pages = []

# Walk the stream exactly once and cut it into pages. Fetching every page with
# skip(i * page_size).take(page_size) would build a fresh iterable per page
# (presumably re-reading the stream from the start each time - confirm).
batch = []
with tqdm(desc="load dataset") as bar:
    for example in test:
        batch.append(example)
        if len(batch) == page_size:
            pages.append(pd.DataFrame(batch))
            batch = []
            # tqdm.update() takes an increment, so advance by one page.
            bar.update(1)
    if batch:
        # Final, partially filled page.
        pages.append(pd.DataFrame(batch))
        bar.update(1)

if not pages:
    raise RuntimeError("dataset 'minskiter/weibo' (test split) yielded no examples")

# Page currently shown in the table; replaced by show().
cur = pages[0]
|
27 |
+
|
28 |
+
def show(page: float) -> pd.DataFrame:
    """Select the page to display and remember it as the current page.

    Args:
        page: Zero-based page number. ``None`` is tolerated (presumably what
            an emptied number box delivers - confirm against the UI).

    Returns:
        The entry of ``pages`` for the requested page, clamped to the valid
        range; the previously selected page when ``page`` is ``None``.
    """
    global cur
    if page is None:
        # Nothing to convert; keep showing whatever is current.
        return cur
    # Clamp instead of indexing blindly: a negative value would wrap around to
    # the end of the list and a too-large one would raise IndexError.
    index = min(max(int(page), 0), len(pages) - 1)
    cur = pages[index]
    return cur
|
32 |
+
|
33 |
+
def getobj():
    """Build a blank entity record.

    Returns:
        A new dict with an empty ``word`` list, ``start`` and ``end`` set to
        ``-1`` and ``entity`` set to ``"O"``. Every call creates a fresh
        ``word`` list, so records never share state.
    """
    blank = dict(word=[], start=-1, end=-1, entity="O")
    return blank
|
40 |
+
|
41 |
+
def _decode_entities(tokens, tags) -> list[dict[str, Any]]:
    """Turn per-token tags into entity spans over the joined text.

    Tags are interpreted by their first character: ``B`` opens a span, ``S``
    is a complete single-token span, ``E``/``I``/``M`` extend the open span,
    and anything else (e.g. ``O``) closes it. A continuation tag with no open
    span starts a new one rather than producing a span without a start.

    Args:
        tokens: Sequence of text pieces (or a plain string).
        tags: Tag string for each token; extra items on either side are ignored.

    Returns:
        A list of dicts with ``word``, ``start``, ``end`` and ``entity`` keys,
        where ``start``/``end`` are character offsets into ``"".join(tokens)``.
    """
    entities = []
    current = None  # span being accumulated, or None when outside any span
    offset = 0      # character position of the current token in the joined text
    for token, tag in zip(tokens, tags):
        end = offset + len(token)
        prefix = tag[:1]
        if prefix in ("E", "I", "M") and current is not None:
            current["word"] += token
            current["end"] = end
        else:
            # Every other case ends whatever span was open.
            if current is not None:
                entities.append(current)
                current = None
            if prefix in ("B", "S", "E", "I", "M"):
                current = {
                    "word": token,
                    "start": offset,
                    "end": end,
                    # "B-PER.NAM" -> "PER.NAM"
                    "entity": tag.split("-")[-1],
                }
            if prefix == "S":
                # Single-token span: complete immediately.
                entities.append(current)
                current = None
        offset = end
    if current is not None:
        entities.append(current)
    return entities


def showIter(evt: gr.SelectData)->dict[str, Any]:
    """Build the highlighted-text preview for the table row that was selected.

    Reads the row at ``evt.index[0]`` of the current page, taking its first
    column as the text and its second as the integer labels, converts the
    labels to tag names with ``int2str`` and decodes them into entity spans.

    Args:
        evt: Selection event; ``evt.index[0]`` is used as the row position.

    Returns:
        A dict with the joined ``text`` and the list of ``entities``.
    """
    # Read just the selected row instead of converting the whole page to lists.
    row = cur.iloc[evt.index[0]]
    text, labels = row.iloc[0], row.iloc[1]
    tags = int2str([int(label) for label in labels])
    return {"text": "".join(text), "entities": _decode_entities(text, tags)}
|
70 |
+
|
71 |
+
# Assemble the UI: a paged table with a page selector, plus a highlighted-text
# preview that is filled in when a table row is selected.
# NOTE(review): the nesting of the layout contexts below is reconstructed from
# a flattened listing - confirm the intended placement of the preview.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            output = gr.DataFrame(value=cur)
            last_page = len(pages) - 1
            page = gr.Number(minimum=0, maximum=last_page, label="page")
            # Changing the page number swaps the table contents.
            page.change(fn=show, inputs=page, outputs=output)
        text = gr.HighlightedText(label="preview")
        # Selecting a cell renders that row's entities in the preview.
        output.select(fn=showIter, inputs=[], outputs=[text])

demo.launch()
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
transformers==4.30.1
|
2 |
+
gradio==3.36.1
|
3 |
+
huggingface-hub==0.15.1
|
4 |
+
pandas==2.0.3
|
5 |
+
datasets==2.14.0
|