minskiter committed on
Commit
f9d052c
·
1 Parent(s): 5a0b083

feat(project): update project

Browse files
Files changed (2) hide show
  1. app.py +80 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ from typing import Any
4
+ import datasets
5
+ from tqdm import tqdm
6
+ from huggingface_hub import login
7
+ import os
8
# Authenticate against the Hugging Face Hub with the token from the environment.
# NOTE(review): when HF_Token is unset, login() is called with None — confirm
# that this fallback is acceptable where the app is deployed.
login(os.environ.get("HF_Token"))

# Stream the test split instead of downloading it in full.
test = datasets.load_dataset(
    "minskiter/weibo",
    split=datasets.Split.TEST,
    streaming=True,
)
# Converter from integer class ids of the "labels" feature to tag names.
int2str = test.features["labels"].feature.int2str

page_size = 10
pages = []
# Walk the stream exactly once and cut it into pages of `page_size` rows.
# Calling skip(k).take(n) for every page would make the stream read past all
# previously seen rows again for each page.
with tqdm(desc="load dataset") as bar:
    rows = []
    for example in test:
        rows.append(example)
        if len(rows) == page_size:
            pages.append(pd.DataFrame(rows))
            rows = []
            bar.update(1)  # update() takes an increment, not a running total
    if rows:
        # Trailing partial page.
        pages.append(pd.DataFrame(rows))
        bar.update(1)

if not pages:
    raise RuntimeError("dataset split is empty; there is nothing to display")

# Page currently shown in the table; the row-selection handler reads from it.
cur = pages[0]
27
+
28
def show(page: float) -> pd.DataFrame:
    """Switch the table to the requested page and return that page.

    Args:
        page: Zero-based page number as delivered by the number input. It is
            truncated to an integer and clamped to the range of loaded pages.
            ``None`` (an emptied input box) keeps the current page.

    Returns:
        The DataFrame of the page that is now current.
    """
    global cur
    if page is None or not pages:
        return cur
    # Clamp instead of indexing directly: a negative value would otherwise
    # wrap around to the last pages and a too-large one would raise IndexError.
    index = min(max(int(page), 0), len(pages) - 1)
    cur = pages[index]
    return cur
32
+
33
def getobj():
    """Return a fresh, empty entity record.

    The record has an empty ``word`` list, ``start`` and ``end`` set to -1,
    and ``entity`` set to "O". Every call builds a new dict and a new list,
    so callers can mutate the result freely.
    """
    blank = dict(word=[], start=-1, end=-1, entity="O")
    return blank
40
+
41
def _tags_to_entities(text, tags):
    """Group per-character tags into entity spans.

    Tags are matched on their first character: "B" opens a span, "I"/"M"/"E"
    extend the open span, "S" is a single-character span, and "O" closes any
    open span. Tags with any other prefix are ignored. The entity type is the
    part of the tag after its last "-".
    """
    entities = []
    open_entity = None
    # A trailing "O" sentinel flushes a span that runs to the end of the text.
    for pos, tag in enumerate([*tags, "O"]):
        prefix = tag[:1]
        if prefix in ("B", "S", "O") and open_entity is not None:
            open_entity["word"] = "".join(open_entity["word"])
            entities.append(open_entity)
            open_entity = None
        if prefix == "B":
            open_entity = {
                "word": [text[pos]],
                "start": pos,
                "end": pos + 1,
                "entity": tag.split("-")[-1],
            }
        elif prefix == "S":
            entities.append({
                "word": text[pos],
                "start": pos,
                "end": pos + 1,
                "entity": tag.split("-")[-1],
            })
        elif prefix in ("E", "I", "M"):
            if open_entity is None:
                # Continuation tag without a preceding "B": start the span
                # here rather than emitting an entity whose start is -1.
                open_entity = {
                    "word": [],
                    "start": pos,
                    "end": pos,
                    "entity": tag.split("-")[-1],
                }
            open_entity["word"].append(text[pos])
            open_entity["end"] = pos + 1
    return entities


def showIter(evt: gr.SelectData) -> dict[str, Any]:
    """Build the highlighted-text payload for the table row that was selected.

    Args:
        evt: Selection event; ``evt.index[0]`` is used as the row position
            within the current page.

    Returns:
        A dict with ``"text"`` (the row's text joined into one string) and
        ``"entities"`` (a list of dicts with ``word``, ``start``, ``end`` and
        ``entity`` keys).
    """
    # Read only the selected row; the first two columns are taken to be the
    # text and its label ids — presumably the dataset's column order, verify.
    row = cur.iloc[evt.index[0]].tolist()
    text, label_ids = row[0], row[1]
    tags = int2str([int(label_id) for label_id in label_ids])
    return {"text": "".join(text), "entities": _tags_to_entities(text, tags)}
70
+
71
# UI: a table showing the current page with a page selector, plus a preview
# that highlights the entities of whichever table row is clicked.
# NOTE(review): the nesting below is reconstructed from a flattened diff;
# confirm which container the preview component belongs to.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            output = gr.DataFrame(value=cur)
            page = gr.Number(
                minimum=0,
                maximum=len(pages) - 1,
                label="page",
            )
            # Changing the page number swaps the table contents.
            page.change(fn=show, inputs=page, outputs=output)
        text = gr.HighlightedText(label="preview")
        # Selecting a row renders its entities in the preview.
        output.select(fn=showIter, inputs=[], outputs=[text])

demo.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ transformers==4.30.1
2
+ gradio==3.36.1
3
+ huggingface-hub==0.15.1
4
+ pandas==2.0.3
5
+ datasets==2.14.0