Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,7 +1,172 @@
|
|
1 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
|
3 |
-
|
4 |
-
|
|
|
|
|
|
|
|
|
5 |
|
6 |
-
|
7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
+
import pandas as pd
|
3 |
+
import librosa
|
4 |
+
import numpy as np
|
5 |
+
from datasets import Dataset, DatasetDict
|
6 |
+
class DataEditor:
    """Gradio editor that pages through a text/audio dataset and flags rows.

    The working state is split in two: ``self.data`` holds the ``text`` and
    ``flag`` columns shown in the table, and ``self.sdata`` holds the ``audio``
    column as a plain list.  ``finsh_data`` merges both back into ``self.df``.
    """

    # Number of rows shown per table page.
    PAGE_SIZE = 10
    # Prefix under which edited datasets are saved.
    # NOTE(review): this looks like a Colab Google Drive mount; confirm it
    # exists in the environment where the app actually runs.
    SAVE_ROOT = '/content/drive/MyDrive/vitsM/DATA/sata/NewData2hba/'

    def __init__(self, df):
        """Store ``df`` (needs ``text``, ``flag`` and ``audio`` columns)."""
        self.df = df
        self.current_selected = -1  # absolute row index; -1 means "nothing selected"
        self.current_page = 0       # zero-based page index
        self.next_prveo = 0         # row cursor used by get_next_page/get_prev_page
        self.datatable = df
        # Explicit copy so later in-place flag writes never target a view of df.
        self.data = self.df[['text', 'flag']].copy()
        self.sdata = self.df['audio'].to_list()

    def settt(self, df):
        """Replace the working dataset with ``df`` and reset all cursors.

        Returns the new ``text``/``flag`` frame.
        """
        self.df = df
        self.data = self.df[['text', 'flag']].copy()
        self.sdata = self.df['audio'].to_list()
        self.next_prveo = 0
        self.current_page = 0
        # -1 (not 1) so Save/Delete do nothing until a row is actually selected.
        self.current_selected = -1
        return self.data

    def _last_page(self):
        """Return the zero-based index of the last page (0 for an empty dataset)."""
        return max(0, (len(self.data) - 1) // self.PAGE_SIZE)

    def _has_selection(self):
        """Return True when ``current_selected`` points at an existing row."""
        return 0 <= self.current_selected < len(self.data)

    def _text_at_cursor(self):
        """Return the text of the row under ``next_prveo`` and sync the page."""
        if len(self.data) == 0:
            return ""
        # Keep the page index consistent with the row cursor instead of
        # overwriting it with a row number.
        self.current_page = self.next_prveo // self.PAGE_SIZE
        return self.data.iloc[self.next_prveo]['text']

    def get_prev_page(self, pagenumber):
        """Move the row cursor one row back (stopping at row 0) and return its text.

        ``pagenumber`` is accepted for interface compatibility and is not used.
        """
        if self.next_prveo > 0:
            self.next_prveo -= 1
        return self._text_at_cursor()

    def finsh_data(self):
        """Write the edited text/flag/audio values back into ``self.df`` and return it."""
        self.df['audio'] = self.sdata
        self.df[['text', 'flag']] = self.data
        return self.df

    def get_next_page(self, pagenumber):
        """Move the row cursor one row forward (stopping at the last row) and return its text.

        ``pagenumber`` is accepted for interface compatibility and is not used.
        """
        if self.next_prveo < len(self.data) - 1:
            self.next_prveo += 1
        return self._text_at_cursor()

    def get_page_data(self, page_number):
        """Return the slice of ``self.data`` for the zero-based ``page_number``."""
        start_index = int(page_number) * self.PAGE_SIZE
        return self.data.iloc[start_index:start_index + self.PAGE_SIZE]

    def update_page(self, new_page):
        """Switch to ``new_page`` (zero-based), clamped to the valid range.

        Returns ``(page_frame, has_previous, has_next, current_page)``.
        """
        last_page = self._last_page()
        try:
            requested = int(new_page)
        except (TypeError, ValueError):
            # An empty or invalid number box keeps the current page.
            requested = self.current_page
        self.current_page = min(max(requested, 0), last_page)
        return (
            self.get_page_data(self.current_page),
            self.current_page > 0,
            self.current_page < last_page,
            self.current_page,
        )

    def _turn_page(self, shown_page, step):
        """Button handler: move ``step`` pages from the 1-based ``shown_page``.

        Converts the booleans from ``update_page`` into ``gr.update`` objects so
        the buttons are enabled/disabled rather than having their value replaced.
        """
        try:
            index = int(shown_page) - 1
        except (TypeError, ValueError):
            index = self.current_page
        page_frame, has_prev, has_next, current = self.update_page(index + step)
        return (
            page_frame,
            gr.update(interactive=has_prev),
            gr.update(interactive=has_next),
            current + 1,  # the Page box is 1-based
        )

    def convert_dataframe_to_dataset(self, namedata, base_dir=None):
        """Save the edited data as a ``DatasetDict`` with a single ``train`` split.

        Args:
            namedata: Name appended to the save prefix.
            base_dir: Optional prefix overriding ``SAVE_ROOT``; it is
                concatenated verbatim, so include a trailing separator.

        Returns:
            The path the dataset was written to.
        """
        datatable = self.finsh_data()
        if "__index_level_0__" in datatable.columns:
            datatable = datatable.drop(columns=["__index_level_0__"])
        dataset = DatasetDict({"train": Dataset.from_pandas(datatable)})
        root = self.SAVE_ROOT if base_dir is None else base_dir
        dirr = root + namedata
        dataset.save_to_disk(dirr)
        return dirr

    def read_dataset(self, link):
        """Load a ``DatasetDict`` from ``link`` and show its first page.

        Returns ``(page_frame, link)`` on success, or ``(None, message)`` when
        loading fails.
        """
        try:
            dataset = DatasetDict.load_from_disk(link)
            df = dataset["train"].to_pandas()
            self.settt(df)
            return self.get_page_data(self.current_page), link
        except FileNotFoundError:
            return None, f"Error: Dataset not found at {link}"
        except Exception as e:
            # UI boundary: report the failure in the text box instead of crashing.
            return None, f"Error loading dataset: {e}"

    def on_select(self, evt: gr.SelectData):
        """Remember the clicked table row and return ``(text, audio)`` for the widgets.

        The audio slot is always cleared (``None``).  When the event carries no
        index, or the row is out of range, the text box is cleared as well.
        """
        if not evt.index:
            return "", None
        # evt.index[0] is treated as the row offset inside the displayed page.
        selected = self.current_page * self.PAGE_SIZE + evt.index[0]
        if not 0 <= selected < len(self.data):
            return "", None
        self.current_selected = selected
        return self.data.iloc[selected]['text'], None

    def on_saveAs_row(self):
        """Refresh the table with the current page and clear the text and status boxes."""
        return self.get_page_data(self.current_page), None, ""

    def on_row_save(self, text, data_oudio):
        """Flag the selected row as done and store its recording, if any.

        Returns ``(text_value, audio_value)`` for the text box and audio widget.
        Nothing is stored when ``text`` is empty; when no valid row is selected
        the widgets are returned unchanged so the recording is not lost.
        """
        if not text:
            return "", None
        if not self._has_selection():
            return text, data_oudio
        flag_col = self.data.columns.get_loc('flag')
        self.data.iloc[self.current_selected, flag_col] = 1
        if data_oudio is not None:
            self.sdata[self.current_selected] = data_oudio
        return "", data_oudio

    def on_row_delete(self):
        """Clear the flag and audio of the selected row, then refresh the table."""
        if self._has_selection():
            flag_col = self.data.columns.get_loc('flag')
            self.data.iloc[self.current_selected, flag_col] = 0
            self.sdata[self.current_selected] = None
        return self.get_page_data(self.current_page), None, ""

    def create_interface(self):
        """Build the Gradio Blocks UI, wire the event handlers, and launch the app."""
        with gr.Blocks() as demo:
            with gr.Column():
                self.txturll = gr.Textbox(placeholder="link dir", interactive=True)
                self.btn_displayy = gr.Button("Load Dataset")
            with gr.Row():
                with gr.Column():
                    # Show the same text/flag page that every handler returns.
                    self.table = gr.Dataframe(
                        value=self.get_page_data(self.current_page),
                        headers=['text', 'flag'],
                        interactive=True,
                    )
                    with gr.Row(equal_height=False):
                        self.prev_button = gr.Button("Previous Page", scale=1, size="sm")
                        self.page_number = gr.Number(value=self.current_page + 1, label="Page", scale=1)
                        self.next_button = gr.Button("Next Page", scale=1, size="sm")
                    with gr.Column():
                        self.txtsaveurl = gr.Textbox(placeholder="Save Dataset", interactive=True)
                        self.btn_savedataset = gr.Button("Save Dataset")
                        self.label = gr.Text("STATE")

                with gr.Column():
                    self.txt_audio = gr.Textbox(label="Audio Text", interactive=True, rtl=True)
                    self.btn_record = gr.Audio(interactive=True)

                    with gr.Row():
                        self.btn_save = gr.Button("Save")
                        self.btn_saveAs = gr.Button("SaveAs")
                        self.btn_delete = gr.Button("Delete")

            self.btn_displayy.click(self.read_dataset, [self.txturll], [self.table, self.txtsaveurl])
            self.table.select(self.on_select, None, [self.txt_audio, self.btn_record])
            self.btn_save.click(self.on_row_save, [self.txt_audio, self.btn_record], [self.txt_audio, self.btn_record])
            self.btn_saveAs.click(self.on_saveAs_row, [], [self.table, self.txt_audio, self.label])
            self.btn_delete.click(self.on_row_delete, [], [self.table, self.txt_audio, self.label])
            self.btn_savedataset.click(self.convert_dataframe_to_dataset, [self.txtsaveurl], [self.label])

            paging_outputs = [self.table, self.prev_button, self.next_button, self.page_number]
            self.prev_button.click(lambda page: self._turn_page(page, -1), [self.page_number], paging_outputs)
            self.next_button.click(lambda page: self._turn_page(page, 1), [self.page_number], paging_outputs)
        demo.launch()
|
170 |
+
# Start from an empty frame that already has the three columns DataEditor reads.
df1 = pd.DataFrame(columns=['text', 'flag', 'audio'])

# create_interface() builds the UI and launches the Gradio app.
editor = DataEditor(df1)
editor.create_interface()
|