File size: 8,768 Bytes
e66a197
b4aebff
 
6147540
b4aebff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d350c95
 
 
 
 
 
 
 
 
 
 
b4aebff
e66a197
b4aebff
 
 
 
 
 
e66a197
b4aebff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1974640
b4aebff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
817d2a2
b4aebff
817d2a2
 
 
 
 
 
 
 
 
 
 
 
 
 
915d761
817d2a2
 
e8c295b
b4aebff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
817d2a2
b4aebff
 
817d2a2
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
import gradio as gr
import pandas as pd
import librosa
import os
import numpy as np
from datasets import Dataset, DatasetDict
class DataEditor:
    """Gradio UI for reviewing a text/audio dataset held in a pandas DataFrame.

    The frame passed in must provide ``text``, ``flag`` and ``audio`` columns.
    ``text``/``flag`` are kept in ``self.data`` (shown page by page in a
    table) and the ``audio`` column is kept as a plain list in ``self.sdata``.
    ``current_selected`` is the absolute row index of the selected table row,
    or ``-1`` when nothing is selected.
    """

    # Number of rows shown on one table page.
    PAGE_SIZE = 10
    # Prefix prepended to the user-supplied name when saving a dataset.
    SAVE_ROOT = '/content/drive/MyDrive/vitsM/DATA/sata/NewData2hba/'

    def __init__(self, df):
        """Store *df* and derive the editable views from it."""
        self.df = df
        self.current_selected = -1
        self.current_page = 0
        # Row cursor used by get_prev_page/get_next_page; initialised here so
        # those methods work before settt() has been called.
        self.next_prveo = 0
        self.datatable = df
        # Explicit copy: rows are assigned into this frame later on.
        self.data = self.df[['text', 'flag']].copy()
        self.sdata = self.df['audio'].to_list()

    def settt(self, df):
        """Replace the edited frame with *df* and reset all cursors.

        Returns the new ``text``/``flag`` frame. A frame lacking one of the
        required columns raises ``KeyError`` before any state is replaced.
        """
        # Build the derived views first so a bad frame leaves the editor intact.
        data = df[['text', 'flag']].copy()
        sdata = df['audio'].to_list()

        self.df = df
        self.data = data
        self.sdata = sdata
        self.next_prveo = 0
        self.current_page = 0
        # -1 means "no row selected", matching __init__.
        self.current_selected = -1
        return self.data

    def get_prev_page(self, pagenumber):
        """Move the row cursor one row back and return that row's text.

        *pagenumber* is accepted for interface compatibility and is not used.
        Returns ``None`` when the cursor is already on the first row.
        """
        # Stop at row 0: stepping to -1 would make iloc wrap to the last row.
        if self.next_prveo <= 0:
            return None
        self.next_prveo -= 1
        # NOTE(review): current_page is set to the row cursor here, as in the
        # original code; confirm whether that is intended.
        self.current_page = self.next_prveo
        return self.data.iloc[self.next_prveo]['text']

    def finsh_data(self):
        """Write the edited audio list and text/flag columns back into ``self.df``."""
        self.df['audio'] = self.sdata
        self.df[['text', 'flag']] = self.data
        return self.df

    def login(self, token):
        """Check *token* against the ``token_login`` environment variable.

        Returns updates for (login panel, main panel) plus the session value:
        ``True`` on success, ``None`` otherwise.
        """
        import hmac  # stdlib; local import keeps the file's import block unchanged

        expected = os.environ.get("token_login")
        supplied = token if isinstance(token, str) else ""
        # Fail closed when no token is configured, and compare in constant
        # time (on bytes, so non-ASCII input is accepted by compare_digest).
        granted = bool(expected) and hmac.compare_digest(
            supplied.encode("utf-8"), expected.encode("utf-8")
        )
        if granted:
            return gr.update(visible=False), gr.update(visible=True), True
        return gr.update(visible=True), gr.update(visible=False), None

    def load_demo(self, sesion):
        """Show the main panel when *sesion* is truthy, else the login panel."""
        if sesion:
            return gr.update(visible=False), gr.update(visible=True)
        return gr.update(visible=True), gr.update(visible=False)

    def get_next_page(self, pagenumber):
        """Move the row cursor one row forward and return that row's text.

        *pagenumber* is accepted for interface compatibility and is not used.
        The cursor never goes past row ``PAGE_SIZE - 1`` nor past the last
        existing row; ``None`` is returned when it cannot advance.
        """
        last_row = min(self.PAGE_SIZE, len(self.data)) - 1
        if self.next_prveo >= last_row:
            return None
        self.next_prveo += 1
        # NOTE(review): current_page is set to the row cursor here, as in the
        # original code; confirm whether that is intended.
        self.current_page = self.next_prveo
        return self.data.iloc[self.next_prveo]['text']

    def get_page_data(self, page_number):
        """Return the ``text``/``flag`` rows of the 0-based page *page_number*."""
        # gr.Number may deliver floats; iloc needs integer slice bounds.
        start_index = int(page_number) * self.PAGE_SIZE
        return self.data.iloc[start_index:start_index + self.PAGE_SIZE]

    def update_page(self, new_page):
        """Switch to the 0-based page *new_page*, clamped to the valid range.

        Returns ``(page_rows, has_previous, has_next, current_page)``.
        """
        # Index of the last page, counting a trailing partial page.
        last_page = max(0, (len(self.data) - 1) // self.PAGE_SIZE)
        self.current_page = min(max(int(new_page), 0), last_page)
        return (
            self.get_page_data(self.current_page),
            self.current_page > 0,
            self.current_page < last_page,
            self.current_page,
        )

    def convert_dataframe_to_dataset(self, namedata):
        """Save the edited frame as a ``DatasetDict`` with a single ``train`` split.

        The target directory is ``SAVE_ROOT + namedata``; it is returned.
        """
        train_df = self.finsh_data()
        # Drop the index column if an earlier round-trip left one in the frame.
        if "__index_level_0__" in train_df.columns:
            train_df = train_df.drop(columns=["__index_level_0__"])
        dataset = DatasetDict({"train": Dataset.from_pandas(train_df)})
        dirr = self.SAVE_ROOT + namedata
        dataset.save_to_disk(dirr)
        return dirr

    def read_dataset(self, link):
        """Load the dataset at *link* and show its first page.

        Returns ``(page_rows, link)`` on success, or ``(None, message)`` when
        loading fails.
        """
        try:
            # NOTE(review): confirm that load_from_disk accepts a `token`
            # keyword in the installed `datasets` version; if it does not, the
            # resulting TypeError is reported by the generic handler below.
            dataset = DatasetDict.load_from_disk(link, token=os.environ.get("auth_acess_data"))
            self.settt(dataset["train"].to_pandas())
            return self.get_page_data(self.current_page), link
        except FileNotFoundError:
            return None, f"Error: Dataset not found at {link}"
        except Exception as e:
            return None, f"Error loading dataset: {e}"

    def on_select(self, evt: gr.SelectData):
        """Remember the clicked table row and return ``(row text, None)``.

        Returns ``(None, " ")`` and clears the selection when the event has no
        index or the index does not map to an existing row.
        """
        if not evt.index:
            return None, " "
        # evt.index[0] is the row within the visible page; convert to absolute.
        selected = self.current_page * self.PAGE_SIZE + evt.index[0]
        if not 0 <= selected < len(self.data):
            self.current_selected = -1
            return None, " "
        self.current_selected = selected
        return self.data.iloc[selected]['text'], None

    def on_saveAs_row(self):
        """Return the current page plus cleared text and status values."""
        return self.get_page_data(self.current_page), None, ""

    def on_row_save(self, text, data_oudio):
        """Flag the selected row as kept (``flag = 1``) and store new audio.

        Nothing is changed when *text* is empty or no valid row is selected.
        Returns ``("", data_oudio)`` after a save and ``("", None)`` otherwise.
        """
        if not text or not self._selection_in_range():
            return "", None
        self._set_selected_flag(1)
        # Keep the existing audio when no new recording was supplied.
        if data_oudio is not None:
            self.sdata[self.current_selected] = data_oudio
        return "", data_oudio

    def on_row_delete(self):
        """Flag the selected row as removed (``flag = 0``) and drop its audio.

        Returns the current page plus cleared text and status values.
        """
        if self._selection_in_range():
            self._set_selected_flag(0)
            self.sdata[self.current_selected] = None
        return self.get_page_data(self.current_page), None, ""

    def startt(self):
        """Build and return the Gradio app: a login panel in front of the editor."""
        with gr.Blocks() as demo:
            sesion_state = gr.State()

            # Login panel, visible until a valid token is entered.
            with gr.Column(scale=1, min_width=200, visible=True) as login_panal:
                gr.Markdown("## auth acess page")
                token_login = gr.Textbox(label="token", type="password")
                login_button = gr.Button("Login")

            with gr.Column(scale=1, visible=False) as main_panel:
                self.create_interface()

            login_button.click(
                self.login,
                inputs=[token_login],
                outputs=[login_panal, main_panel, sesion_state],
            )
            demo.load(self.load_demo, [sesion_state], [login_panal, main_panel])

        return demo

    def create_interface(self):
        """Create the editor widgets and wire their events.

        Must be called inside an active ``gr.Blocks`` context.
        """
        with gr.Column():
            self.txturll = gr.Textbox(placeholder="link dir", interactive=True)
            self.btn_displayy = gr.Button("Load Dataset")

        with gr.Row():
            with gr.Column():
                # Show the same text/flag page view that every later update uses.
                self.table = gr.Dataframe(
                    value=self.get_page_data(self.current_page),
                    headers=['text', 'flag'],
                    interactive=True,
                )
                with gr.Row(equal_height=False):
                    self.prev_button = gr.Button("Previous Page", scale=1, size="sm")
                    # The page number is displayed 1-based.
                    self.page_number = gr.Number(value=self.current_page + 1, label="Page", scale=1)
                    self.next_button = gr.Button("Next Page", scale=1, size="sm")
                with gr.Column():
                    self.txtsaveurl = gr.Textbox(placeholder="Save Dataset", interactive=True)
                    self.btn_savedataset = gr.Button("Save Dataset")
                    self.label = gr.Text("STATE")

            with gr.Column():
                self.txt_audio = gr.Textbox(label="Audio Text", interactive=True, rtl=True)
                self.btn_record = gr.Audio(interactive=True)

                with gr.Row():
                    self.btn_save = gr.Button("Save")
                    self.btn_saveAs = gr.Button("SaveAs")
                    self.btn_delete = gr.Button("Delete")

        self.btn_displayy.click(self.read_dataset, [self.txturll], [self.table, self.txtsaveurl])
        self.table.select(self.on_select, None, [self.txt_audio, self.btn_record])
        self.btn_save.click(
            self.on_row_save,
            [self.txt_audio, self.btn_record],
            [self.txt_audio, self.btn_record],
        )
        self.btn_saveAs.click(self.on_saveAs_row, [], [self.table, self.txt_audio, self.label])
        self.btn_delete.click(self.on_row_delete, [], [self.table, self.txt_audio, self.label])
        self.btn_savedataset.click(self.convert_dataframe_to_dataset, [self.txtsaveurl], [self.label])
        # Only the table and the page number are updated: the has-previous /
        # has-next booleans from update_page are not valid button values.
        self.prev_button.click(
            lambda page: self._turn_page(page, -1),
            [self.page_number],
            [self.table, self.page_number],
        )
        self.next_button.click(
            lambda page: self._turn_page(page, 1),
            [self.page_number],
            [self.table, self.page_number],
        )

    def _selection_in_range(self):
        """Return True when ``current_selected`` indexes an existing row."""
        return 0 <= self.current_selected < len(self.data)

    def _set_selected_flag(self, value):
        """Write *value* into the ``flag`` cell of the selected row."""
        flag_col = self.data.columns.get_loc('flag')
        self.data.iloc[self.current_selected, flag_col] = value

    def _turn_page(self, shown_page, step):
        """Move *step* pages from the 1-based page shown in the page widget.

        Returns ``(page_rows, page_to_display)`` with a 1-based page number.
        """
        try:
            target = int(shown_page) - 1 + step
        except (TypeError, ValueError, OverflowError):
            # Empty or non-numeric widget value: fall back to the tracked page.
            target = self.current_page + step
        page_rows, _has_prev, _has_next, current = self.update_page(target)
        return page_rows, current + 1
# Start with an empty frame carrying the three columns DataEditor reads;
# a real dataset is loaded later through the UI.
df1 = pd.DataFrame(columns=['text', 'flag', 'audio'])
editor = DataEditor(df1)
# `demo` stays a module-level name so tools that import this module can find it.
demo = editor.startt()

# Only start the server when the file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()