File size: 11,882 Bytes
e66a197
b4aebff
 
6147540
5917490
b4aebff
 
dd08d78
4a69a01
3f49bfa
8101a0b
4a69a01
 
 
 
 
 
 
 
 
 
 
 
8101a0b
b4aebff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4a69a01
 
b4aebff
 
 
 
 
 
 
 
 
 
 
d350c95
 
 
 
 
 
 
 
 
 
 
b4aebff
e66a197
b4aebff
 
 
 
 
 
e66a197
b4aebff
 
 
 
 
 
 
 
 
 
 
 
 
3152190
 
 
 
 
0618a80
 
 
3152190
 
6fbea2f
3152190
5aba5d7
0618a80
 
5aba5d7
0618a80
 
 
 
 
 
 
 
 
 
 
 
5aba5d7
100ece9
 
 
b4aebff
 
 
 
 
 
 
 
 
23af236
 
 
142bf38
 
b4aebff
 
070ea4d
b4aebff
 
 
 
 
 
 
 
 
 
 
 
 
 
01746ea
 
8101a0b
01746ea
 
 
b4aebff
 
 
 
 
 
 
01746ea
b4aebff
 
 
 
 
8101a0b
 
 
 
 
 
 
01746ea
b4aebff
 
 
 
 
 
 
 
 
 
 
817d2a2
b4aebff
817d2a2
 
 
 
 
 
 
 
 
 
 
 
 
 
915d761
817d2a2
 
e8c295b
3152190
 
 
 
b4aebff
 
 
 
d203012
37bbe9e
1d16be6
b4aebff
 
d203012
b4aebff
 
 
 
 
 
 
 
f68affc
 
4a69a01
f68affc
b4aebff
f68affc
b4aebff
 
 
 
 
6eec559
54044cc
b4aebff
 
 
 
 
d203012
8a45492
b4aebff
817d2a2
b4aebff
 
817d2a2
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
import gradio as gr
import pandas as pd
import librosa
import os
import torch
import numpy as np
from datasets import Dataset, DatasetDict
from datasets import load_dataset
from df.enhance import enhance, init_df, load_audio, save_audio
# Initialise the DeepFilterNet model and its processing state once, at import
# time; remove_nn() below reuses both. The third value returned by init_df()
# is not used in this file.
model_enhance, df_state, _ = init_df()
def remove_nn(wav, sample_rate=16000):
    """Denoise a waveform with the module-level enhancement model.

    The input is resampled to the model's own rate (``df_state.sr()``),
    enhanced, resampled back to ``sample_rate`` and peak-normalised.

    Args:
        wav: 1-D array-like of audio samples recorded at ``sample_rate``.
            ``None`` or an empty sequence means "nothing to enhance".
        sample_rate: Sampling rate of ``wav`` in Hz. The returned samples are
            at this same rate.

    Returns:
        ``(sample_rate, samples)`` where ``samples`` is a float array scaled so
        that its largest absolute value is 1, or ``None`` when there is no
        input audio.
    """
    if wav is None:
        return None

    samples = np.asarray(wav)
    if samples.size == 0:
        return None

    # librosa.resample only accepts floating-point data; integer PCM is
    # rescaled to [-1, 1] first so the model sees a conventional range.
    if np.issubdtype(samples.dtype, np.integer):
        full_scale = float(np.iinfo(samples.dtype).max)
        samples = samples.astype(np.float32) / full_scale
    else:
        samples = samples.astype(np.float32)

    model_sr = df_state.sr()
    resampled = librosa.resample(samples, orig_sr=sample_rate, target_sr=model_sr)

    # Add the leading channel dimension without going through a Python list.
    audio = torch.from_numpy(np.ascontiguousarray(resampled)).unsqueeze(0)

    enhanced = enhance(model_enhance, df_state, audio)

    audiodata = librosa.resample(
        enhanced[0].detach().cpu().numpy(),
        orig_sr=model_sr,
        target_sr=sample_rate,
    )

    # Normalise by the absolute peak; skip it for pure silence so the result
    # never contains NaN/inf from a division by zero.
    peak = float(np.max(np.abs(audiodata))) if audiodata.size else 0.0
    if peak > 0:
        audiodata = audiodata / peak

    return sample_rate, audiodata
class DataEditor:
    """Gradio editor for a paged table of text rows with optional audio.

    State kept on the instance:
        df: the full DataFrame with 'text', 'flag' and 'audio' columns.
        data: an independent copy of the 'text' and 'flag' columns; this is
            what the table shows, PAGE_SIZE rows at a time.
        sdata: the 'audio' column as a plain list (one entry per row).
        current_page: 0-based index of the page currently shown.
        current_selected: absolute row index of the selected row, or -1 when
            nothing is selected.
        next_prveo: row cursor used by get_prev_page() / get_next_page().
    """

    PAGE_SIZE = 10       # rows shown per table page
    SAMPLE_RATE = 16000  # rate (Hz) at which audio is stored and played back

    def __init__(self, df):
        """Wrap *df*, which must have 'text', 'flag' and 'audio' columns."""
        self.df = df
        self.datatable = df
        self.current_selected = -1
        self.current_page = 0
        # Initialised here so the row-cursor methods work before settt().
        self.next_prveo = 0
        # .copy() so later flag writes never target a view of self.df.
        self.data = self.df[['text', 'flag']].copy()
        self.sdata = self.df['audio'].to_list()

    def settt(self, df):
        """Replace the edited DataFrame and reset paging and selection.

        Returns the new 'text'/'flag' frame.
        """
        self.df = df
        self.data = self.df[['text', 'flag']].copy()
        self.sdata = self.df['audio'].to_list()
        self.next_prveo = 0
        self.current_page = 0
        # A freshly loaded table has no selected row.
        self.current_selected = -1
        return self.data

    def _has_selection(self):
        """Return True when current_selected points at an existing row."""
        return 0 <= self.current_selected < len(self.sdata)

    def _page_count(self):
        """Return the number of pages (at least 1, counting a partial page)."""
        return max(1, -(-len(self.data) // self.PAGE_SIZE))

    def get_output_audio(self):
        """Return the stored audio of the selected row, or None."""
        return self.sdata[self.current_selected] if self._has_selection() else None

    def get_prev_page(self, pagenumber):
        """Move the row cursor one row back and return that row's text.

        Returns None when the cursor is already on the first row.
        *pagenumber* is accepted for interface compatibility and not used.
        """
        if self.next_prveo <= 0 or self.next_prveo > len(self.data):
            return None
        self.next_prveo -= 1
        self.current_page = self.next_prveo
        return self.data.iloc[self.next_prveo]['text']

    def finsh_data(self):
        """Write the edited audio/text/flag values back into df and return it."""
        self.df['audio'] = self.sdata
        self.df[['text', 'flag']] = self.data
        return self.df

    def login(self, token):
        """Check *token* against the ``token_login`` environment variable.

        Returns updates for (login panel, main panel) plus the session flag:
        True on success, None otherwise.
        """
        import hmac  # local import keeps this class self-contained

        expected = os.environ.get("token_login")
        # Constant-time comparison; an unset variable never authenticates.
        authorised = (
            expected is not None
            and token is not None
            and hmac.compare_digest(
                str(token).encode("utf-8"), expected.encode("utf-8")
            )
        )
        if authorised:
            return gr.update(visible=False), gr.update(visible=True), True
        return gr.update(visible=True), gr.update(visible=False), None

    def load_demo(self, sesion):
        """Show the main panel when *sesion* is truthy, else the login panel."""
        if sesion:
            return gr.update(visible=False), gr.update(visible=True)
        return gr.update(visible=True), gr.update(visible=False)

    def get_next_page(self, pagenumber):
        """Move the row cursor one row forward and return that row's text.

        The cursor stays within the first PAGE_SIZE rows and within the data;
        None is returned when it cannot advance. *pagenumber* is accepted for
        interface compatibility and not used.
        """
        last_row = min(self.PAGE_SIZE, len(self.data)) - 1
        if self.next_prveo >= last_row:
            return None
        self.next_prveo += 1
        self.current_page = self.next_prveo
        return self.data.iloc[self.next_prveo]['text']

    def get_page_data(self, page_number):
        """Return the rows of the 0-based page *page_number*."""
        # gr.Number may deliver a float; iloc needs integer positions.
        start_index = int(page_number) * self.PAGE_SIZE
        return self.data.iloc[start_index:start_index + self.PAGE_SIZE]

    def update_page(self, new_page):
        """Switch to the 0-based page *new_page*, clamped to the valid range.

        Returns ``(page rows, has previous page, has next page, page index)``.
        """
        last_page = self._page_count() - 1
        try:
            requested = int(new_page)
        except (TypeError, ValueError):
            requested = self.current_page
        self.current_page = min(max(requested, 0), last_page)
        return (
            self.get_page_data(self.current_page),
            self.current_page > 0,
            self.current_page < last_page,
            self.current_page,
        )

    def _turn_page(self, step):
        """Move *step* pages; return (page rows, 1-based page number)."""
        page_rows, _, _, page_index = self.update_page(self.current_page + step)
        return page_rows, page_index + 1

    def create_Tabs(self):
        """Build the two input tabs (text to split, dataset to load)."""
        with gr.Tabs():
            with gr.TabItem("Dir"):
                self.text_input = gr.Textbox(
                    lines=5, placeholder="Enter your text here...", rtl=True
                )
                self.sigmant_word = gr.Number(label="sigmant_word", value=6)
                self.buttonn = gr.Button("Create Table")
            with gr.TabItem("Cut  Text"):
                self.txturll = gr.Textbox(placeholder="link dir", interactive=True)
                self.btn_displayy = gr.Button("Load Dataset", scale=1, size="sm")

    def convert_to_dataframe(self, chunks):
        """Return a frame with one row per chunk, flag 0 and no audio.

        Column names are lower-case because settt() reads 'text', 'flag' and
        'audio'.
        """
        return pd.DataFrame({'text': chunks, 'flag': 0, 'audio': None})

    def create_chunks_with_properties(self, text, sigmant_word):
        """Split *text* into chunks of *sigmant_word* words and load them.

        A missing or non-positive *sigmant_word* yields a single chunk with
        all words. Returns the first page of the new table.
        """
        words = (text or "").split()  # split the text into words
        try:
            chunk_size = int(sigmant_word)
        except (TypeError, ValueError):
            chunk_size = 0

        if chunk_size > 0:
            # The last chunk holds the remaining words if the count is uneven.
            chunks = [
                " ".join(words[start:start + chunk_size])
                for start in range(0, len(words), chunk_size)
            ]
        else:
            chunks = [" ".join(words)] if words else []

        self.settt(self.convert_to_dataframe(chunks))
        return self.get_page_data(self.current_page)

    def _on_create_table(self, text, sigmant_word):
        """UI handler: build the table and reset the page indicator."""
        page_rows = self.create_chunks_with_properties(text, sigmant_word)
        return page_rows, self.current_page + 1

    def convert_dataframe_to_dataset(self, namedata):
        """Push the edited data to the hub as a private dataset *namedata*.

        The token is read from the ``auth_acess_data`` environment variable.
        Returns *namedata*.
        """
        datatable = self.finsh_data()
        if "__index_level_0__" in datatable.columns:
            datatable = datatable.drop(columns=["__index_level_0__"])
        dataset = DatasetDict({"train": Dataset.from_pandas(datatable)})
        dataset.push_to_hub(
            namedata, token=os.environ.get("auth_acess_data"), private=True
        )
        return namedata

    def read_dataset(self, link):
        """Load the 'train' split of dataset *link* into the editor.

        Returns ``(first page rows, link)`` on success and
        ``(None, error message)`` on failure.
        """
        try:
            dataset = load_dataset(link, token=os.environ.get("auth_acess_data"))
            self.settt(dataset["train"].to_pandas())
            return self.get_page_data(self.current_page), link
        except FileNotFoundError:
            return None, f"Error: Dataset not found at {link}"
        except Exception as e:
            return None, f"Error loading dataset: {e}"

    def _on_load_dataset(self, link):
        """UI handler: load a dataset and reset the page indicator."""
        page_rows, message = self.read_dataset(link)
        return page_rows, message, self.current_page + 1

    def on_select(self, evt: gr.SelectData):
        """Select the clicked table row.

        Returns ``(text, audio)`` for the text box and the audio widget;
        audio is ``(SAMPLE_RATE, samples)`` only for flagged rows that have
        stored audio, otherwise None.
        """
        if not evt.index:
            return "", None

        selected = self.current_page * self.PAGE_SIZE + evt.index[0]
        if not 0 <= selected < len(self.data):
            return "", None

        self.current_selected = selected
        row = self.data.iloc[selected]
        stored_audio = self.sdata[selected]
        if row['flag'] != 0 and stored_audio is not None:
            return row['text'], (self.SAMPLE_RATE, np.asarray(stored_audio))
        return row['text'], None

    def on_saveAs_row(self):
        """Refresh the table and clear the audio widget and the text box."""
        return self.get_page_data(self.current_page), None, ""

    @staticmethod
    def _prepare_audio(sr, audio, target_sr=16000):
        """Return *audio* as mono float32, peak-normalised, at *target_sr*."""
        samples = np.asarray(audio).astype(np.float32)
        if samples.ndim > 1:
            # Assumes a (samples, channels) layout, as gr.Audio documents for
            # numpy output; confirm if another source feeds this method.
            samples = samples.mean(axis=1)
        peak = float(np.max(np.abs(samples))) if samples.size else 0.0
        if peak > 0:  # leave silence untouched instead of dividing by zero
            samples = samples / peak
        if sr != target_sr:
            samples = librosa.resample(samples, orig_sr=sr, target_sr=target_sr)
        return samples

    def on_row_save(self, text, data_oudio):
        """Store the recorded audio for the selected row and flag it.

        Nothing is stored when *text* is empty, no audio was recorded or no
        row is selected. The edited text itself is not written back.
        Returns values that clear the text box and the audio widget.
        """
        if text != "" and data_oudio is not None and self._has_selection():
            sr, audio = data_oudio
            # Convert first, so a failure cannot leave a flagged row without
            # audio.
            prepared = self._prepare_audio(sr, audio, self.SAMPLE_RATE)
            flag_col = self.data.columns.get_loc('flag')
            self.data.iloc[self.current_selected, flag_col] = 1
            self.sdata[self.current_selected] = prepared
        return "", None

    def on_row_delete(self):
        """Clear the selected row's flag and audio, then refresh the table."""
        if self._has_selection():
            flag_col = self.data.columns.get_loc('flag')
            self.data.iloc[self.current_selected, flag_col] = 0
            self.sdata[self.current_selected] = None
        return self.get_page_data(self.current_page), None, ""

    def _on_enhance(self):
        """UI handler: denoise the selected row's stored audio, if any."""
        stored_audio = self.get_output_audio()
        if stored_audio is None:
            return None
        return remove_nn(stored_audio)

    def startt(self):
        """Build and return the Blocks app: a login panel gating the editor."""
        with gr.Blocks() as demo:
            sesion_state = gr.State()

            with gr.Column(scale=1, min_width=200, visible=True) as login_panal:
                gr.Markdown("## auth acess page")
                token_login = gr.Textbox(label="token")
                login_button = gr.Button("Login")

            with gr.Column(scale=1, visible=False) as main_panel:
                self.create_interface()

            login_button.click(
                self.login,
                inputs=[token_login],
                outputs=[login_panal, main_panel, sesion_state],
            )
            demo.load(self.load_demo, [sesion_state], [login_panal, main_panel])

        return demo

    def create_interface(self):
        """Create the editor widgets and connect their event handlers.

        Must be called inside an active ``gr.Blocks`` context.
        """
        with gr.Row():
            self.create_Tabs()

        with gr.Row():
            with gr.Column():
                # Show the same 'text'/'flag' page that every handler returns.
                self.table = gr.Dataframe(
                    value=self.get_page_data(self.current_page),
                    headers=['text', 'flag'],
                    interactive=True,
                )
                with gr.Row(equal_height=False):
                    self.prev_button = gr.Button(
                        "Previous Page", scale=1, size="sm", variant="primary"
                    )
                    self.page_number = gr.Number(
                        value=self.current_page + 1, label="Page", scale=1
                    )
                    self.next_button = gr.Button(
                        "Next Page", scale=1, size="sm", variant="primary"
                    )
                with gr.Column():
                    self.txtsaveurl = gr.Textbox(
                        placeholder="Save Dataset", interactive=True
                    )
                    self.btn_savedataset = gr.Button(
                        "Save Dataset", scale=1, size="sm", variant="primary"
                    )
                    self.label = gr.Text("STATE")

            with gr.Column():
                self.txt_audio = gr.Textbox(
                    label="Audio Text", interactive=True, rtl=True
                )
                self.btn_record = gr.Audio(interactive=True)

                with gr.Row():
                    self.btn_save = gr.Button(
                        "Save", size="sm", variant="primary", min_width=50
                    )
                    self.btn_saveAs = gr.Button(
                        "SaveAs", size="sm", variant="primary", min_width=50
                    )
                    self.btn_enhance = gr.Button(
                        "enhance ", size="sm", variant="primary", min_width=50
                    )
                    self.btn_delete = gr.Button(
                        "Delete", size="sm", variant="primary", min_width=50
                    )

        # Loading or creating a table also resets the page indicator.
        self.btn_displayy.click(
            self._on_load_dataset,
            [self.txturll],
            [self.table, self.txtsaveurl, self.page_number],
        )
        self.buttonn.click(
            self._on_create_table,
            [self.text_input, self.sigmant_word],
            [self.table, self.page_number],
        )

        self.table.select(self.on_select, None, [self.txt_audio, self.btn_record])
        self.btn_save.click(
            self.on_row_save,
            [self.txt_audio, self.btn_record],
            [self.txt_audio, self.btn_record],
        )
        self.btn_saveAs.click(
            self.on_saveAs_row, [], [self.table, self.btn_record, self.txt_audio]
        )
        self.btn_delete.click(
            self.on_row_delete, [], [self.table, self.btn_record, self.txt_audio]
        )
        self.btn_savedataset.click(
            self.convert_dataframe_to_dataset, [self.txtsaveurl], [self.label]
        )
        self.btn_enhance.click(self._on_enhance, [], self.btn_record)

        # Paging is driven by the server-side page index, so a stale or edited
        # number in the widget cannot skip pages; only the table and the
        # 1-based indicator are updated.
        self.prev_button.click(
            lambda: self._turn_page(-1), [], [self.table, self.page_number]
        )
        self.next_button.click(
            lambda: self._turn_page(1), [], [self.table, self.page_number]
        )
# Start from an empty table that already has the three columns DataEditor
# reads ('text', 'flag', 'audio'), build the login-gated UI and serve it.
df1=pd.DataFrame(columns=['text','flag','audio'])
editor = DataEditor(df1)
demo=editor.startt()
demo.launch()