# Spaces: Sleeping
import hmac
import os

import gradio as gr
import librosa
import numpy as np
import pandas as pd
import torch
from datasets import Dataset, DatasetDict
from datasets import load_dataset
from df.enhance import enhance, init_df, load_audio, save_audio
# Load the enhancement model once at import time. `init_df()` returns three
# values; the third is not used anywhere in this file, so it is discarded.
# `df_state` is also queried for the model's working sample rate (`.sr()`).
model_enhance, df_state, _ = init_df()
def remove_nn(wav, sample_rate=16000):
    """Run the enhancement model over a mono clip and peak-normalise the result.

    The clip is resampled to the model's working rate (``df_state.sr()``),
    passed through ``enhance`` as a one-channel batch, resampled back to
    ``sample_rate`` and scaled so its largest absolute sample is 1.0.

    Args:
        wav: 1-D array-like of samples recorded at ``sample_rate``, or ``None``.
        sample_rate: Sampling rate of ``wav``; also the rate of the returned clip.

    Returns:
        ``(sample_rate, samples)``, or ``None`` when there is nothing to
        enhance (``wav`` is ``None`` or empty).
    """
    # Callers pass the currently selected clip, which may legitimately be absent.
    if wav is None:
        return None

    samples = np.asarray(wav, dtype=np.float32)
    if samples.size == 0:
        return None

    model_sr = df_state.sr()
    resampled = librosa.resample(samples, orig_sr=sample_rate, target_sr=model_sr)

    # Add the leading channel axis directly instead of round-tripping through
    # a Python list of arrays.
    batch = torch.from_numpy(np.ascontiguousarray(resampled)).unsqueeze(0)
    enhanced = enhance(model_enhance, df_state, batch)

    audiodata = librosa.resample(
        enhanced[0].numpy(), orig_sr=model_sr, target_sr=sample_rate
    )

    # Normalise by the absolute peak: the signed maximum can be zero or
    # smaller in magnitude than the negative peak, which would divide by zero
    # or push samples outside [-1, 1].
    peak = float(np.max(np.abs(audiodata))) if audiodata.size else 0.0
    if peak > 0:
        audiodata = audiodata / peak

    return sample_rate, audiodata
class DataEditor:
    """Gradio editor for a table of text segments with one audio clip per row.

    Instance state:
        df: the full frame, with ``text``, ``flag`` and ``audio`` columns.
        datatable: the frame the editor was constructed with.
        data: editable copy of the ``text``/``flag`` columns of ``df``.
        sdata: per-row audio clips as a plain list (``None`` = no clip).
        current_page: zero-based index of the page shown in the table.
        current_selected: absolute row index of the selection, -1 for none.
        next_prveo: row cursor used by ``get_prev_page``/``get_next_page``.

    ``flag`` is set to 1 when a clip is saved for a row and back to 0 when the
    clip is deleted.
    """

    PAGE_SIZE = 10            # rows shown per table page
    SAMPLE_RATE = 16000       # rate every stored clip is converted to
    DEFAULT_CHUNK_WORDS = 6   # default words-per-segment offered in the UI
    MAX_CURSOR = 9            # upper bound of the row cursor; presumably PAGE_SIZE - 1

    def __init__(self, df):
        """Bind the editor to ``df`` (columns ``text``, ``flag``, ``audio``)."""
        self.datatable = df
        self.settt(df)

    # ------------------------------------------------------------------ state

    def settt(self, df):
        """Replace the edited frame and reset paging, selection and cursor.

        Returns:
            The fresh ``text``/``flag`` working copy (``self.data``).
        """
        self.df = df
        # Work on a copy so cell edits never write through a slice of ``df``.
        self.data = self.df[['text', 'flag']].copy()
        self.sdata = self.df['audio'].to_list()
        self.next_prveo = 0
        self.current_page = 0
        # -1 means "no row selected"; any other start value would let Save,
        # Delete or Enhance act on a row the user never clicked.
        self.current_selected = -1
        return self.data

    def _has_selection(self):
        """Return True when ``current_selected`` points at an existing row."""
        return 0 <= self.current_selected < len(self.sdata)

    def _set_flag(self, row_index, value):
        """Write ``value`` into the ``flag`` cell of the given absolute row."""
        self.data.iloc[row_index, self.data.columns.get_loc('flag')] = value

    def _last_page(self):
        """Return the zero-based index of the last page (0 for an empty table)."""
        return max(0, (len(self.data) - 1) // self.PAGE_SIZE)

    def get_output_audio(self):
        """Return the clip of the selected row, or ``None`` without a selection."""
        if not self._has_selection():
            return None
        return self.sdata[self.current_selected]

    def finsh_data(self):
        """Write the edited columns and clips back into ``df`` and return it."""
        self.df['audio'] = self.sdata
        self.df[['text', 'flag']] = self.data
        return self.df

    # ------------------------------------------------------------- row cursor

    def _text_at_cursor(self):
        """Return the text at the row cursor, or ``None`` if it is out of range."""
        # NOTE(review): the original also copies the row cursor into
        # ``current_page``; kept as-is, but it looks unintended because the
        # paging code treats ``current_page`` as a page index.
        self.current_page = self.next_prveo
        if not 0 <= self.next_prveo < len(self.data):
            return None
        return self.data.iloc[self.next_prveo]['text']

    def get_prev_page(self, pagenumber):
        """Move the row cursor one step back (not below 0) and return its text.

        ``pagenumber`` is accepted for signature compatibility and is unused.
        """
        if self.next_prveo > 0:
            self.next_prveo -= 1
        return self._text_at_cursor()

    def get_next_page(self, pagenumber):
        """Move the row cursor one step forward and return its text.

        The cursor stops at ``MAX_CURSOR`` or at the last row, whichever comes
        first. ``pagenumber`` is accepted for signature compatibility and is
        unused.
        """
        if self.next_prveo < min(self.MAX_CURSOR, len(self.data) - 1):
            self.next_prveo += 1
        return self._text_at_cursor()

    # ------------------------------------------------------------------ login

    def login(self, token):
        """Check ``token`` against the ``token_login`` environment variable.

        Returns:
            Updates for (login panel, main panel) plus the session value:
            ``True`` on success, ``None`` on failure.
        """
        expected = os.environ.get("token_login")
        # Refuse when the secret is unset or empty, so that a missing
        # configuration can never match an empty/None token. The comparison is
        # constant-time and done on bytes so non-ASCII tokens are accepted.
        granted = (
            bool(expected)
            and isinstance(token, str)
            and hmac.compare_digest(token.encode("utf-8"), expected.encode("utf-8"))
        )
        if granted:
            return gr.update(visible=False), gr.update(visible=True), True
        return gr.update(visible=True), gr.update(visible=False), None

    def load_demo(self, sesion):
        """Show the main panel when a session value is present, else the login."""
        if sesion:
            return gr.update(visible=False), gr.update(visible=True)
        return gr.update(visible=True), gr.update(visible=False)

    # ----------------------------------------------------------------- paging

    def get_page_data(self, page_number):
        """Return the ``text``/``flag`` rows belonging to zero-based ``page_number``."""
        # The number widget delivers floats; positional slicing needs ints.
        start_index = int(page_number) * self.PAGE_SIZE
        return self.data.iloc[start_index:start_index + self.PAGE_SIZE]

    def update_page(self, new_page):
        """Switch to ``new_page`` (zero-based), clamped to the existing pages.

        Returns:
            ``(page rows, has previous page, has next page, current page)``.
        """
        try:
            requested = int(new_page)
        except (TypeError, ValueError):
            requested = self.current_page
        last_page = self._last_page()
        self.current_page = min(max(requested, 0), last_page)
        return (
            self.get_page_data(self.current_page),
            self.current_page > 0,
            self.current_page < last_page,
            self.current_page,
        )

    def _step_page(self, shown_page, step):
        """Handle a Previous/Next click.

        ``shown_page`` is the one-based number displayed in the page widget.
        Returns the new page rows and the new one-based page number.
        """
        if shown_page is None:
            shown_page = self.current_page + 1
        table, _has_prev, _has_next, page = self.update_page(int(shown_page) - 1 + step)
        return table, page + 1

    # --------------------------------------------------------------- text tab

    def create_Tabs(self):
        """Create the two input tabs: free text to segment, and dataset loader."""
        with gr.Tabs():
            with gr.TabItem("Dir"):
                self.text_input = gr.Textbox(lines=5, placeholder="Enter your text here...", rtl=True)
                self.sigmant_word = gr.Number(label="sigmant_word", value=self.DEFAULT_CHUNK_WORDS)
                self.buttonn = gr.Button("Create Table")
            with gr.TabItem("Cut Text"):
                self.txturll = gr.Textbox(placeholder="link dir", interactive=True)
                self.btn_displayy = gr.Button("Load Dataset", scale=1, size="sm")

    def convert_to_dataframe(self, chunks):
        """Build an editor frame from text chunks: flag 0 and no audio per row."""
        chunks = list(chunks)
        # Column names must match what ``settt`` reads (lower case).
        return pd.DataFrame({
            'text': chunks,
            'flag': [0] * len(chunks),
            'audio': [None] * len(chunks),
        })

    def create_chunks_with_properties(self, text, sigmant_word):
        """Split ``text`` into segments of ``sigmant_word`` words and load them.

        The last segment may be shorter. ``sigmant_word`` is truncated to an
        integer and forced to be at least 1; ``None`` falls back to
        ``DEFAULT_CHUNK_WORDS``.

        Returns:
            The new ``text``/``flag`` table (the value returned by ``settt``).
        """
        if sigmant_word is None:
            sigmant_word = self.DEFAULT_CHUNK_WORDS
        size = max(1, int(sigmant_word))
        words = (text or "").split()
        chunks = [" ".join(words[i:i + size]) for i in range(0, len(words), size)]
        return self.settt(self.convert_to_dataframe(chunks))

    # ---------------------------------------------------------------- dataset

    def convert_dataframe_to_dataset(self, namedata):
        """Push the edited frame to the hub as a private dataset named ``namedata``.

        The frame becomes the ``train`` split. The access token is read from
        the ``auth_acess_data`` environment variable.

        Returns:
            ``namedata`` unchanged.
        """
        datatable = self.finsh_data()
        if "__index_level_0__" in datatable.columns:
            datatable = datatable.drop(columns=["__index_level_0__"])
        dataset = DatasetDict({"train": Dataset.from_pandas(datatable)})
        dataset.push_to_hub(namedata, token=os.environ.get("auth_acess_data"), private=True)
        return namedata

    def read_dataset(self, link):
        """Load the ``train`` split of dataset ``link`` into the editor.

        Returns:
            ``(first page rows, link)`` on success, ``(None, error message)``
            on failure.
        """
        try:
            dataset = load_dataset(link, token=os.environ.get("auth_acess_data"))
            self.settt(dataset["train"].to_pandas())
            return self.get_page_data(self.current_page), link
        except FileNotFoundError:
            return None, f"Error: Dataset not found at {link}"
        except Exception as e:
            # UI boundary: report the failure in the output box.
            return None, f"Error loading dataset: {e}"

    # ------------------------------------------------------------ row actions

    # The annotation is a string so the class can be defined before gradio is
    # resolved; gradio reads this hint to decide to pass the select event.
    def on_select(self, evt: "gr.SelectData"):
        """Select the clicked table row.

        Returns:
            ``(row text, (SAMPLE_RATE, clip))`` when the row is flagged and has
            a clip, ``(row text, None)`` otherwise, and ``("", None)`` when the
            event carries no usable index.
        """
        if not evt.index:
            return "", None
        # evt.index[0] is the row within the visible page.
        selected = self.current_page * self.PAGE_SIZE + evt.index[0]
        if not 0 <= selected < len(self.data):
            return "", None
        self.current_selected = selected
        row = self.data.iloc[selected]
        clip = self.sdata[selected]
        if row['flag'] != 0 and clip is not None:
            return row['text'], (self.SAMPLE_RATE, clip)
        return row['text'], None

    def on_saveAs_row(self):
        """Refresh the table with the current page and clear the editor fields."""
        return self.get_page_data(self.current_page), None, ""

    def _prepare_clip(self, sr, audio):
        """Convert a recorded clip to float32, mono, peak 1.0 at ``SAMPLE_RATE``."""
        clip = np.asarray(audio).astype(np.float32)
        if clip.ndim > 1:
            # assumes the channel axis is last (samples, channels) — TODO confirm
            clip = clip.mean(axis=1)
        peak = float(np.max(np.abs(clip))) if clip.size else 0.0
        if peak > 0:  # a silent clip must not divide by zero
            clip = clip / peak
        if sr != self.SAMPLE_RATE:
            clip = librosa.resample(clip, orig_sr=sr, target_sr=self.SAMPLE_RATE)
        return clip

    def on_row_save(self, text, data_oudio):
        """Store the recorded clip on the selected row and flag the row.

        Nothing is stored when ``text`` is empty, no audio was recorded, or no
        row is selected. ``text`` is only used as a guard; it is not written
        back to the table.

        Returns:
            ``("", None)`` to clear the text box and the recorder.
        """
        if text and data_oudio is not None and self._has_selection():
            sr, audio = data_oudio
            self.sdata[self.current_selected] = self._prepare_clip(sr, audio)
            self._set_flag(self.current_selected, 1)
        return "", None

    def on_row_delete(self):
        """Drop the clip of the selected row and reset its flag to 0.

        Returns:
            ``(current page rows, None, "")`` to refresh the table and clear
            the recorder and text box.
        """
        if self._has_selection():
            self._set_flag(self.current_selected, 0)
            self.sdata[self.current_selected] = None
        return self.get_page_data(self.current_page), None, ""

    def _enhance_selected(self):
        """Return the enhanced clip of the selected row, or ``None`` if absent."""
        clip = self.get_output_audio()
        if clip is None:
            return None
        return remove_nn(clip)

    # --------------------------------------------------------------------- UI

    def startt(self):
        """Build the app: a token login panel in front of the editor panel.

        Returns:
            The ``gr.Blocks`` application (not yet launched).
        """
        with gr.Blocks() as demo:
            sesion_state = gr.State()
            with gr.Column(scale=1, min_width=200, visible=True) as login_panal:
                gr.Markdown("## auth acess page")
                token_login = gr.Textbox(label="token")
                login_button = gr.Button("Login")
            with gr.Column(scale=1, visible=False) as main_panel:
                self.create_interface()
            login_button.click(
                self.login,
                inputs=[token_login],
                outputs=[login_panal, main_panel, sesion_state],
            )
            demo.load(self.load_demo, [sesion_state], [login_panal, main_panel])
        return demo

    def create_interface(self):
        """Create the editor widgets and wire their events.

        Must be called inside an active ``gr.Blocks`` context.
        """
        # NOTE(review): the container nesting below is reconstructed from the
        # order of the statements; confirm it against the intended layout.
        with gr.Row():
            self.create_Tabs()
        with gr.Row():
            with gr.Column():
                # Show the same two columns every later table update returns.
                self.table = gr.Dataframe(
                    value=self.get_page_data(self.current_page),
                    headers=['text', 'flag'],
                    interactive=True,
                )
                with gr.Row(equal_height=False):
                    self.prev_button = gr.Button("Previous Page", scale=1, size="sm", variant="primary")
                    self.page_number = gr.Number(value=self.current_page + 1, label="Page", scale=1)
                    self.next_button = gr.Button("Next Page", scale=1, size="sm", variant="primary")
                with gr.Column():
                    self.txtsaveurl = gr.Textbox(placeholder="Save Dataset", interactive=True)
                    self.btn_savedataset = gr.Button("Save Dataset", scale=1, size="sm", variant="primary")
                    self.label = gr.Text("STATE")
            with gr.Column():
                self.txt_audio = gr.Textbox(label="Audio Text", interactive=True, rtl=True)
                self.btn_record = gr.Audio(interactive=True)
                with gr.Row():
                    self.btn_save = gr.Button("Save", size="sm", variant="primary", min_width=50)
                    self.btn_saveAs = gr.Button("SaveAs", size="sm", variant="primary", min_width=50)
                    self.btn_enhance = gr.Button("enhance ", size="sm", variant="primary", min_width=50)
                    self.btn_delete = gr.Button("Delete", size="sm", variant="primary", min_width=50)

        editor_outputs = [self.table, self.btn_record, self.txt_audio]
        # Only the table and the page number are updated by the pager: the
        # page is clamped in ``update_page``, and the has-prev/has-next flags
        # are plain bools that are not meaningful as Button output values.
        pager_outputs = [self.table, self.page_number]

        self.btn_displayy.click(self.read_dataset, [self.txturll], [self.table, self.txtsaveurl])
        self.buttonn.click(
            self.create_chunks_with_properties,
            [self.text_input, self.sigmant_word],
            [self.table],
        )
        self.table.select(self.on_select, None, [self.txt_audio, self.btn_record])
        self.btn_save.click(
            self.on_row_save,
            [self.txt_audio, self.btn_record],
            [self.txt_audio, self.btn_record],
        )
        self.btn_saveAs.click(self.on_saveAs_row, [], editor_outputs)
        self.btn_delete.click(self.on_row_delete, [], editor_outputs)
        self.btn_enhance.click(self._enhance_selected, [], self.btn_record)
        self.btn_savedataset.click(self.convert_dataframe_to_dataset, [self.txtsaveurl], [self.label])
        self.prev_button.click(lambda page: self._step_page(page, -1), [self.page_number], pager_outputs)
        self.next_button.click(lambda page: self._step_page(page, 1), [self.page_number], pager_outputs)
# Start with an empty table that already has the three columns the editor reads.
df1 = pd.DataFrame(columns=['text', 'flag', 'audio'])
editor = DataEditor(df1)
# `demo` is kept as a module-level name; the app is launched as soon as this
# module is executed.
demo = editor.startt()
demo.launch()