wasmdashai committed
Commit 0ff1aee · verified · 1 Parent(s): b2e1dba

Update app.py

Files changed (1)
  1. app.py +18 -162
app.py CHANGED
@@ -1,166 +1,22 @@
- import gradio as gr
- import spaces
- import torch
- from transformers import AutoTokenizer, VitsModel
- import os
- import numpy as np
- # Use a pipeline as a high-level helper
  from transformers import pipeline
+ import gradio as gr
 
+ # Load the model
  pipe = pipeline("text-generation", model="wasmdashai/Seed-Coder-8B-Instruct-V1")
- messages = [
-     {"role": "user", "content": "Who are you?"},
- ]
- pipe(messages)
- token = os.environ.get("key_")
- tokenizer = AutoTokenizer.from_pretrained("wasmdashai/vtk", token=token)
- models = {}
-
- import noisereduce as nr
-
- import torch
- from typing import Any, Callable, Optional, Tuple, Union, Iterator
-
- import torch.nn as nn  # Import the missing module
- def remove_noise_nr(audio_data, sr=16000):
-     """Removes noise using the noisereduce library."""
-     reduced_noise = nr.reduce_noise(y=audio_data, hop_length=256, sr=sr)
-     return reduced_noise
-
- def _inference_forward_stream(
-     self,
-     input_ids: Optional[torch.Tensor] = None,
-     attention_mask: Optional[torch.Tensor] = None,
-     speaker_embeddings: Optional[torch.Tensor] = None,
-     output_attentions: Optional[bool] = None,
-     output_hidden_states: Optional[bool] = None,
-     return_dict: Optional[bool] = None,
-     padding_mask: Optional[torch.Tensor] = None,
-     chunk_size: int = 32,  # Chunk size for streaming output
-     is_streaming: bool = True,
- ) -> Iterator[torch.Tensor]:
-     """Generates speech waveforms in a streaming fashion."""
-     if attention_mask is not None:
-         padding_mask = attention_mask.unsqueeze(-1).float()
-     else:
-         padding_mask = torch.ones_like(input_ids).unsqueeze(-1).float()
-
-     text_encoder_output = self.text_encoder(
-         input_ids=input_ids,
-         padding_mask=padding_mask,
-         attention_mask=attention_mask,
-         output_attentions=output_attentions,
-         output_hidden_states=output_hidden_states,
-         return_dict=return_dict,
-     )
-     hidden_states = text_encoder_output[0] if not return_dict else text_encoder_output.last_hidden_state
-     hidden_states = hidden_states.transpose(1, 2)
-     input_padding_mask = padding_mask.transpose(1, 2)
-
-     prior_means = text_encoder_output[1] if not return_dict else text_encoder_output.prior_means
-     prior_log_variances = text_encoder_output[2] if not return_dict else text_encoder_output.prior_log_variances
-
-     if self.config.use_stochastic_duration_prediction:
-         log_duration = self.duration_predictor(
-             hidden_states,
-             input_padding_mask,
-             speaker_embeddings,
-             reverse=True,
-             noise_scale=self.noise_scale_duration,
-         )
-     else:
-         log_duration = self.duration_predictor(hidden_states, input_padding_mask, speaker_embeddings)
-
-     length_scale = 1.0 / self.speaking_rate
-     duration = torch.ceil(torch.exp(log_duration) * input_padding_mask * length_scale)
-     predicted_lengths = torch.clamp_min(torch.sum(duration, [1, 2]), 1).long()
-
-     # Create a padding mask for the output lengths of shape (batch, 1, max_output_length)
-     indices = torch.arange(predicted_lengths.max(), dtype=predicted_lengths.dtype, device=predicted_lengths.device)
-     output_padding_mask = indices.unsqueeze(0) < predicted_lengths.unsqueeze(1)
-     output_padding_mask = output_padding_mask.unsqueeze(1).to(input_padding_mask.dtype)
-
-     # Reconstruct an attention tensor of shape (batch, 1, out_length, in_length)
-     attn_mask = torch.unsqueeze(input_padding_mask, 2) * torch.unsqueeze(output_padding_mask, -1)
-     batch_size, _, output_length, input_length = attn_mask.shape
-     cum_duration = torch.cumsum(duration, -1).view(batch_size * input_length, 1)
-     indices = torch.arange(output_length, dtype=duration.dtype, device=duration.device)
-     valid_indices = indices.unsqueeze(0) < cum_duration
-     valid_indices = valid_indices.to(attn_mask.dtype).view(batch_size, input_length, output_length)
-     padded_indices = valid_indices - nn.functional.pad(valid_indices, [0, 0, 1, 0, 0, 0])[:, :-1]
-     attn = padded_indices.unsqueeze(1).transpose(2, 3) * attn_mask
-
-     # Expand prior distribution
-     prior_means = torch.matmul(attn.squeeze(1), prior_means).transpose(1, 2)
-     prior_log_variances = torch.matmul(attn.squeeze(1), prior_log_variances).transpose(1, 2)
-
-     prior_latents = prior_means + torch.randn_like(prior_means) * torch.exp(prior_log_variances) * self.noise_scale
-     latents = self.flow(prior_latents, output_padding_mask, speaker_embeddings, reverse=True)
-
-     spectrogram = latents * output_padding_mask
-     if is_streaming:
-         for i in range(0, spectrogram.size(-1), chunk_size):
-             with torch.no_grad():
-                 wav = self.decoder(spectrogram[:, :, i : i + chunk_size], speaker_embeddings)
-             yield wav.squeeze().cpu().numpy()
-     else:
-         wav = self.decoder(spectrogram, speaker_embeddings)
-         yield wav.squeeze().cpu().numpy()
-
- @spaces.GPU
- def get_model(name_model):
-     global models
-     if name_model in models:
-         return models[name_model]
-     models[name_model] = VitsModel.from_pretrained(name_model, token=token).cuda()
-     models[name_model].decoder.apply_weight_norm()
-     # torch.nn.utils.weight_norm(self.decoder.conv_pre)
-     # torch.nn.utils.weight_norm(self.decoder.conv_post)
-     for flow in models[name_model].flow.flows:
-         torch.nn.utils.weight_norm(flow.conv_pre)
-         torch.nn.utils.weight_norm(flow.conv_post)
-     return models[name_model]
-
- zero = torch.Tensor([0]).cuda()
- print(zero.device)  # <-- 'cpu' 🤔
- import torch
- TXT = """السلام عليكم ورحمة الله وبركاتة يا هلا وسهلا ومراحب بالغالي اخباركم طيبين ان شاء الله ارحبوا على العين والراس """
- @spaces.GPU
- def modelspeech(text=TXT, name_model="wasmdashai/vits-ar-sa-huba-v2", speaking_rate=16000):
-     inputs = tokenizer(text, return_tensors="pt")
-     model = get_model(name_model)
-     model.speaking_rate = speaking_rate
-     with torch.no_grad():
-         wav = list(_inference_forward_stream(model, input_ids=inputs.input_ids.cuda(), attention_mask=inputs.attention_mask.cuda(), speaker_embeddings=None, is_streaming=False))[0]
-     # with torch.no_grad():
-     #     wav = model(input_ids=inputs["input_ids"].cuda()).waveform.cpu().numpy().reshape(-1)  # .detach()
-     return (model.config.sampling_rate, wav), (model.config.sampling_rate, remove_noise_nr(wav))
-
- model_choices = gr.Dropdown(
-     choices=[
-         "wasmdashai/vits-ar-sa-huba-v1",
-         "wasmdashai/vits-ar-sa-huba-v2",
-         "wasmdashai/vits-ar-sa-A",
-         "wasmdashai/vits-ar-ye-sa",
-         "wasmdashai/vits-ar-sa-M-v1",
-         "wasmdashai/vits-ar-sa-M-v2"
-     ],
-     label="اختر النموذج",
-     value="wasmdashai/vits-ar-sa-huba-v2",
- )
 
- demo = gr.Interface(fn=modelspeech, inputs=["text", model_choices, gr.Slider(0, 1, step=0.1, value=0.8)], outputs=["audio", "audio"])
- demo.queue()
- demo.launch()
+ # Response generation function
+ def chat_with_model(user_input):
+     messages = [
+         {"role": "user", "content": user_input},
+     ]
+     output = pipe(messages, max_new_tokens=200, do_sample=True)
+     return output[0]['generated_text']
+
+ # Gradio interface
+ gr.Interface(
+     fn=chat_with_model,
+     inputs=gr.Textbox(lines=2, placeholder="اكتب سؤالك هنا..."),
+     outputs="text",
+     title="Seed-Coder Chat",
+     description="نموذج Seed-Coder للإجابة على الأسئلة باستخدام نموذج توليد النصوص"
+ ).launch()
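
A note on the new chat_with_model: when a text-generation pipeline is called with a list of chat messages, output[0]['generated_text'] generally comes back as the full conversation (a list of role/content dicts ending with the assistant turn) rather than a plain string, so the Textbox may render the raw structure. A minimal sketch of returning only the model's reply, assuming that chat-format output (this variant is illustrative, not part of the commit):

def chat_with_model(user_input):
    messages = [{"role": "user", "content": user_input}]
    output = pipe(messages, max_new_tokens=200, do_sample=True)
    generated = output[0]["generated_text"]
    # Chat-format pipelines echo the whole conversation; the last
    # message is the assistant's reply. Fall back to the raw value
    # if the pipeline returned a plain string instead.
    if isinstance(generated, list):
        return generated[-1]["content"]
    return generated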
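
Separately, the removed app pinned its VITS models to the GPU explicitly (@spaces.GPU, .cuda()), while the new code lets pipeline pick defaults for an 8B-parameter model. A sketch of explicit placement, assuming a CUDA-capable Space with accelerate installed and bfloat16 support (both assumptions, not verified by this commit):

import torch
from transformers import pipeline

# Hypothetical loading config: torch_dtype and device_map are standard
# pipeline kwargs, but the exact choices here are assumptions.
pipe = pipeline(
    "text-generation",
    model="wasmdashai/Seed-Coder-8B-Instruct-V1",
    torch_dtype=torch.bfloat16,  # roughly halves memory vs. float32
    device_map="auto",           # let accelerate place layers across devices
)

The old demo.queue() call was also dropped; chaining gr.Interface(...).queue().launch() would restore request queueing if the Space needs it.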