Spaces:
Running
on
Zero
Running
on
Zero
Commit
·
dee3b49
1
Parent(s):
42657a1
app.py
CHANGED
@@ -116,10 +116,26 @@ if __name__ == "__main__":
|
|
116 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
117 |
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
|
118 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
119 |
# Load model and processor with LM
|
120 |
-
processor = Wav2Vec2ProcessorWithLM.from_pretrained(
|
121 |
model = Wav2Vec2ForCTC.from_pretrained(
|
122 |
-
|
123 |
torch_dtype=torch_dtype
|
124 |
).to(device)
|
125 |
|
|
|
116 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
117 |
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
|
118 |
|
119 |
+
MODEL_NAME = "alakxender/wav2vec2-large-mms-1b-dv-syn-md" # Trained on 100% Synthetic Data (130-150 Hours)
|
120 |
+
# MODEL_NAME = "alakxender/wav2vec2-large-mms-1b-cv" # Trained on Common Voice Data (Unknown Hours)
|
121 |
+
# MODEL_NAME = "alakxender/whisper-small-dv-syn-md" # Trained on 100% Synthetic Data (150 Hours)
|
122 |
+
# MODEL_NAME = "alakxender/whisper-small-cv" # Trained on Common Voice Data (Unknown Hours)
|
123 |
+
# MODEL_NAME = "alakxender/whisper-medium-dv-syn-md" # Trained on 100% Synthetic Data (150 Hours)
|
124 |
+
# MODEL_NAME = "alakxender/whisper-medium-cv" # Trained on Common Voice Data (Unknown Hours)
|
125 |
+
# MODEL_NAME = "alakxender/whisper-large-v3-dv-syn-md" # Trained on 100% Synthetic Data (150 Hours)
|
126 |
+
# MODEL_NAME = "alakxender/whisper-large-v3-cv" # Trained on Common Voice Data (Unknown Hours)
|
127 |
+
# MODEL_NAME = "alakxender/whisper-large-v3-calls-md" # Trained on phone calls (65 Hours)
|
128 |
+
# MODEL_NAME = "alakxender/wav2vec2-large-mms-1b-calls-md" # Trained on phone calls (65 Hours)
|
129 |
+
# MODEL_NAME = "alakxender/wav2vec2-large-xlsr-calls-md" # Trained on phone calls (23 Hours)
|
130 |
+
# MODEL_NAME = "alakxender/wav2vec2-large-xlsr-dv-syn-md" # Trained on 100% Synthetic Data (80 Hours)
|
131 |
+
# MODEL_NAME = "alakxender/dhivehi-asr-full-ctc" # Trained on multiple datasets (350+ Hours)
|
132 |
+
# MODEL_NAME = "alakxender/dhivehi-asr-full-ctc-v2" # Trained on multiple datasets (350+ Hours)
|
133 |
+
# MODEL_NAME = "alakxender/dhivehi-asr-full-whisper-v3" # Trained on multiple datasets (350+ Hours)
|
134 |
+
|
135 |
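# Load model and processor with LM.
# NOTE: Wav2Vec2ForCTC / Wav2Vec2ProcessorWithLM only fit the wav2vec2 (CTC)
# checkpoints listed above; the whisper checkpoints need a Whisper loader instead.
# TODO confirm each CTC checkpoint ships the LM decoder files the processor expects.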
|
136 |
+
processor = Wav2Vec2ProcessorWithLM.from_pretrained(MODEL_NAME)
|
137 |
model = Wav2Vec2ForCTC.from_pretrained(
|
138 |
+
MODEL_NAME,
|
139 |
torch_dtype=torch_dtype
|
140 |
).to(device)
|
141 |
|