Commit · 1bead67
Parent(s): bb0a58d
Files added
app.py CHANGED
@@ -1,29 +1,23 @@
 import os
-# Append /usr/bin to PATH
-os.environ["PATH"] += os.pathsep + "/usr/bin"
-
-
-
 from flask import Flask, request, jsonify, render_template
-import
-import torch
-import Levenshtein
-from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
-from io import BytesIO
+from transformers import pipeline
 from flask_cors import CORS
-from pydub import AudioSegment
+from pydub import AudioSegment
+from io import BytesIO
+import Levenshtein
 
+# Set the FFmpeg paths explicitly
 AudioSegment.converter = "/usr/bin/ffmpeg"
 AudioSegment.ffprobe = "/usr/bin/ffprobe"
 
+# Set Hugging Face cache directory to avoid permission issues
 os.environ['HF_HOME'] = '/tmp/.cache'
 
 app = Flask(__name__)
 CORS(app)
 
-
-
-model = Wav2Vec2ForCTC.from_pretrained(MODEL_ID)
+# Use Hugging Face ASR pipeline for automatic speech recognition
+asr_pipeline = pipeline("automatic-speech-recognition", model="jonatasgrosman/wav2vec2-large-xlsr-53-arabic")
 
 
 def convert_to_wav(audio_bytes):
@@ -39,19 +33,16 @@ def convert_to_wav(audio_bytes):
         return None
 
 
-def
-    """Transcribes the audio using
-    wav_io = convert_to_wav(audio_bytes)
+def transcribe_audio(audio_bytes):
+    """Transcribes the audio using the Hugging Face ASR pipeline."""
+    wav_io = convert_to_wav(audio_bytes)
     if wav_io is None:
         raise Exception("Could not convert audio to WAV format")
 
-
-
-
-
-    predicted_ids = torch.argmax(logits, dim=-1)
-    transcription = processor.batch_decode(predicted_ids)[0].strip()
-    return transcription
+    # Read the audio file into bytes for the ASR pipeline
+    wav_io.seek(0)
+    transcription = asr_pipeline(wav_io)["text"]
+    return transcription.strip()
 
 
 def levenshtein_similarity(transcription1, transcription2):
@@ -74,8 +65,8 @@ def transcribe():
     user_audio_bytes = user_audio.read()
 
     try:
-        transcription_original =
-        transcription_user =
+        transcription_original = transcribe_audio(original_audio_bytes)
+        transcription_user = transcribe_audio(user_audio_bytes)
     except Exception as e:
         return jsonify({"error": str(e)}), 500
 
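
For context on what this commit does: the manual Wav2Vec2ForCTC / Wav2Vec2Processor loading and torch.argmax decoding are dropped in favor of the transformers ASR pipeline. A minimal sketch of that pipeline call on its own, assuming a local audio file whose name ("sample.wav") is purely illustrative:

from transformers import pipeline

# Same task and model as in app.py; the weights are downloaded on first use.
asr_pipeline = pipeline(
    "automatic-speech-recognition",
    model="jonatasgrosman/wav2vec2-large-xlsr-53-arabic",
)

# The pipeline accepts a path to an audio file (raw audio bytes also work
# when ffmpeg is available) and returns a dict with a "text" key, which is
# what transcribe_audio() in app.py relies on.
result = asr_pipeline("sample.wav")
print(result["text"].strip())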
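
The body of convert_to_wav falls outside the hunks shown above. For readers following along, here is a sketch of what a pydub-based helper with the visible "return None" fallback typically looks like; this is an assumption, not the file's actual implementation:

from io import BytesIO
from pydub import AudioSegment

def convert_to_wav(audio_bytes):
    """Convert arbitrary audio bytes to an in-memory WAV file, or None on failure."""
    try:
        audio = AudioSegment.from_file(BytesIO(audio_bytes))
        wav_io = BytesIO()
        audio.export(wav_io, format="wav")
        wav_io.seek(0)
        return wav_io
    except Exception:
        # Matches the "return None" context line visible in the diff.
        return None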
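
Likewise, levenshtein_similarity is only referenced here, not shown. Given the import Levenshtein at the top of the new file, a plausible sketch (again an assumption about code outside this diff) is:

import Levenshtein

def levenshtein_similarity(transcription1, transcription2):
    """Return a normalized similarity score between 0 and 1."""
    # Levenshtein.ratio computes similarity from edit distance.
    return Levenshtein.ratio(transcription1, transcription2)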