from flask import Flask, request, jsonify
from transformers import AutoProcessor, SeamlessM4Tv2Model
import numpy as np
import torchaudio
import wave
import os
import logging

# Configure debug logging
logging.basicConfig(level=logging.DEBUG)
app = Flask(__name__)
# Define and create the Hugging Face cache directory
HUGGINGFACE_CACHE_DIR = "./huggingface_cache"
os.makedirs(HUGGINGFACE_CACHE_DIR, exist_ok=True)
logging.debug("Hugging Face cache directory: %s", HUGGINGFACE_CACHE_DIR)

# Load the processor and the model, using the cache directory
logging.debug("Loading processor and model...")
processor = AutoProcessor.from_pretrained(
    "facebook/seamless-m4t-v2-large", cache_dir=HUGGINGFACE_CACHE_DIR
)
logging.debug("Processor loaded and cached successfully: %s", processor)
model = SeamlessM4Tv2Model.from_pretrained(
    "facebook/seamless-m4t-v2-large", cache_dir=HUGGINGFACE_CACHE_DIR
)
logging.debug("Model loaded and cached successfully: %s", model)
UPLOAD_FOLDER = "audio_files"
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
@app.route("/", methods=["GET"])
def return_text():
return jsonify({"text": "Hello, world!"})
@app.route("/record", methods=["POST"])
def record_audio():
file = request.files['audio']
filename = os.path.join(UPLOAD_FOLDER, file.filename)
file.save(filename)
# Charger et traiter l'audio
audio_data, orig_freq = torchaudio.load(filename)
audio_inputs = processor(audios=audio_data, return_tensors="pt")
output_tokens = model.generate(**audio_inputs, tgt_lang="fra", generate_speech=False)
translated_text = processor.decode(output_tokens[0].tolist()[0], skip_special_tokens=True)
return jsonify({"translated_text": translated_text})
@app.route("/text_to_speech", methods=["POST"])
def text_to_speech():
data = request.get_json()
text = data.get("text")
src_lang = data.get("src_lang")
tgt_lang = data.get("tgt_lang")
text_inputs = processor(text=text, src_lang=src_lang, return_tensors="pt")
audio_array = model.generate(**text_inputs, tgt_lang=tgt_lang)[0].cpu().numpy().squeeze()
output_filename = os.path.join(UPLOAD_FOLDER, "output.wav")
with wave.open(output_filename, "wb") as wf:
wf.setnchannels(1)
wf.setsampwidth(2)
wf.setframerate(16000)
wf.writeframes((audio_array * 32767).astype(np.int16).tobytes())
return jsonify({"audio_url": output_filename})
if __name__ == "__main__":
    app.run(debug=True)