zouhairk committed on
Commit
73a9cc3
·
1 Parent(s): 9de27c4
Files changed (1) hide show
  1. app.py +43 -28
app.py CHANGED
@@ -1,35 +1,50 @@
 
1
  from transformers import AutoProcessor, SeamlessM4Tv2Model
2
  import numpy as np
3
- #import torchaudio
4
  import sounddevice as sd
5
- #from audio
 
 
 
6
 
7
  processor = AutoProcessor.from_pretrained("facebook/seamless-m4t-v2-large")
8
  model = SeamlessM4Tv2Model.from_pretrained("facebook/seamless-m4t-v2-large")
9
 
10
- # from text
11
- text_inputs = processor(text = "Искам da polucha zdravnata mi karta i помощ", src_lang="bul", return_tensors="pt")
12
- audio_array_from_text = model.generate(**text_inputs, tgt_lang="fra")[0].cpu().numpy().squeeze()
13
-
14
- # Afficher le tableau dans le terminal
15
- print(audio_array_from_text)
16
-
17
- # Optionnel : Afficher seulement les 10 premières valeurs pour éviter trop de sorties
18
- print(audio_array_from_text[:10])
19
-
20
- sd.play(audio_array_from_text, samplerate=16000) # 16kHz est souvent utilisé par ces modèles
21
- sd.wait()
22
-
23
-
24
- audio, orig_freq = torchaudio.load("https://www2.cs.uic.edu/~i101/SoundFiles/preamble10.wav")
25
- # audio = torchaudio.functional.resample(audio, orig_freq=orig_freq, new_freq=16_000) # must be a 16 kHz waveform array
26
- # audio_inputs = processor(audios=audio, return_tensors="pt")
27
- # audio_array_from_audio = model.generate(**audio_inputs, tgt_lang="rus")[0].cpu().numpy().squeeze()
28
-
29
- # from audio
30
- output_tokens = model.generate(**audio_inputs, tgt_lang="fra", generate_speech=False)
31
- translated_text_from_audio = processor.decode(output_tokens[0].tolist()[0], skip_special_tokens=True)
32
-
33
- # from text
34
- output_tokens = model.generate(**text_inputs, tgt_lang="fra", generate_speech=False)
35
- translated_text_from_text = processor.decode(output_tokens[0].tolist()[0], skip_special_tokens=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, jsonify
2
  from transformers import AutoProcessor, SeamlessM4Tv2Model
3
  import numpy as np
 
4
  import sounddevice as sd
5
+ import wave
6
+ import os
7
+
8
# Flask application object; the route decorators in this module attach to it.
app = Flask(__name__)

# The processor and the model are loaded from the same checkpoint, once, at
# import time, and are then shared by every request handler.
_CHECKPOINT = "facebook/seamless-m4t-v2-large"
processor = AutoProcessor.from_pretrained(_CHECKPOINT)
model = SeamlessM4Tv2Model.from_pretrained(_CHECKPOINT)

# Directory that receives uploaded recordings and generated audio; it is
# created on import if it does not exist yet.
UPLOAD_FOLDER = "audio_files"
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
15
+
16
@app.route("/record", methods=["POST"])
def record_audio():
    """Translate an uploaded audio recording into French text.

    Expects a multipart/form-data POST carrying the recording in the
    ``audio`` form field. The upload is saved under ``UPLOAD_FOLDER``,
    loaded with torchaudio, resampled to 16 kHz when necessary and passed to
    the model with speech generation disabled.

    Returns:
        A JSON response ``{"translated_text": ...}`` on success, or
        ``({"error": ...}, 400)`` when the upload is missing or its filename
        is unusable.
    """
    # Imported locally: the module's import block does not import
    # torchaudio, so the bare ``torchaudio.load`` call raised NameError.
    import torchaudio

    file = request.files.get("audio")
    if file is None or not file.filename:
        return jsonify({"error": "missing 'audio' file upload"}), 400

    # The filename is chosen by the client. Keep only its last path
    # component (treating backslashes as separators too) so that a name such
    # as "../../app.py" cannot be written outside UPLOAD_FOLDER.
    safe_name = os.path.basename(file.filename.replace("\\", "/"))
    if safe_name in ("", ".", ".."):
        return jsonify({"error": "invalid audio filename"}), 400
    filename = os.path.join(UPLOAD_FOLDER, safe_name)
    file.save(filename)

    # Load and preprocess the audio.
    audio_data, orig_freq = torchaudio.load(filename)
    # The processor is fed a 16 kHz waveform; uploads recorded at another
    # rate are resampled first instead of being passed through unchanged.
    target_freq = 16_000
    if orig_freq != target_freq:
        audio_data = torchaudio.functional.resample(
            audio_data, orig_freq=orig_freq, new_freq=target_freq
        )
    audio_inputs = processor(audios=audio_data, return_tensors="pt")

    # generate_speech=False makes the model return text tokens, not audio.
    output_tokens = model.generate(**audio_inputs, tgt_lang="fra", generate_speech=False)
    translated_text = processor.decode(output_tokens[0].tolist()[0], skip_special_tokens=True)

    return jsonify({"translated_text": translated_text})
29
+
30
@app.route("/text_to_speech", methods=["POST"])
def text_to_speech():
    """Translate text to speech and write the result to a WAV file.

    Expects a JSON object with the keys ``text``, ``src_lang`` and
    ``tgt_lang``. The generated waveform is written as 16-bit mono PCM to
    ``output.wav`` inside ``UPLOAD_FOLDER``.

    Returns:
        A JSON response ``{"audio_url": <path of the written file>}`` on
        success, or ``({"error": ...}, 400)`` when the body is not a JSON
        object or a required field is missing or empty.
    """
    # silent=True yields None for a missing or malformed body, so the
    # request is rejected here rather than failing on ``None.get``.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "request body must be a JSON object"}), 400

    text = data.get("text")
    src_lang = data.get("src_lang")
    tgt_lang = data.get("tgt_lang")
    required = (("text", text), ("src_lang", src_lang), ("tgt_lang", tgt_lang))
    missing = [key for key, value in required if not value]
    if missing:
        return jsonify({"error": "missing required field(s): " + ", ".join(missing)}), 400

    text_inputs = processor(text=text, src_lang=src_lang, return_tensors="pt")
    audio_array = model.generate(**text_inputs, tgt_lang=tgt_lang)[0].cpu().numpy().squeeze()

    # The scaling by 32767 assumes float samples in [-1, 1]. Clip first: any
    # sample outside that range would wrap around when cast to int16 and be
    # heard as a loud click.
    pcm_samples = (np.clip(audio_array, -1.0, 1.0) * 32767).astype(np.int16)

    # Use the sample rate declared by the model configuration when it is
    # available; fall back to the previously hard-coded 16 kHz otherwise.
    sample_rate = getattr(model.config, "sampling_rate", 16000)

    # NOTE(review): the output name is fixed, so concurrent requests
    # overwrite each other's file — confirm whether that matters to callers.
    output_filename = os.path.join(UPLOAD_FOLDER, "output.wav")
    with wave.open(output_filename, "wb") as wf:
        wf.setnchannels(1)  # mono
        wf.setsampwidth(2)  # 2 bytes per sample = 16-bit PCM
        wf.setframerate(sample_rate)
        wf.writeframes(pcm_samples.tobytes())

    return jsonify({"audio_url": output_filename})
48
+
49
if __name__ == "__main__":
    # Debug mode turns on the Werkzeug interactive debugger, which lets
    # anyone who can reach the server execute arbitrary code. It is therefore
    # opt-in: start the script with FLASK_DEBUG=1 to enable it during
    # development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in ("1", "true", "yes")
    app.run(debug=debug_enabled)