Spaces:

HugoLagoRodrigues
/

translate

Sleeping

App Files Files Community

Hugo Rodrigues committed on Jan 31, 2024

Commit

99797ef

1 Parent(s): b2430fa

audio endpoint

Browse files

Files changed (4) hide show

.gitignore +3 -1
README.md +35 -0
main.py +30 -2
requirements.txt +2 -1

.gitignore CHANGED Viewed

@@ -19,4 +19,6 @@ __pycache__/
 .gdb_history
 .vscode/
 # Other
-.DS_Store

 .gdb_history
 .vscode/
 # Other
+.DS_Store
+*.wav

README.md CHANGED Viewed

@@ -25,3 +25,38 @@ VS Code Python select interpreter hf
 ```
 docker compose up --build
 ```

 ```
 docker compose up --build
 ```
+## Tests
+Translate from English (eng) to Portuguese (por) the following text: "we the people of the united states in order to form a more perfect union establish justice ensure domestic tranquillity provide for the common defense"
+MacBook Pro M1 16GB, device = cpu.
+- Does not run. Not enough memory
+HF CPU free
+- 8.99 sec
+- 9.06 sec
+- 8.77 sec
+T4 small
+- 1.18 sec
+- 1.12 sec
+- 1.12 sec
+A10G small
+- 1.02 sec
+- 1.00 sec
+- 1.06 sec
+- 1.01 sec
+2xA10G large
+- 0.97 sec
+- 0.95 sec
+- 0.95 sec
+- 0.95 sec
+- 0.95 sec

main.py CHANGED Viewed

@@ -1,9 +1,12 @@
 import time
 # from typing import Union
 # from pydantic import BaseModel
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 # from fastapi.staticfiles import StaticFiles
 # from fastapi.responses import FileResponse
@@ -14,8 +17,11 @@ import torch
 from transformers import SeamlessM4Tv2Model
 from transformers import AutoProcessor
-processor = AutoProcessor.from_pretrained("facebook/seamless-m4t-v2-large")
-model = SeamlessM4Tv2Model.from_pretrained("facebook/seamless-m4t-v2-large")
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
@@ -63,3 +69,25 @@ def transcribe(inputs, src_lang="eng", tgt_lang="por"):
     print("Time took to process the request and return response is {} sec".format(
         time.time() - start_time))
     return translated_text_from_text

 import time
+from scipy.io.wavfile import write
 # from typing import Union
 # from pydantic import BaseModel
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import FileResponse
 # from fastapi.staticfiles import StaticFiles
 # from fastapi.responses import FileResponse
 from transformers import SeamlessM4Tv2Model
 from transformers import AutoProcessor
+model_name = "facebook/seamless-m4t-v2-large"
+# model_name = "facebook/hf-seamless-m4t-medium"
+processor = AutoProcessor.from_pretrained(model_name)
+model = SeamlessM4Tv2Model.from_pretrained(model_name)
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
     print("Time took to process the request and return response is {} sec".format(
         time.time() - start_time))
     return translated_text_from_text
+@app.get("/audio")
+async def audio(inputs, src_lang="eng", tgt_lang="por", speaker_id=5):
+    start_time = time.time()
+    if inputs is None:
+        raise "No audio file submitted! Please upload or record an audio file before submitting your request."
+    text_inputs = processor(text=inputs,
+                            src_lang=src_lang, return_tensors="pt").to(device)
+    audio_array_from_text = model.generate(
+        **text_inputs, tgt_lang=tgt_lang, speaker_id=int(speaker_id))[0].cpu().numpy().squeeze()
+    print("Time took to process the request and return response is {} sec".format(
+        time.time() - start_time))
+    write("output.wav", model.config.sampling_rate,
+          audio_array_from_text)
+    return FileResponse('output.wav', media_type="audio/mpeg")

requirements.txt CHANGED Viewed

@@ -7,4 +7,5 @@ sentencepiece
 protobuf
 torch
 uvicorn[standard]
-ffmpeg

 protobuf
 torch
 uvicorn[standard]
+ffmpeg
+scipy