minhpng committed on
Commit
417e147
·
1 Parent(s): 669dcca

add convert to audio

Browse files
Dockerfile CHANGED
@@ -1,6 +1,10 @@
 
 
 
1
  # Use the official Python 3.10.9 image
2
  FROM python:3.12.1
3
 
 
4
  WORKDIR /app
5
 
6
  # Copy the current directory contents into the container at .
 
1
+
2
+
3
+
4
  # Use the official Python 3.10.9 image
5
  FROM python:3.12.1
6
 
7
+ RUN apt-get update -qq && apt-get install ffmpeg -y
8
  WORKDIR /app
9
 
10
  # Copy the current directory contents into the container at .
libs/convert_to_audio.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import subprocess
3
+
4
+ def convert_to_audio(input_file, output_file):
5
+ ffmpeg_command = [
6
+ "ffmpeg",
7
+ "-i", input_file,
8
+ "-vn",
9
+ "-acodec", "libmp3lame",
10
+ "-ab", "96k",
11
+ "-ar", "44100",
12
+ "-y",
13
+ output_file
14
+ ]
15
+ try:
16
+ subprocess.run(ffmpeg_command, check=True)
17
+ except subprocess.CalledProcessError as e:
18
+ print("Error: failed to convert audio")
routers/get_transcript.py CHANGED
@@ -2,6 +2,9 @@ import time
2
  from fastapi import APIRouter, Depends, HTTPException, status
3
  from faster_whisper import WhisperModel
4
 
 
 
 
5
  from libs.header_api_auth import get_api_key
6
 
7
  router = APIRouter(prefix="/get-transcript", tags=["transcript"])
@@ -23,18 +26,29 @@ def get_transcript(audio_path: str, model_size: str = "distil-large-v3", api_key
23
 
24
  print(f"model>>>: {model_size}")
25
 
 
 
 
 
 
 
 
26
  st = time.time()
27
 
 
28
 
29
  try:
30
  model_run = WhisperModel(model_size, device="cpu", compute_type="int8")
31
  segments, info = model_run.transcribe(
32
- audio_path,
33
  beam_size=16,
34
  language="en",
35
  condition_on_previous_text=False,
36
  )
 
37
  except Exception as error:
 
 
38
  raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail=f"error>>>: {error}")
39
 
40
  text = ""
@@ -49,11 +63,13 @@ def get_transcript(audio_path: str, model_size: str = "distil-large-v3", api_key
49
  "text": segment.text
50
  })
51
 
52
-
53
  et = time.time()
54
  elapsed_time = et - st
 
 
55
  return {"text": text,
56
- 'list_sentence': listSentences
 
57
  }
58
 
59
  # time.sleep(5)
 
2
  from fastapi import APIRouter, Depends, HTTPException, status
3
  from faster_whisper import WhisperModel
4
 
5
+ import os
6
+
7
+ from libs.convert_to_audio import convert_to_audio
8
  from libs.header_api_auth import get_api_key
9
 
10
  router = APIRouter(prefix="/get-transcript", tags=["transcript"])
 
26
 
27
  print(f"model>>>: {model_size}")
28
 
29
+ output_audio_folder = f"./cached/audio"
30
+
31
+ if not os.path.exists(output_audio_folder):
32
+ os.makedirs(output_audio_folder)
33
+
34
+ output_file = f"{output_audio_folder}/{audio_path.split('/')[-1].split(".")[0]}.mp3"
35
+
36
  st = time.time()
37
 
38
+ convert_to_audio(audio_path.strip(), output_file)
39
 
40
  try:
41
  model_run = WhisperModel(model_size, device="cpu", compute_type="int8")
42
  segments, info = model_run.transcribe(
43
+ output_file,
44
  beam_size=16,
45
  language="en",
46
  condition_on_previous_text=False,
47
  )
48
+ os.remove(output_file)
49
  except Exception as error:
50
+ if os.path.exists(output_file):
51
+ os.remove(output_file)
52
  raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail=f"error>>>: {error}")
53
 
54
  text = ""
 
63
  "text": segment.text
64
  })
65
 
 
66
  et = time.time()
67
  elapsed_time = et - st
68
+
69
+
70
  return {"text": text,
71
+ 'list_sentence': listSentences,
72
+ 'elapsed_time': round(elapsed_time, 2)
73
  }
74
 
75
  # time.sleep(5)