WajeehAzeemX committed on
Commit f177433 · 1 Parent(s): 33f6d66

testing time

Files changed (2):
  1. app.py +58 -9
  2. app_backup.py +50 -1
app.py CHANGED
@@ -1,12 +1,36 @@
+
 from fastapi import FastAPI, Request, HTTPException
+import torch
+import torchaudio
+from transformers import AutoProcessor, pipeline
 import io
-import time
-from faster_whisper import WhisperModel
+from pydub import AudioSegment
+from optimum.onnxruntime import ORTModelForSpeechSeq2Seq
+import numpy as np
 import uvicorn
-
+import time
 app = FastAPI()
 
-model = WhisperModel("WajeehAzeemX/faster-whisper-smallar2-int8", device="cpu", compute_type="int8")
+# Device configuration
+device = "cuda" if torch.cuda.is_available() else "cpu"
+print(device)
+torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+
+# Load the model and processor
+model_id = "WajeehAzeemX/whisper-small-ar2_onnx"
+model = ORTModelForSpeechSeq2Seq.from_pretrained(
+    model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
+)
+processor = AutoProcessor.from_pretrained(model_id)
+
+
+pipe = pipeline(
+    "automatic-speech-recognition",
+    model=model,
+    tokenizer=processor.tokenizer,
+    feature_extractor=processor.feature_extractor,
+    torch_dtype=torch_dtype,
+)
 
 @app.post("/transcribe/")
 async def transcribe_audio(request: Request):
@@ -16,18 +40,43 @@ async def transcribe_audio(request: Request):
 
         # Convert binary data to a file-like object
         audio_file = io.BytesIO(audio_data)
+
+        # Load the audio file using pydub
+        try:
+            audio_segment = AudioSegment.from_file(audio_file, format="wav")
+        except Exception as e:
+            raise HTTPException(status_code=400, detail=f"Error loading audio file: {str(e)}")
 
-        # Start timing the transcription
-        start_time = time.time()
+        # Convert to mono if the audio is stereo (multi-channel)
+        if audio_segment.channels > 1:
+            audio_segment = audio_segment.set_channels(1)
+
+        # Resample the audio to 16kHz
+        target_sample_rate = 16000
+        if audio_segment.frame_rate != target_sample_rate:
+            audio_segment = audio_segment.set_frame_rate(target_sample_rate)
 
-        # Transcribe the audio
-        segments, info = model.transcribe(audio_file)
-        transcription = " ".join([segment.text for segment in segments])
+        # Convert audio to numpy array
+        audio_array = np.array(audio_segment.get_array_of_samples())
+        if audio_segment.sample_width == 2:
+            audio_array = audio_array.astype(np.float32) / 32768.0
+        else:
+            raise HTTPException(status_code=400, detail="Unsupported sample width")
+        start_time = time.time()
+        # Convert to the format expected by the model
+        inputs = processor(audio_array, sampling_rate=target_sample_rate, return_tensors="pt")
+        inputs = inputs.to(device)
 
+        # Get the transcription result
+        result = pipe(audio_array)
         # Calculate time taken
         time_taken = time.time() - start_time
+        transcription = result["text"]
 
         return {"transcription": transcription, "time_taken": time_taken}
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
+
+
+
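For quick manual testing of the updated endpoint, a minimal client sketch follows. The host, port, and sample file path are assumptions (the commit does not show how the server is launched); the handler reads the raw request body as WAV bytes and returns JSON with "transcription" and "time_taken".

    import requests  # third-party HTTP client, assumed installed

    # Hypothetical input: a 16-bit PCM WAV file; the handler rejects other sample widths.
    with open("sample.wav", "rb") as f:
        audio_bytes = f.read()

    # POST the raw bytes as the request body (the server reads them via request.body()).
    resp = requests.post("http://localhost:8000/transcribe/", data=audio_bytes)
    resp.raise_for_status()
    payload = resp.json()
    print(payload["transcription"], payload["time_taken"])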
 
app_backup.py CHANGED
@@ -72,4 +72,53 @@ async def transcribe_audio(request: Request):
 
         return {"transcription": transcription}
     except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+# from fastapi import FastAPI, Request, HTTPException
+# import io
+# import time
+# from faster_whisper import WhisperModel
+# import uvicorn
+
+# app = FastAPI()
+
+# model = WhisperModel("WajeehAzeemX/faster-whisper-smallar2-int8", device="cpu", compute_type="int8")
+
+# @app.post("/transcribe/")
+# async def transcribe_audio(request: Request):
+#     try:
+#         # Read binary data from the request
+#         audio_data = await request.body()
+
+#         # Convert binary data to a file-like object
+#         audio_file = io.BytesIO(audio_data)
+
+#         # Start timing the transcription
+#         start_time = time.time()
+
+#         # Transcribe the audio
+#         segments, info = model.transcribe(audio_file)
+#         transcription = " ".join([segment.text for segment in segments])
+
+#         # Calculate time taken
+#         time_taken = time.time() - start_time
+
+#         return {"transcription": transcription, "time_taken": time_taken}
+#     except Exception as e:
+#         raise HTTPException(status_code=500, detail=str(e))
+
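Neither version defines an entrypoint even though both import uvicorn, so the hosting runtime presumably starts the server. A conventional way to run the app locally, assuming the module is named app.py, would be to append:

    # Hypothetical entrypoint, not part of this commit.
    if __name__ == "__main__":
        uvicorn.run(app, host="0.0.0.0", port=8000)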