minhpng committed on
Commit
c5bb903
·
1 Parent(s): 7e4321a

add transformer get transcript

Browse files
app.py CHANGED
@@ -3,7 +3,7 @@ import os
3
  from fastapi import FastAPI
4
  from fastapi.middleware.cors import CORSMiddleware
5
 
6
- from routers import get_transcript, get_chatrespone
7
 
8
  os.environ['HF_HOME'] = "./cached/"
9
 
@@ -13,6 +13,7 @@ app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_credentials=True,
13
 
14
  app.include_router(get_transcript.router)
15
  app.include_router(get_chatrespone.router)
 
16
 
17
  @app.get("/")
18
  def read_root():
 
3
  from fastapi import FastAPI
4
  from fastapi.middleware.cors import CORSMiddleware
5
 
6
+ from routers import get_transcript, get_chatrespone, get_transcript_transformer
7
 
8
  os.environ['HF_HOME'] = "./cached/"
9
 
 
13
 
14
  app.include_router(get_transcript.router)
15
  app.include_router(get_chatrespone.router)
16
+ app.include_router(get_transcript_transformer.router)
17
 
18
  @app.get("/")
19
  def read_root():
libs/transformer/get_transcript.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Stand-alone demo: transcribe a sample audio URL with distil-whisper.

Importing this module loads the model and builds ``pipe``. The sample
transcription itself only runs when the file is executed as a script, so
an import no longer triggers a remote audio fetch and a ``print``.
"""
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline

# Use the first CUDA device with float16 when available; otherwise CPU/float32.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

model_id = "distil-whisper/distil-large-v3"

model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
)
model.to(device)

processor = AutoProcessor.from_pretrained(model_id)

pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    max_new_tokens=128,
    torch_dtype=torch_dtype,
    device=device,
    return_timestamps=True,
)

# Sample recording used by the demo run below.
SAMPLE_AUDIO_URL = "https://static.langkingdom.com/user_playlist_practice_videos/2114103294b5c15605fd59773e948e58.mp3"


if __name__ == "__main__":
    # Only transcribe the sample when run directly (python get_transcript.py).
    result = pipe(SAMPLE_AUDIO_URL)
    print(result)
libs/transformer/get_transcript_2.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Speech-to-text helper built on the ``transformers`` ASR pipeline."""
from functools import lru_cache

from transformers import pipeline

MODEL_ID = "distil-whisper/distil-large-v3"


@lru_cache(maxsize=1)
def _get_pipeline():
    """Build the speech-recognition pipeline once and reuse it afterwards.

    Constructing the pipeline loads the model, so doing it on every request
    (as the previous version did) repeats that cost for each call.
    """
    return pipeline(
        "automatic-speech-recognition",
        model=MODEL_ID,
        return_timestamps=True,
    )


def get_transcribe_transformers(url: str):
    """Transcribe the audio referenced by *url*.

    Args:
        url: Audio location handed straight to the pipeline.

    Returns:
        A ``(text, chunks)`` tuple read from the pipeline result's ``"text"``
        and ``"chunks"`` entries; either element is ``None`` when the
        corresponding key is missing.
    """
    result = _get_pipeline()(url)
    return result.get("text"), result.get("chunks")


if __name__ == "__main__":
    # Manual smoke test. Kept out of import time because the API router
    # imports this module, and an import must not download and transcribe audio.
    text, chunks = get_transcribe_transformers("https://static.langkingdom.com/user_playlist_practice_videos/0a871d06c1e8e1174da09d5aad6ec550.x-m4a")

    print(chunks[0].get("timestamp")[1])
routers/get_chatrespone.py CHANGED
@@ -24,7 +24,7 @@ class ChatInputForm(BaseModel):
24
  prompt: str
25
 
26
  @router.post("/")
27
- async def get_chat_respone(body: ChatInputForm):
28
 
29
  prompt = get_prompt(body.prompt)
30
 
@@ -57,17 +57,16 @@ def get_response(response: Iterator[str]):
57
  yield chunk
58
 
59
 
60
- checkWritting = """You'll be provided with a text: {prompt}
61
  ---------------
62
  IMPORTANT:
63
  - If the text is empty, do nothing.
64
  - If the given text maintains grammatical accuracy, no suggestions are needed.
65
  - If the given text is empty, do nothing.
66
  - If the given text contains any errors in grammatical accuracy, provide the corrected text.
67
-
68
  """
69
 
70
- template = """You are a helpful English teacher. Chat and do user requirement.
71
  Answer: Let's think step by step."""
72
  baiGiang = """Provide the given phrase in English. Provide the correct and popularly used English phrase along with its American IPA pronunciation and a brief explanation for it. Use the correct English phrase to create 4 example sentences along with the example IPA and brief meanings. Finally, suggest 4 similar English phrases with the correct English version, along with American IPA and their brief meanings.
73
  Provie your response in markdown format"""
 
24
  prompt: str
25
 
26
  @router.post("/")
27
+ async def get_chat_respone(body: ChatInputForm, api_key: str = Depends(get_api_key)):
28
 
29
  prompt = get_prompt(body.prompt)
30
 
 
57
  yield chunk
58
 
59
 
60
+ checkWritting = """You'll be provided with a text. Convert the text to standard English.
61
  ---------------
62
  IMPORTANT:
63
  - If the text is empty, do nothing.
64
  - If the given text maintains grammatical accuracy, no suggestions are needed.
65
  - If the given text is empty, do nothing.
66
  - If the given text contains any errors in grammatical accuracy, provide the corrected text.
 
67
  """
68
 
69
+ template = """You are a helpful assistant. Do whatever user require. Response in markdown format.
70
  Answer: Let's think step by step."""
71
  baiGiang = """Provide the given phrase in English. Provide the correct and popularly used English phrase along with its American IPA pronunciation and a brief explanation for it. Use the correct English phrase to create 4 example sentences along with the example IPA and brief meanings. Finally, suggest 4 similar English phrases with the correct English version, along with American IPA and their brief meanings.
72
  Provie your response in markdown format"""
routers/get_transcript.py CHANGED
@@ -105,4 +105,4 @@ def get_transcript(audio_path: str, model_size: str = "distil-large-v3", api_key
105
  # "text": " She can't help but smile wider."
106
  # }
107
  # ]
108
- # }
 
105
  # "text": " She can't help but smile wider."
106
  # }
107
  # ]
108
+ # }
routers/get_transcript_transformer.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import time
from fastapi import APIRouter, Depends, HTTPException, status

from libs.transformer.get_transcript_2 import get_transcribe_transformers

from libs.header_api_auth import get_api_key


router = APIRouter(prefix="/get-transcript-transformer", tags=["transcript"])


@router.get("/")
def get_transcript(audio_path: str, api_key: str = Depends(get_api_key)):
    """Transcribe ``audio_path`` and return the text with per-chunk timestamps.

    Args:
        audio_path: Audio location forwarded to ``get_transcribe_transformers``.
        api_key: Resolved through the ``get_api_key`` dependency
            (presumably validates a request header; confirm in
            ``libs.header_api_auth``).

    Returns:
        A dict with ``text`` (the full transcript), ``list_sentence`` (one
        ``start_time`` / ``end_time`` / ``text`` entry per chunk) and
        ``elapsed_time`` (seconds, rounded to two decimals).

    Raises:
        HTTPException: When the transcription helper raises any exception.
    """
    # perf_counter is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments the way time.time() can.
    started = time.perf_counter()

    try:
        text, chunks = get_transcribe_transformers(audio_path)
    except Exception as error:
        # NOTE(review): 403 is kept for client compatibility, but a failed
        # transcription is not an authorization problem; consider 400/500.
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN, detail=f"error>>>: {error}"
        ) from error

    # The helper returns None for chunks when the pipeline result has no
    # "chunks" entry; treat that as an empty list instead of crashing.
    sentences = [
        {
            "start_time": chunk.get("timestamp")[0],
            "end_time": chunk.get("timestamp")[1],
            "text": chunk.get("text"),
        }
        for chunk in chunks or []
    ]

    return {
        "text": text,
        "list_sentence": sentences,
        "elapsed_time": round(time.perf_counter() - started, 2),
    }