minhpng committed on
Commit
7388360
·
1 Parent(s): 3c36fb5

using openai whisper + spaces api for chat

Browse files
libs/transformer/get_chat_gradio.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from functools import lru_cache

from gradio_client import Client

# Hugging Face Space queried for chat completions by default.
DEFAULT_CHAT_SPACE = "Xkev/Llama-3.2V-11B-cot"


@lru_cache(maxsize=4)
def _get_client(space_id: str) -> Client:
    """Return a cached ``Client`` for *space_id*.

    Building a ``Client`` contacts the Space, so it is created once per
    Space and reused instead of being rebuilt for every chat message.
    """
    return Client(space_id)


def get_chat_gradio(
    text: str,
    *,
    max_new_tokens: int = 512,
    space_id: str = DEFAULT_CHAT_SPACE,
):
    """Send *text* to the ``/chat`` endpoint of a Gradio Space.

    Args:
        text: The user message. It is sent without file attachments.
        max_new_tokens: Generation limit forwarded to the endpoint.
        space_id: Identifier of the Space to query.

    Returns:
        Whatever the Space's ``/chat`` endpoint returns, unmodified.
        NOTE(review): presumably the generated reply text -- confirm
        against the Space's API page.
    """
    return _get_client(space_id).predict(
        message={"text": text, "files": []},
        max_new_tokens=max_new_tokens,
        api_name="/chat",
    )
libs/transformer/open_ai_whisper.py CHANGED
@@ -2,27 +2,36 @@ import torch
2
  from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
3
 
4
 
5
- device = "cuda:0" if torch.cuda.is_available() else "cpu"
6
- torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
7
-
8
- model_id = "openai/whisper-large-v3-turbo"
9
-
10
- model = AutoModelForSpeechSeq2Seq.from_pretrained(
11
- model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
12
- )
13
- model.to(device)
14
-
15
- processor = AutoProcessor.from_pretrained(model_id)
16
-
17
- pipe = pipeline(
18
- "automatic-speech-recognition",
19
- model=model,
20
- tokenizer=processor.tokenizer,
21
- feature_extractor=processor.feature_extractor,
22
- torch_dtype=torch_dtype,
23
- device=device,
24
- return_timestamps=True
25
- )
26
-
27
- result = pipe("https://static.langkingdom.com/user_playlist_practice_videos/bdfd406cb3c62603f653fa02d93fcae8.mov")
28
- print(result["text"])
 
 
 
 
 
 
 
 
 
 
2
  from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
3
 
4
 
5
_WHISPER_MODEL_ID = "openai/whisper-large-v3-turbo"

# Lazily-built speech-recognition pipeline, shared by all calls so the model
# weights are loaded once per process instead of once per request.
_asr_pipeline = None


def _get_asr_pipeline():
    """Build the Whisper speech-recognition pipeline on first use and reuse it."""
    global _asr_pipeline
    if _asr_pipeline is None:
        use_cuda = torch.cuda.is_available()
        device = "cuda:0" if use_cuda else "cpu"
        # Half precision is only selected when running on a GPU.
        torch_dtype = torch.float16 if use_cuda else torch.float32

        model = AutoModelForSpeechSeq2Seq.from_pretrained(
            _WHISPER_MODEL_ID,
            torch_dtype=torch_dtype,
            low_cpu_mem_usage=True,
            use_safetensors=True,
        )
        model.to(device)

        processor = AutoProcessor.from_pretrained(_WHISPER_MODEL_ID)

        _asr_pipeline = pipeline(
            "automatic-speech-recognition",
            model=model,
            tokenizer=processor.tokenizer,
            feature_extractor=processor.feature_extractor,
            torch_dtype=torch_dtype,
            device=device,
            return_timestamps=True,
        )
    return _asr_pipeline


def open_ai_whisper_api(url: str):
    """Transcribe the audio at *url* with Whisper, forcing English output.

    Args:
        url: Location of the media to transcribe, passed straight to the
            pipeline.

    Returns:
        A ``(text, chunks)`` tuple taken from the pipeline result's
        ``"text"`` and ``"chunks"`` entries. Either element is ``None``
        when the corresponding key is absent.
        NOTE(review): chunks are presumably dicts carrying ``"timestamp"``
        and ``"text"`` -- confirm against the pipeline documentation.
    """
    generate_kwargs = {
        "return_timestamps": True,
        "language": "english",
    }
    result = _get_asr_pipeline()(url, generate_kwargs=generate_kwargs)
    return result.get("text"), result.get("chunks")
37
+
routers/get_chatrespone.py CHANGED
@@ -11,6 +11,8 @@ from pydantic import BaseModel
11
  from fastapi.responses import StreamingResponse
12
  from langchain_ollama import ChatOllama, OllamaLLM
13
 
 
 
14
  load_dotenv()
15
  HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", )
16
 
@@ -29,18 +31,20 @@ async def get_chat_respone(body: ChatInputForm, api_key: str = Depends(get_api_k
29
  prompt = get_prompt(body.prompt)
30
 
31
  try:
32
- llm = OllamaLLM(
33
- model=body.repo_id,
34
- temperature=0.2,
35
- # huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
36
- )
37
-
38
- messages = [
39
- ("system", prompt),
40
- ("human", body.textInput)
41
- ]
42
-
43
- response = llm.stream(messages)
 
 
44
 
45
  return StreamingResponse(get_response(response), media_type='text/event-stream')
46
  except Exception:
 
11
  from fastapi.responses import StreamingResponse
12
  from langchain_ollama import ChatOllama, OllamaLLM
13
 
14
+ from libs.transformer.get_chat_gradio import get_chat_gradio
15
+
16
  load_dotenv()
17
  HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", )
18
 
 
31
  prompt = get_prompt(body.prompt)
32
 
33
  try:
34
+ # llm = OllamaLLM(
35
+ # model=body.repo_id,
36
+ # temperature=0.2,
37
+ # # huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
38
+ # )
39
+
40
+ # messages = [
41
+ # ("system", prompt),
42
+ # ("human", body.textInput)
43
+ # ]
44
+
45
+ # response = llm.stream(messages)
46
+
47
+ response = get_chat_gradio(body.textInput)
48
 
49
  return StreamingResponse(get_response(response), media_type='text/event-stream')
50
  except Exception:
routers/get_transcript_gradio.py CHANGED
@@ -6,6 +6,7 @@ from fastapi import APIRouter, Depends, HTTPException, status
6
 
7
  from libs.convert_to_audio import convert_to_audio
8
  from libs.header_api_auth import get_api_key
 
9
  from libs.transformer.get_transcript_gradio_api import api_gradio_transcribe
10
 
11
 
@@ -18,27 +19,36 @@ def get_transcript(audio_path: str, model_size: str = "distil-whisper/distil-sma
18
 
19
  output_audio_folder = f"./cached/audio"
20
 
21
- if not os.path.exists(output_audio_folder):
22
- os.makedirs(output_audio_folder)
23
 
24
 
25
- output_file = f"{output_audio_folder}/{audio_path.split('/')[-1].split(".")[0]}.mp3"
26
- convert_to_audio(audio_path.strip(), output_file)
27
 
28
  try:
29
- text = api_gradio_transcribe(output_file)
30
 
31
  except Exception as error:
32
  raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail=f"error>>>: {error}")
33
- finally:
34
- if os.path.exists(output_file):
35
- os.remove(output_file)
 
 
 
 
 
 
 
 
 
36
 
37
  et = time.time()
38
 
39
  elapsed_time = et - st
40
 
41
  return {"text": text,
42
- 'list_sentence': [],
43
  'elapsed_time': round(elapsed_time, 2)
44
  }
 
6
 
7
  from libs.convert_to_audio import convert_to_audio
8
  from libs.header_api_auth import get_api_key
9
+ from libs.transformer.open_ai_whisper import open_ai_whisper_api
10
  from libs.transformer.get_transcript_gradio_api import api_gradio_transcribe
11
 
12
 
 
19
 
20
  output_audio_folder = f"./cached/audio"
21
 
22
+ # if not os.path.exists(output_audio_folder):
23
+ # os.makedirs(output_audio_folder)
24
 
25
 
26
+ # output_file = f"{output_audio_folder}/{audio_path.split('/')[-1].split(".")[0]}.mp3"
27
+ # convert_to_audio(audio_path.strip(), output_file)
28
 
29
  try:
30
+ text, chunks = open_ai_whisper_api(audio_path)
31
 
32
  except Exception as error:
33
  raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail=f"error>>>: {error}")
34
+ # finally:
35
+ # if os.path.exists(output_file):
36
+ # os.remove(output_file)
37
+
38
+ listSentences = []
39
+
40
+ for chunk in chunks:
41
+ listSentences.append({
42
+ "start_time": chunk.get("timestamp")[0],
43
+ "end_time": chunk.get("timestamp")[1],
44
+ "text": chunk.get("text")
45
+ })
46
 
47
  et = time.time()
48
 
49
  elapsed_time = et - st
50
 
51
  return {"text": text,
52
+ 'list_sentence': listSentences,
53
  'elapsed_time': round(elapsed_time, 2)
54
  }