Use OpenAI Whisper + Spaces API for chat
Files changed:
- libs/transformer/get_chat_gradio.py  +14 -0
- libs/transformer/open_ai_whisper.py  +33 -24
- routers/get_chatrespone.py  +16 -12
- routers/get_transcript_gradio.py  +19 -9
libs/transformer/get_chat_gradio.py (ADDED)

```diff
@@ -0,0 +1,14 @@
+from gradio_client import Client
+
+def get_chat_gradio(text: str):
+    client = Client("Xkev/Llama-3.2V-11B-cot")
+    result = client.predict(
+        message={"text":text,"files":[]},
+        max_new_tokens=512,
+        api_name="/chat"
+    )
+    return result
+
+# res = get_chat_gradio("Hello, let's chat with me")
+
+# print(res)
```
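Note that `Client.predict` blocks until the Space finishes and returns the complete reply as a single value rather than a token stream, which matters for the router change further down. A minimal local check might look like this (the prompt is illustrative):

```python
# Minimal sketch: one blocking call to the Space's /chat endpoint.
# Assumes the Xkev/Llama-3.2V-11B-cot Space is awake and reachable.
from libs.transformer.get_chat_gradio import get_chat_gradio

reply = get_chat_gradio("Explain what a vector database is in two sentences.")
print(type(reply))  # the full generated answer, returned in one piece
print(reply)
```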
libs/transformer/open_ai_whisper.py (CHANGED)

```diff
@@ -2,27 +2,36 @@ import torch
 from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
 
 
-… (24 removed lines; their content is not visible in this view)
+def open_ai_whisper_api(url: str):
+    device = "cuda:0" if torch.cuda.is_available() else "cpu"
+    torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+
+    model_id = "openai/whisper-large-v3-turbo"
+
+    model = AutoModelForSpeechSeq2Seq.from_pretrained(
+        model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
+    )
+    model.to(device)
+
+    processor = AutoProcessor.from_pretrained(model_id)
+
+    pipe = pipeline(
+        "automatic-speech-recognition",
+        model=model,
+        tokenizer=processor.tokenizer,
+        feature_extractor=processor.feature_extractor,
+        torch_dtype=torch_dtype,
+        device=device,
+        return_timestamps=True,
+    )
+
+    generate_kwargs = {
+        "return_timestamps": True,
+        "language": "english"
+    }
+
+
+    result = pipe(url, generate_kwargs=generate_kwargs)
+    print('vao day')  # leftover debug print; Vietnamese for "got here"
+    return result.get('text'), result.get('chunks')
```
routers/get_chatrespone.py (CHANGED)

```diff
@@ -11,6 +11,8 @@ from pydantic import BaseModel
 from fastapi.responses import StreamingResponse
 from langchain_ollama import ChatOllama, OllamaLLM
 
+from libs.transformer.get_chat_gradio import get_chat_gradio
+
 load_dotenv()
 HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", )
 
@@ -29,18 +31,20 @@ async def get_chat_respone(body: ChatInputForm, api_key: str = Depends(get_api_key)):
     prompt = get_prompt(body.prompt)
 
     try:
-        llm = OllamaLLM(
-            model=body.repo_id,
-            temperature=0.2,
-            # huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
-        )
-
-        messages = [
-            ("system", prompt),
-            ("human", body.textInput)
-        ]
-
-        response = llm.stream(messages)
+        # llm = OllamaLLM(
+        #     model=body.repo_id,
+        #     temperature=0.2,
+        #     # huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
+        # )
+
+        # messages = [
+        #     ("system", prompt),
+        #     ("human", body.textInput)
+        # ]
+
+        # response = llm.stream(messages)
+
+        response = get_chat_gradio(body.textInput)
 
         return StreamingResponse(get_response(response), media_type='text/event-stream')
     except Exception:
```
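Note the type change here: `llm.stream(messages)` yielded chunks, while `get_chat_gradio` returns one complete string, so the `get_response` helper (not shown in this diff) has to cope with both for the `StreamingResponse` to keep working. A hypothetical shape for that helper, assuming it is a plain generator:

```python
# Hypothetical get_response: the real helper is not shown in this diff.
# It must now accept a plain string from get_chat_gradio as well as the
# chunk generator that llm.stream() used to produce.
def get_response(response):
    if isinstance(response, str):
        yield response          # single Gradio reply, sent as one event
    else:
        for chunk in response:  # LangChain streaming chunks
            yield str(chunk)
```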
routers/get_transcript_gradio.py (CHANGED)

```diff
@@ -6,6 +6,7 @@ from fastapi import APIRouter, Depends, HTTPException, status
 
 from libs.convert_to_audio import convert_to_audio
 from libs.header_api_auth import get_api_key
+from libs.transformer.open_ai_whisper import open_ai_whisper_api
 from libs.transformer.get_transcript_gradio_api import api_gradio_transcribe
 
 
@@ -18,27 +19,36 @@ def get_transcript(audio_path: str, model_size: str = "distil-whisper/distil-sma…
 
     output_audio_folder = f"./cached/audio"
 
-    if not os.path.exists(output_audio_folder):
-        os.makedirs(output_audio_folder)
+    # if not os.path.exists(output_audio_folder):
+    #     os.makedirs(output_audio_folder)
 
 
-    output_file = f"{output_audio_folder}/{audio_path.split('/')[-1].split(".")[0]}.mp3"
-    convert_to_audio(audio_path.strip(), output_file)
+    # output_file = f"{output_audio_folder}/{audio_path.split('/')[-1].split(".")[0]}.mp3"
+    # convert_to_audio(audio_path.strip(), output_file)
 
     try:
-        text = …
+        text, chunks = open_ai_whisper_api(audio_path)
 
     except Exception as error:
         raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail=f"error>>>: {error}")
-    finally:
-        if os.path.exists(output_file):
-            os.remove(output_file)
+    # finally:
+    #     if os.path.exists(output_file):
+    #         os.remove(output_file)
+
+    listSentences = []
+
+    for chunk in chunks:
+        listSentences.append({
+            "start_time": chunk.get("timestamp")[0],
+            "end_time": chunk.get("timestamp")[1],
+            "text": chunk.get("text")
+        })
 
     et = time.time()
 
     elapsed_time = et - st
 
     return {"text": text,
-            'list_sentence': …,
+            'list_sentence': listSentences,
             'elapsed_time': round(elapsed_time, 2)
            }
```
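One edge case worth guarding in the new chunk loop: on long audio the Whisper pipeline can emit `None` as the end timestamp of the final chunk. A defensive variant of the loop, using the same names as the diff, might be:

```python
# Defensive variant of the listSentences loop: tolerate a chunk whose
# timestamp is missing or whose end is None (possible on the final chunk).
listSentences = []
for chunk in chunks:
    start, end = chunk.get("timestamp") or (None, None)
    listSentences.append({
        "start_time": start,
        "end_time": end,
        "text": chunk.get("text")
    })
```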