# Hugging Face Space application file (page header at capture time: status "Sleeping", file size 7,106 bytes).
# Revisions listed in the page's blame gutter: 0ca719f, c2c01b8, b051926, 52264b1, 11cf7c8.
# The original page also carried a 1-215 line-number gutter, which is not part of the program.
import torch
from transformers import pipeline
from transformers.pipelines.audio_utils import ffmpeg_read
import gradio as gr
# Checkpoint used for speech recognition.
MODEL_NAME = "seiching/whisper-small-seiching"
#MODEL_NAME = "openai/whisper-small"

# Batch size handed to the pipeline on every transcription call.
BATCH_SIZE = 8

# Use CUDA device 0 when CUDA is available, otherwise run on the CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = "cpu"

# Speech-recognition pipeline shared by every request; chunk_length_s is 30.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    device=device,
    chunk_length_s=30,
)
from openai import OpenAI
from concurrent.futures import ThreadPoolExecutor
import tiktoken
def call_openai_api(openaiobj, transcription):
    """Ask the chat model to turn one piece of transcript into meeting minutes.

    Args:
        openaiobj: Client object exposing ``chat.completions.create``.
        transcription: Transcript text, sent as the user message.

    Returns:
        The ``message.content`` of the first choice in the response.
    """
    # The system prompt (Chinese) tells the model it is a professional
    # minutes-taker working from a speech-recognition transcript that may
    # contain errors, and asks for a bulleted list of discussion items and
    # conclusions, without discussion details, in a formal, readable,
    # non-colloquial style.
    system_message = {
        "role": "system",
        "content": "你是專業的會議紀錄製作員,請根據由語音辨識軟體將會議錄音所轉錄的逐字稿,也請注意逐字稿可能有錯,請以條列式,列出討論事項及結論,討論內容細節請略過,要用比較正式及容易閱讀的寫法,避免口語化",
    }
    user_message = {"role": "user", "content": transcription}

    completion = openaiobj.chat.completions.create(
        model="gpt-3.5-turbo",
        temperature=0,
        messages=[system_message, user_message],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
def split_into_chunks(text, tokens=500):
    """Split *text* into consecutive pieces of at most *tokens* tokens each.

    The text is tokenized with the tiktoken encoding for ``gpt-3.5-turbo``,
    the token list is cut into slices of ``tokens`` items, and every slice is
    decoded back into a string.

    Args:
        text: The text to split.
        tokens: Maximum number of tokens per chunk. Must be positive.

    Returns:
        The decoded chunks in their original order; an empty list when the
        text encodes to no tokens.

    Raises:
        ValueError: If ``tokens`` is not positive.
    """
    if tokens <= 0:
        raise ValueError("tokens must be a positive integer")

    encoding = tiktoken.encoding_for_model('gpt-3.5-turbo')
    token_ids = encoding.encode(text)

    # decode() already returns a str. The previous ' '.join(...) iterated over
    # that string and put a space between every single character.
    # NOTE(review): a slice boundary may fall inside a character that spans
    # several tokens, in which case the chunk edges could decode to
    # replacement characters - confirm whether this matters for CJK text.
    return [
        encoding.decode(token_ids[start:start + tokens])
        for start in range(0, len(token_ids), tokens)
    ]
def process_chunks(openaikeystr, inputtext):
    """Summarize a transcript chunk by chunk and concatenate the results.

    Args:
        openaikeystr: API key entered by the user. It is currently unused:
            the client below points at ``http://localhost:1234/v1`` with a
            placeholder key.
        inputtext: The full transcript text.

    Returns:
        The per-chunk summaries joined in order, or ``""`` when the text
        produces no chunks.
    """
    # To use the hosted OpenAI API instead of the local endpoint, build the
    # client as OpenAI(api_key=openaikeystr).
    # The client used to be bound to the misspelled name "openaiojb" while the
    # loop read "openaiobj", which raised NameError on the first chunk.
    openaiobj = OpenAI(base_url="http://localhost:1234/v1", api_key="not-needed")

    chunks = split_into_chunks(inputtext)

    # Chunks are summarized one after another so the output keeps the
    # transcript order.
    return "".join(call_openai_api(openaiobj, chunk) for chunk in chunks)
import torch
from transformers import pipeline
from transformers.pipelines.audio_utils import ffmpeg_read
import gradio as gr
# MODEL_NAME, BATCH_SIZE, device and pipe are already defined earlier in this
# file with exactly the same values. They used to be repeated here, which
# built the speech-recognition pipeline (and loaded the model) a second time
# at startup. Only the name that is new at this point is kept.

# Most recent transcription result; transcribe() overwrites it on every call.
transcribe_text = "this is a test"
# Copied from https://github.com/openai/whisper/blob/c09a7ae299c4c34c5839a76380ae407e7d785914/whisper/utils.py#L50
def format_timestamp(seconds: float, always_include_hours: bool = False, decimal_marker: str = "."):
    """Render a time offset in seconds as ``[HH:]MM:SS<marker>mmm``.

    Args:
        seconds: Offset in seconds, or ``None``.
        always_include_hours: Emit the ``HH:`` prefix even when the hour
            count is zero.
        decimal_marker: Separator placed between seconds and milliseconds.

    Returns:
        The formatted string, or ``seconds`` unchanged when it is ``None``.
    """
    if seconds is None:
        # Malformed timestamp: hand it back as is.
        return seconds

    total_ms = round(seconds * 1000.0)
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1_000)

    if always_include_hours or hours > 0:
        prefix = f"{hours:02d}:"
    else:
        prefix = ""
    return f"{prefix}{minutes:02d}:{secs:02d}{decimal_marker}{millis:03d}"
def transcribe(file, task, return_timestamps):
    """Run speech recognition on an audio file and return the transcript.

    The result is also stored in the module-level ``transcribe_text``.

    Args:
        file: Path of the audio file supplied by the Gradio audio input, or
            ``None`` when nothing was recorded or uploaded.
        task: Task name forwarded to the model's generate kwargs; the UI
            offers "transcribe" and "translate".
        return_timestamps: When true, each line of the result is prefixed
            with ``[start -> end]`` timestamps.

    Returns:
        The transcript text, one timestamped line per chunk when
        ``return_timestamps`` is true.

    Raises:
        gr.Error: If no audio was provided.
    """
    global transcribe_text

    # Both audio inputs are declared optional, so the form can be submitted
    # empty. Report that clearly instead of failing inside the pipeline.
    # The message reads "Please record or upload an audio file before submitting".
    if file is None:
        raise gr.Error("請先錄音或上傳語音檔再送出")

    outputs = pipe(
        file,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": task, "language": "chinese"},
        return_timestamps=return_timestamps,
    )

    if return_timestamps:
        # format_timestamp() passes None through, so a chunk with a missing
        # start or end still renders.
        text = "\n".join(
            f"[{format_timestamp(chunk['timestamp'][0])} -> {format_timestamp(chunk['timestamp'][1])}] {chunk['text']}"
            for chunk in outputs["chunks"]
        )
    else:
        text = outputs["text"]

    transcribe_text = text
    return text
# Top-level Blocks container; the tabbed UI is attached to it at the end of
# the file.
demo = gr.Blocks()

# Interface that transcribes audio recorded from the microphone.
# The three inputs map positionally to transcribe(file, task, return_timestamps).
mic_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.inputs.Audio(source="microphone", type="filepath", optional=True),
        gr.inputs.Radio(["transcribe", "translate"], label="Task", default="transcribe"),
        gr.inputs.Checkbox(default=False, label="Return timestamps"),
    ],
    outputs="text",
    layout="horizontal",
    theme="huggingface",
    # Title (Chinese): "Meeting-minutes assistant AINotes".
    title="會議紀錄小幫手AINotes",
    # Description (Chinese): record from the microphone or upload an audio
    # file; this model does speech recognition first and then the
    # meeting-minutes summary; no length limit.
    description=(
        "可由麥克風錄音或上傳語音檔"
        f" 使用這個模型 [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) 先做語音辨識再做會議紀錄摘要"
        " 長度沒有限制"
    ),
    allow_flagging="never",
)
# Interface that transcribes an uploaded audio file.
# The three inputs map positionally to transcribe(file, task, return_timestamps).
file_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.inputs.Audio(source="upload", optional=True, label="Audio file", type="filepath"),
        gr.inputs.Radio(["transcribe", "translate"], label="Task", default="transcribe"),
        gr.inputs.Checkbox(default=False, label="Return timestamps"),
    ],
    outputs="text",
    layout="horizontal",
    theme="huggingface",
    # Title (Chinese): "Meeting-minutes assistant AINotes".
    title="會議紀錄小幫手AINotes",
    # Description (Chinese): record from the microphone or upload an audio
    # file; this model does speech recognition first and then the
    # meeting-minutes summary; no length limit.
    description=(
        "可由麥克風錄音或上傳語音檔"
        f" 使用這個模型 [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) 先做語音辨識再做會議紀錄摘要"
        " 長度沒有限制"
    ),
    # The examples list is commented out, so no examples are passed here.
    # examples=[
    # ["./example.flac", "transcribe", False],
    # ["./example.flac", "transcribe", True],
    # ],
    cache_examples=True,
    allow_flagging="never",
)
def writenotes(apikeystr, inputscript):
    """Produce meeting minutes from a pasted transcript.

    Args:
        apikeystr: API key text from the UI, forwarded to process_chunks().
        inputscript: Transcript pasted by the user. When it is longer than
            10 characters it is summarized; otherwise the module-level
            ``transcribe_text`` (the latest transcription result) is used.

    Returns:
        The minutes text returned by process_chunks().
    """
    # The fallback used to be assigned to a local variable only inside the
    # length check, so short or empty input raised UnboundLocalError instead
    # of falling back to the stored transcript.
    # NOTE(review): transcribe_text is module-level state, so it is presumably
    # shared by everyone using the app - confirm that this is acceptable.
    pasted = inputscript or ""
    script = pasted if len(pasted) > 10 else transcribe_text
    return process_chunks(apikeystr, script)
# Interface that turns a transcript into meeting minutes.
# The two text boxes map positionally to writenotes(apikeystr, inputscript).
ainotes = gr.Interface(
    fn=writenotes,
    # Labels/placeholders (Chinese): "please enter sk..." for the API key box;
    # "transcript" / "please enter the transcript" for the second box.
    inputs=[gr.Textbox(label="OPEN AI API KEY",placeholder="請輸入sk..."),gr.Textbox(label="逐字稿",placeholder="請輸入逐字稿")],
    outputs="text",
    layout="horizontal",
    theme="huggingface",
    # Title (Chinese): "Meeting-minutes assistant AINotes".
    title="會議紀錄小幫手AINotes",
    # Description (Chinese): record from the microphone or upload an audio
    # file; this model does speech recognition first and then the
    # meeting-minutes summary; no length limit.
    description=(
        "可由麥克風錄音或上傳語音檔"
        f" 使用這個模型 [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) 先做語音辨識再做會議紀錄摘要"
        " 長度沒有限制"
    ),
    # The examples list is commented out, so no examples are passed here.
    # examples=[
    # ["./example.flac", "transcribe", False],
    # ["./example.flac", "transcribe", True],
    # ],
    cache_examples=True,
    allow_flagging="never",
)
# Attach the three interfaces to the Blocks container as tabs. Tab labels
# (Chinese): "audio file recognition", "microphone audio recognition",
# "generate meeting minutes".
with demo:
    gr.TabbedInterface([file_transcribe,mic_transcribe,ainotes], ["語音檔辨識","麥克風語音檔辨識","產生會議紀錄" ])

# Start the app with request queuing enabled. A stray trailing "|" that
# followed this call was a syntax error and has been removed.
demo.launch(enable_queue=True)