Spaces:
Sleeping
Sleeping
File size: 11,882 Bytes
0ca719f b7ba5fb 0ca719f b7ba5fb bd2f0d0 d182662 9c0b98f 0ca719f 4082345 8ee0b64 0ca719f b051926 dab3682 b051926 bc1215e b051926 5e977c0 b051926 dab3682 5e5f20d bc1215e 5e5f20d dab3682 5e5f20d 40ae977 083e8d5 dab3682 083e8d5 5e5f20d b051926 dab3682 bc1215e dab3682 b051926 dab3682 52264b1 b051926 52264b1 44d039c b7ba5fb 6091c72 b7ba5fb b051926 dab3682 7e749fe 083e8d5 dab3682 083e8d5 7222f55 dab3682 083e8d5 dab3682 083e8d5 bd2f0d0 dab3682 494d547 b051926 3ef2f86 b051926 0ca719f 548d77e 0ca719f b051926 0ca719f 548d77e 0ca719f b051926 0ca719f b051926 2d533cf b051926 0ca719f 548d77e 0ca719f b051926 0ca719f b051926 2d533cf b051926 0ca719f b051926 bd2f0d0 dab3682 40ae977 e818920 40ae977 e818920 dab3682 e818920 a5339f4 4082345 a5339f4 bd2f0d0 4082345 bd2f0d0 4082345 b051926 548d77e b051926 bd2f0d0 dab3682 bd2f0d0 40ae977 bd2f0d0 4082345 bd2f0d0 6091c72 b051926 6fce298 b051926 494d547 2d533cf b051926 0ca719f b051926 0ca719f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 |
import torch
from transformers import pipeline
from transformers.pipelines.audio_utils import ffmpeg_read
import gradio as gr
import os
# Credentials are read from the environment at import time; a missing
# variable raises KeyError right away.
# When `hugapikey` equals 'test', the GPT helpers in this file use the key
# typed into the UI instead of this value.
hugapikey = os.environ["openaikey"]
genaikey = os.environ["genaikey"]

# Whisper checkpoint used for speech recognition.
# Alternative checkpoint: "seiching/whisper-small-seiching"
MODEL_NAME = "openai/whisper-tiny"
BATCH_SIZE = 8

# Default instruction text shown in the prompt box of the minutes tab.
DEFAULTPROMPT = '你是專業的會議紀錄製作員,請根據由語音辨識軟體將會議錄音所轉錄的逐字稿,也請注意逐字稿可能有錯,請先做校正,討論內容細節請略過,請根據校正過的逐字稿撰寫會議紀錄,並要用比較正式及容易閱讀的寫法,避免口語化'

# Use the first CUDA device when one is available, otherwise the CPU.
device = "cpu"
if torch.cuda.is_available():
    device = 0

# Speech-recognition pipeline shared by the transcription tabs.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=device,
)
from openai import OpenAI
from concurrent.futures import ThreadPoolExecutor
import tiktoken
def call_openai_makenote(openaiobj, transcription, usemodelname):
    """Ask a chat model to write meeting minutes straight from a transcript.

    Used when the whole transcript is sent in a single request (GPT-4, or
    GPT-3.5 when the text is under 16K tokens).

    Args:
        openaiobj: Client object exposing ``chat.completions.create``.
        transcription: Transcript text sent as the user message.
        usemodelname: Name of the chat model to call.

    Returns:
        The content of the first choice in the completion.
    """
    system_message = {
        "role": "system",
        "content": "你是專業的會議紀錄製作員,請根據由語音辨識軟體將會議錄音所轉錄的逐字稿,也請注意逐字稿可能有錯,請先做校正,討論內容細節請略過,請根據校正過的逐字稿撰寫會議紀錄,並要用比較正式及容易閱讀的寫法,避免口語化",
    }
    user_message = {"role": "user", "content": transcription}
    completion = openaiobj.chat.completions.create(
        model=usemodelname,
        temperature=0,
        messages=[system_message, user_message],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
def call_openai_summary(openaiobj, transcription, usemodelname):
    """Ask a chat model to correct and summarise one segment of a transcript.

    Args:
        openaiobj: Client object exposing ``chat.completions.create``.
        transcription: Transcript segment sent as the user message.
        usemodelname: Name of the chat model to call.

    Returns:
        The content of the first choice in the completion.
    """
    system_message = {
        "role": "system",
        "content": "你是專業的會議紀錄製作員,請根據由語音辨識軟體將會議錄音所轉錄的逐字稿,也請注意逐字稿可能有錯,請先校正,再摘要會議重點內容",
    }
    user_message = {"role": "user", "content": transcription}
    completion = openaiobj.chat.completions.create(
        model=usemodelname,
        temperature=0,
        messages=[system_message, user_message],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
def call_openai_summaryall(openaiobj, transcription, usemodelname):
    """Ask a chat model to merge per-segment summaries into formal minutes.

    Args:
        openaiobj: Client object exposing ``chat.completions.create``.
        transcription: Text of the segment summaries, sent as the user message.
        usemodelname: Name of the chat model to call.

    Returns:
        The content of the first choice in the completion.
    """
    system_message = {
        "role": "system",
        "content": "你是專業的會議紀錄製作員,請根據分段的會議摘要,彙整成正式會議紀錄,並要用比較正式及容易閱讀的寫法,避免口語化",
    }
    user_message = {"role": "user", "content": transcription}
    completion = openaiobj.chat.completions.create(
        model=usemodelname,
        temperature=0,
        messages=[system_message, user_message],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
def split_into_chunks(text, LLMmodel, tokens=15900):
    """Split ``text`` into pieces of at most ``tokens`` tokens each.

    The text is tokenised with the tiktoken encoding registered for
    ``LLMmodel``, the token list is cut into consecutive slices of ``tokens``
    tokens, and every slice is decoded back to a string.

    Args:
        text: Text to split.
        LLMmodel: Model name passed to ``tiktoken.encoding_for_model``.
        tokens: Maximum number of tokens per piece.

    Returns:
        A list of strings in original order; empty when ``text`` encodes to
        no tokens.
    """
    encoding = tiktoken.encoding_for_model(LLMmodel)
    token_ids = encoding.encode(text)
    # decode() already returns a str. The previous ' '.join(...) around it
    # iterated over that string's characters and inserted a space between
    # every one, corrupting the text and inflating its token count.
    return [
        encoding.decode(token_ids[start:start + tokens])
        for start in range(0, len(token_ids), tokens)
    ]
def gpt3write(openaikeystr, inputtext, LLMmodel):
    """Produce meeting minutes from a transcript, chunking it when it is long.

    The transcript is split with ``split_into_chunks``. If it fits in one
    chunk the minutes are written in a single request. Otherwise every chunk
    is summarised separately and the numbered summaries are then merged into
    one set of minutes, which is appended after the per-chunk summaries.

    Args:
        openaikeystr: API key typed by the user; only used when the
            environment-provided key ``hugapikey`` equals 'test'.
        inputtext: Full transcript text.
        LLMmodel: Chat model name, also used to pick the tokenizer.

    Returns:
        The generated minutes as a string.
    """
    api_key = openaikeystr if hugapikey == 'test' else hugapikey
    client = OpenAI(api_key=api_key)

    pieces = split_into_chunks(inputtext, LLMmodel)
    if len(pieces) <= 1:
        # Short transcript: write the minutes directly.
        return call_openai_makenote(client, inputtext, LLMmodel)

    # Long transcript: summarise each piece under a numbered heading.
    sections = ['這是分段會議紀錄摘要\n\n']
    for number, piece in enumerate(pieces, start=1):
        piece_summary = call_openai_summary(client, piece, LLMmodel)
        sections.append('第' + str(number) + '段\n' + piece_summary + '\n\n')
    partial_notes = ''.join(sections)

    # Merge the per-piece summaries into one final record.
    merged_notes = call_openai_summaryall(client, partial_notes, LLMmodel)
    return partial_notes + '\n\n 這是根據以上分段會議紀錄彙編如下 \n\n' + merged_notes
# # Processes chunks in parallel
# with ThreadPoolExecutor() as executor:
# responses = list(executor.map(call_openai_api, [openaiobj,chunks]))
# return responses
import torch
from transformers import pipeline
from transformers.pipelines.audio_utils import ffmpeg_read
import gradio as gr
# Most recent transcript. `transcribe` stores its result here and `writenotes`
# reads it when no transcript is pasted into the UI. This is module-level
# state, shared by every caller of those functions in this process.
transcribe_text = ""

# `device` and `pipe` are already created near the top of this file from the
# same MODEL_NAME and with identical arguments. Building the pipeline a second
# time here only loaded the Whisper model twice at startup, so the duplicate
# construction was removed.
# Copied from https://github.com/openai/whisper/blob/c09a7ae299c4c34c5839a76380ae407e7d785914/whisper/utils.py#L50
def format_timestamp(seconds: float, always_include_hours: bool = False, decimal_marker: str = "."):
    """Render a time offset in seconds as ``[HH:]MM:SS<marker>mmm``.

    Args:
        seconds: Offset in seconds, or None for a missing timestamp.
        always_include_hours: Emit the hours field even when it is zero.
        decimal_marker: Separator placed between seconds and milliseconds.

    Returns:
        The formatted string, or ``seconds`` unchanged (None) when no
        timestamp was supplied.
    """
    if seconds is None:
        # Malformed timestamp: hand it back untouched.
        return seconds

    remaining_ms = round(seconds * 1000.0)
    hours, remaining_ms = divmod(remaining_ms, 3_600_000)
    minutes, remaining_ms = divmod(remaining_ms, 60_000)
    whole_seconds, millis = divmod(remaining_ms, 1_000)

    hours_prefix = ""
    if always_include_hours or hours > 0:
        hours_prefix = f"{hours:02d}:"
    return f"{hours_prefix}{minutes:02d}:{whole_seconds:02d}{decimal_marker}{millis:03d}"
def transcribe(file, return_timestamps):
    """Transcribe an audio file to Chinese text with the ASR pipeline.

    The result is also stored in the module-level ``transcribe_text`` so the
    minutes tab can reuse it.

    Args:
        file: Path of the audio file to transcribe.
        return_timestamps: When truthy, return one line per chunk prefixed
            with ``[start -> end]`` instead of the plain text.

    Returns:
        The transcript as a string.
    """
    global transcribe_text

    result = pipe(
        file,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": "transcribe", "language": "chinese"},
        return_timestamps=return_timestamps,
    )
    transcript = result["text"]

    if return_timestamps:
        stamped_lines = []
        for segment in result["chunks"]:
            begin = format_timestamp(segment['timestamp'][0])
            finish = format_timestamp(segment['timestamp'][1])
            stamped_lines.append(f"[{begin} -> {finish}] {segment['text']}")
        transcript = "\n".join(stamped_lines)

    # Remember the latest transcript for the minutes tab.
    transcribe_text = transcript
    return transcript
# Top-level container; the tabbed interface is attached to it at the end of
# this file.
demo = gr.Blocks()

# Tab that records from the microphone and runs `transcribe` on the result.
mic_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.inputs.Audio(source="microphone", type="filepath", optional=True),
        gr.inputs.Checkbox(default=False, label="Return timestamps"),
    ],
    outputs="text",
    layout="horizontal",
    theme="huggingface",
    title="會議紀錄小幫手AINotes",
    description=(
        "可由麥克風錄音或上傳語音檔"
        f" 使用這個模型 [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME})如果覺得速度有點慢, 可以用(https://huggingface.co/spaces/sanchit-gandhi/whisper-jax) 先做語音辨識再做會議紀錄摘要"
        " 長度沒有限制"
    ),
    allow_flagging="never",
)
# Tab that takes an uploaded audio file and runs `transcribe` on it.
file_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.inputs.Audio(source="upload", optional=True, label="Audio file", type="filepath"),
        gr.inputs.Checkbox(default=False, label="Return timestamps"),
    ],
    outputs="text",
    layout="horizontal",
    theme="huggingface",
    title="會議紀錄小幫手AINotes",
    description=(
        "可由麥克風錄音或上傳語音檔"
        f" 使用這個模型 [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) 如果覺得速度有點慢, 可以用(https://huggingface.co/spaces/sanchit-gandhi/whisper-jax),先做語音辨識再做會議紀錄摘要"
        " 長度沒有限制"
    ),
    # No `examples` are configured for this tab; the flag is kept as it was.
    cache_examples=True,
    allow_flagging="never",
)
import google.generativeai as genai
def gpt4write(openaikeystr, transcribe_text, LLMmodel):
    """Produce meeting minutes from a transcript in a single request.

    Unlike ``gpt3write`` the transcript is not chunked; it is sent whole.

    Args:
        openaikeystr: API key typed by the user; only used when the
            environment-provided key ``hugapikey`` equals 'test'.
        transcribe_text: Full transcript text.
        LLMmodel: Chat model name.

    Returns:
        The generated minutes as a string.
    """
    realkey = openaikeystr if hugapikey == 'test' else hugapikey
    openaiobj = OpenAI(api_key=realkey)
    # The previous trailing `return 'ok'` sat after this return and could
    # never execute, so it was removed.
    return call_openai_makenote(openaiobj, transcribe_text, LLMmodel)
def gewritenote(prompt, inputscript):
    """Generate meeting minutes with the Gemini model.

    The request text is the prompt followed by the transcript wrapped in '#'
    characters.

    Args:
        prompt: Instruction text placed before the transcript.
        inputscript: Transcript text.

    Returns:
        The ``text`` attribute of the generation response.
    """
    genai.configure(api_key=genaikey)
    gemini = genai.GenerativeModel('gemini-pro')
    request_text = prompt + '#' + inputscript + '#'
    reply = gemini.generate_content(request_text)
    return reply.text
def writenotes(LLMmodel, apikeystr, prompt, inputscript):
    """Generate meeting minutes with the selected model.

    Args:
        LLMmodel: One of "gemini", "gpt-3.5-turbo" or "gpt-4-0125-preview".
        apikeystr: OpenAI API key typed by the user, forwarded to the GPT
            helpers.
        prompt: Instruction text. It is only used for the "gemini" model;
            the GPT helpers use their own built-in system prompts. A blank
            prompt falls back to ``DEFAULTPROMPT``.
        inputscript: Transcript pasted by the user. When it is longer than
            10 characters it replaces the stored speech-recognition result.

    Returns:
        The generated minutes as a string.

    Raises:
        ValueError: If ``LLMmodel`` is not one of the supported names.
    """
    global transcribe_text
    print('ok')

    # Pasted text means the transcript does not come from the speech
    # recognition result. `inputscript` may be None or empty.
    if inputscript and len(inputscript) > 10:
        transcribe_text = inputscript

    if LLMmodel == "gpt-3.5-turbo":
        return gpt3write(apikeystr, transcribe_text, LLMmodel)
    if LLMmodel == "gpt-4-0125-preview":
        return gpt4write(apikeystr, transcribe_text, LLMmodel)
    if LLMmodel == 'gemini':
        # An empty prompt box would send Gemini the transcript with no
        # instruction at all, so use the default prompt in that case.
        effective_prompt = prompt if prompt and prompt.strip() else DEFAULTPROMPT
        return gewritenote(effective_prompt, transcribe_text)

    # Previously an unknown model fell through every branch and failed with
    # UnboundLocalError on the return statement.
    raise ValueError(f"Unsupported LLMmodel: {LLMmodel!r}")
# Tab that turns a transcript into meeting minutes via `writenotes`.
# The inputs map positionally onto writenotes(LLMmodel, apikeystr, prompt,
# inputscript).
ainotes = gr.Interface(
    fn=writenotes,
    inputs=[
        gr.inputs.Radio(
            ["gemini", "gpt-3.5-turbo", "gpt-4-0125-preview"],
            label="LLMmodel",
            default="gemini",
        ),
        gr.Textbox(label="使用GPT請輸入OPEN AI API KEY", placeholder="請輸入sk..."),
        # NOTE(review): `gr.Textbox` may expect `value=` rather than
        # `default=` for its initial text, depending on the Gradio version;
        # confirm the default prompt actually pre-fills the box.
        gr.Textbox(label="提示詞(prompt)", placeholder=DEFAULTPROMPT, default=DEFAULTPROMPT),
        gr.Textbox(label="逐字稿", placeholder="若沒有做語音辨識,請輸入逐字稿"),
    ],
    outputs="text",
    layout="horizontal",
    theme="huggingface",
    title="會議紀錄小幫手AINotes",
    description=(
        "可由麥克風錄音或上傳語音檔,並將本逐字稿欄位清空,若有逐字稿可以直接貼在逐字稿"
        f" 使用這個模型 [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) 如果覺得速度有點慢, 可以用(https://huggingface.co/spaces/sanchit-gandhi/whisper-jax), 做完語音辨識再貼過來做會議紀錄摘要"
        " 長度沒有限制"
    ),
    # No `examples` are configured for this tab; the flag is kept as it was.
    cache_examples=True,
    allow_flagging="never",
)
# Assemble the three tabs inside the Blocks container, then start the app.
with demo:
    gr.TabbedInterface(
        [file_transcribe, mic_transcribe, ainotes],
        ["語音檔辨識", "麥克風語音檔辨識", "產生會議紀錄"],
    )

demo.launch(enable_queue=True)