Spaces:
Sleeping
Sleeping
File size: 9,507 Bytes
0ca719f b7ba5fb 0ca719f b7ba5fb bd2f0d0 c2c01b8 0ca719f b051926 bc1215e b051926 bc1215e b051926 9282cfe b051926 5e5f20d bc1215e 5e5f20d 494d547 5e5f20d b051926 bc1215e b051926 52264b1 b051926 52264b1 44d039c b7ba5fb 6091c72 b7ba5fb b051926 bd2f0d0 7e749fe b051926 7222f55 9282cfe 7e749fe 1f0bb94 bd2f0d0 494d547 b051926 0ca719f 11cf7c8 0ca719f b051926 0ca719f b051926 0ca719f b051926 0ca719f b051926 0ca719f b051926 0ca719f b051926 bd2f0d0 a5339f4 bd2f0d0 a5339f4 bd2f0d0 b051926 bd2f0d0 6091c72 b051926 bd2f0d0 b051926 494d547 b051926 0ca719f b051926 0ca719f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 |
import torch
from transformers import pipeline
from transformers.pipelines.audio_utils import ffmpeg_read
import gradio as gr
import os
# API keys are read from the environment at import time; a missing variable
# raises KeyError here, before any model is loaded.
hugapikey = os.environ['openaikey']
genaikey = os.environ['genaikey']

# Whisper checkpoint used for speech recognition, and the batch size passed to
# the pipeline on every call.
MODEL_NAME = "seiching/whisper-small-seiching"
BATCH_SIZE = 8

# Use the first CUDA device when one is available, otherwise run on the CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = "cpu"

# Shared automatic-speech-recognition pipeline; audio is processed in
# 30-second windows.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=device,
)
from openai import OpenAI
from concurrent.futures import ThreadPoolExecutor
import tiktoken
# Chat-completion model name used for every OpenAI request in this file; it
# also selects the tiktoken encoding used when splitting transcripts.
usemodelname='gpt-4-0125-preview'
def call_openai_api(openaiobj, transcription):
    """Ask the chat model to extract formal meeting resolutions from a transcript.

    Args:
        openaiobj: Client object exposing ``chat.completions.create``.
        transcription: Transcript text sent as the user message.

    Returns:
        The ``content`` of the first choice in the completion response.
    """
    # System prompt (Chinese): act as a professional minute-taker, tolerate
    # speech-recognition errors, skip discussion details, and list the
    # resolutions confirmed by the chair in a formal, readable style.
    system_prompt = "你是專業的會議紀錄製作員,請根據由語音辨識軟體將會議錄音所轉錄的逐字稿,也請注意逐字稿可能有錯,討論內容細節請略過,請列出經主席確認的會議決議,並要用比較正式及容易閱讀的寫法,避免口語化"
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": transcription},
    ]
    completion = openaiobj.chat.completions.create(
        model=usemodelname,
        temperature=0,
        messages=conversation,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
def call_openai_summary(openaiobj, transcription):
    """Ask the chat model to merge per-segment resolutions into one formal record.

    Args:
        openaiobj: Client object exposing ``chat.completions.create``.
        transcription: Text sent as the user message (the system prompt
            describes it as segmented meeting resolutions).

    Returns:
        The ``content`` of the first choice in the completion response.
    """
    # System prompt (Chinese): act as a professional minute-taker and compile
    # the segmented resolutions into formal meeting minutes.
    system_prompt = "你是專業的會議紀錄製作員,請根據分段的會議決證,彙整成正式會議紀錄"
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": transcription},
    ]
    completion = openaiobj.chat.completions.create(
        model=usemodelname,
        temperature=0,
        messages=conversation,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
def split_into_chunks(text, tokens=127900, encoding=None):
    """Split *text* into consecutive pieces of at most *tokens* tokens each.

    The text is tokenized once, the token list is sliced into windows of
    ``tokens`` ids, and every window is decoded back into a string.

    Args:
        text: The text to split.
        tokens: Maximum number of tokens per chunk; must be positive.
        encoding: Optional tokenizer object providing ``encode(str)`` and
            ``decode(list)``. When omitted, the tiktoken encoding for
            ``usemodelname`` is used.

    Returns:
        A list of strings in original order; empty when *text* encodes to no
        tokens.

    Raises:
        ValueError: If *tokens* is not positive.
    """
    if tokens <= 0:
        raise ValueError("tokens must be a positive integer")
    if encoding is None:
        encoding = tiktoken.encoding_for_model(usemodelname)
    token_ids = encoding.encode(text)
    # decode() already returns a string. The previous ' '.join(...) around it
    # iterated that string character by character and inserted a space between
    # every character, corrupting the text and inflating its token count.
    # NOTE(review): a window boundary may fall inside a character that spans
    # several tokens; confirm how the tokenizer's decode handles that case.
    return [
        encoding.decode(token_ids[start:start + tokens])
        for start in range(0, len(token_ids), tokens)
    ]
def process_chunks(openaikeystr, inputtext):
    """Produce meeting notes for a transcript, one section per token-limited chunk.

    Args:
        openaikeystr: API key supplied by the user; only used when the
            environment-provided key equals the literal ``'test'``.
        inputtext: Full transcript text.

    Returns:
        A string with a header followed by one numbered section per chunk.
        When the transcript was split into two or more chunks, a final
        consolidated section built from the combined sections is appended.
    """
    # The environment key wins unless it is the placeholder value 'test'.
    realkey = openaikeystr if hugapikey == 'test' else hugapikey
    openaiobj = OpenAI(api_key=realkey)

    chunks = split_into_chunks(inputtext)
    # Chunks are sent one after another; each result is labelled with its
    # 1-based segment number.
    sections = [
        '第' + str(index) + '段\n' + call_openai_api(openaiobj, chunk) + '\n\n'
        for index, chunk in enumerate(chunks, start=1)
    ]
    response = '這是分段會議紀錄結果\n\n' + ''.join(sections)

    # A single chunk (or none) needs no consolidation pass.
    if len(chunks) < 2:
        return response

    # NOTE(review): the consolidation pass reuses call_openai_api (transcript
    # prompt) rather than call_openai_summary - confirm which one is intended.
    return response + '\n\n 這是根據以上分段會議紀錄彙編如下 \n\n' + call_openai_api(openaiobj, response)
import torch
from transformers import pipeline
from transformers.pipelines.audio_utils import ffmpeg_read
import gradio as gr
# MODEL_NAME, BATCH_SIZE, device and pipe are already defined with identical
# values near the top of this file. They were re-created here, which loaded the
# Whisper model a second time at startup; the duplicates are removed and only
# the new global is kept.

# Latest transcription result, overwritten by transcribe(); starts with a
# placeholder value.
transcribe_text = "this is a test"
# Copied from https://github.com/openai/whisper/blob/c09a7ae299c4c34c5839a76380ae407e7d785914/whisper/utils.py#L50
def format_timestamp(seconds: float, always_include_hours: bool = False, decimal_marker: str = "."):
    """Render a duration in seconds as ``[HH:]MM:SS<marker>mmm``.

    Args:
        seconds: Duration in seconds, or ``None`` for a malformed timestamp.
        always_include_hours: Emit the hours field even when it is zero.
        decimal_marker: Separator placed between seconds and milliseconds.

    Returns:
        The formatted string, or ``seconds`` unchanged when it is ``None``.
    """
    # A malformed timestamp is passed straight back to the caller.
    if seconds is None:
        return seconds

    # Work in whole milliseconds so that rounding happens exactly once.
    remaining_ms = round(seconds * 1000.0)
    hours, remaining_ms = divmod(remaining_ms, 3_600_000)
    minutes, remaining_ms = divmod(remaining_ms, 60_000)
    whole_seconds, millis = divmod(remaining_ms, 1_000)

    if always_include_hours or hours > 0:
        hours_prefix = f"{hours:02d}:"
    else:
        hours_prefix = ""
    return f"{hours_prefix}{minutes:02d}:{whole_seconds:02d}{decimal_marker}{millis:03d}"
def transcribe(file, task, return_timestamps):
    """Run speech recognition on an audio file and return the resulting text.

    Args:
        file: Audio input handed to the ASR pipeline (the UI passes a file path).
        task: Value forwarded as the ``task`` generation argument
            (the UI offers "transcribe" and "translate").
        return_timestamps: When truthy, return one line per chunk prefixed
            with ``[start -> end]`` timestamps instead of the plain text.

    Returns:
        The transcription text. The same text is also stored in the
        module-level ``transcribe_text``.
    """
    # NOTE(review): the result is kept in a single module-level global, so it
    # appears to be shared by every caller of this process - confirm that is
    # acceptable.
    global transcribe_text

    result = pipe(
        file,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": task, "language": "chinese"},
        return_timestamps=return_timestamps,
    )
    text = result["text"]

    if return_timestamps:
        stamped_lines = []
        for segment in result["chunks"]:
            begin = format_timestamp(segment['timestamp'][0])
            end = format_timestamp(segment['timestamp'][1])
            stamped_lines.append(f"[{begin} -> {end}] {segment['text']}")
        text = "\n".join(stamped_lines)

    transcribe_text = text
    return text
# Root container that hosts the tabbed interface assembled at the end of the file.
demo = gr.Blocks()

# Microphone tab: record audio, choose the task, optionally request timestamps.
_mic_inputs = [
    gr.inputs.Audio(source="microphone", type="filepath", optional=True),
    gr.inputs.Radio(["transcribe", "translate"], label="Task", default="transcribe"),
    gr.inputs.Checkbox(default=False, label="Return timestamps"),
]
_mic_description = (
    "可由麥克風錄音或上傳語音檔"
    f" 使用這個模型 [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) 先做語音辨識再做會議紀錄摘要"
    " 長度沒有限制"
)
mic_transcribe = gr.Interface(
    fn=transcribe,
    inputs=_mic_inputs,
    outputs="text",
    layout="horizontal",
    theme="huggingface",
    title="會議紀錄小幫手AINotes",
    description=_mic_description,
    allow_flagging="never",
)
# Upload tab: same controls as the microphone tab, but the audio comes from a file.
_file_inputs = [
    gr.inputs.Audio(source="upload", optional=True, label="Audio file", type="filepath"),
    gr.inputs.Radio(["transcribe", "translate"], label="Task", default="transcribe"),
    gr.inputs.Checkbox(default=False, label="Return timestamps"),
]
_file_description = (
    "可由麥克風錄音或上傳語音檔"
    f" 使用這個模型 [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) 先做語音辨識再做會議紀錄摘要"
    " 長度沒有限制"
)
file_transcribe = gr.Interface(
    fn=transcribe,
    inputs=_file_inputs,
    outputs="text",
    layout="horizontal",
    theme="huggingface",
    title="會議紀錄小幫手AINotes",
    description=_file_description,
    # No examples are configured; the flag is kept as in the original call.
    cache_examples=True,
    allow_flagging="never",
)
import google.generativeai as genai
def gpt4write(apikeystr, transcribe_text):
    """Placeholder note writer: ignores both arguments and returns ``'ok'``."""
    placeholder_result = 'ok'
    return placeholder_result
def gewritenote(inputscript):
    """Generate meeting notes from a transcript with the 'gemini-pro' model.

    Args:
        inputscript: Transcript text; it is appended to the instruction
            prompt wrapped in ``#`` markers.

    Returns:
        The ``text`` attribute of the model's response.
    """
    genai.configure(api_key=genaikey)
    gemini_model = genai.GenerativeModel('gemini-pro')

    # Instruction (Chinese): act as a professional minute-taker, tolerate
    # speech-recognition errors, skip discussion details, and list the
    # resolutions confirmed by the chair in a formal, readable style.
    instruction = '你是專業的會議紀錄製作員,請根據由語音辨識軟體將會議錄音所轉錄的逐字稿,也請注意逐字稿可能有錯,討論內容細節請略過,請列出經主席確認的會議決議,並要用比較正式及容易閱讀的寫法,避免口語化'
    full_prompt = "".join((instruction, '#', inputscript, '#'))

    reply = gemini_model.generate_content(full_prompt)
    return reply.text
def writenotes(LLMmodel, apikeystr, inputscript):
    """Generate meeting notes with the selected model.

    Args:
        LLMmodel: One of "gpt-3.5-turbo", "gpt-4-0125-preview" or "gemini".
        apikeystr: OpenAI API key typed by the user; forwarded to the GPT
            writers.
        inputscript: Transcript pasted by the user. When it holds more than
            10 characters it is used as the source text; otherwise the most
            recent speech-recognition result (module-level
            ``transcribe_text``) is used.

    Returns:
        The notes text returned by the selected writer.

    Raises:
        ValueError: If *LLMmodel* is not one of the supported choices.
    """
    # The original assigned transcribe_text inside the `if`, which made the
    # name local to this function: a short or empty textbox then raised
    # UnboundLocalError and never reached the stored transcription. Read the
    # module-level value explicitly as the fallback.
    if inputscript and len(inputscript) > 10:
        script = inputscript
    else:
        script = transcribe_text

    # NOTE(review): the "gpt-3.5-turbo" choice goes through process_chunks,
    # whose requests use the model named by usemodelname, and the
    # "gpt-4-0125-preview" choice goes to gpt4write, which is a placeholder.
    # Confirm this mapping is intended.
    if LLMmodel == "gpt-3.5-turbo":
        return process_chunks(apikeystr, script)
    if LLMmodel == "gpt-4-0125-preview":
        return gpt4write(apikeystr, script)
    if LLMmodel == 'gemini':
        # Uses the same resolved transcript as the GPT writers, so the
        # speech-recognition fallback applies here as well.
        return gewritenote(script)
    raise ValueError(f"unsupported LLMmodel: {LLMmodel!r}")
# Notes tab: pick the LLM, optionally supply an OpenAI key, optionally paste a transcript.
_notes_inputs = [
    gr.inputs.Radio(["gpt-3.5-turbo", "gpt-4-0125-preview", "gemini"], label="LLMmodel", default="gpt-3.5-turbo"),
    gr.Textbox(label="使用GPT請輸入OPEN AI API KEY", placeholder="請輸入sk..."),
    gr.Textbox(label="逐字稿", placeholder="若沒有做語音辨識,請輸入逐字稿"),
]
_notes_description = (
    "可由麥克風錄音或上傳語音檔,並將本逐字稿欄位清空,若有逐字稿可以直接貼在逐字稿"
    f" 使用這個模型 [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) 先做語音辨識再做會議紀錄摘要"
    " 長度沒有限制"
)
ainotes = gr.Interface(
    fn=writenotes,
    inputs=_notes_inputs,
    outputs="text",
    layout="horizontal",
    theme="huggingface",
    title="會議紀錄小幫手AINotes",
    description=_notes_description,
    # No examples are configured; the flag is kept as in the original call.
    cache_examples=True,
    allow_flagging="never",
)
# Mount the three interfaces as tabs inside the root Blocks container.
with demo:
    gr.TabbedInterface(
        [file_transcribe, mic_transcribe, ainotes],
        ["語音檔辨識", "麥克風語音檔辨識", "產生會議紀錄"],
    )

# Start the app with request queueing enabled. The stray trailing "|" that
# followed this call was page-extraction residue and a syntax error.
demo.launch(enable_queue=True)