add tiktoken package and openai
- app.py +145 -14
- requirements.txt +1 -0
app.py
CHANGED
@@ -16,6 +16,96 @@ pipe = pipeline(
     device=device,
 )
 
+from openai import OpenAI
+from concurrent.futures import ThreadPoolExecutor
+import tiktoken
+# from google.colab import userdata
+# class OpenAIKeyClass:
+#     def __init__(self, api_key):
+#         self.api_key = api_key
+
+#     def get_key(self):
+
+#         return self.api_key
+
+#     def set_key(self, api_key):
+#         self.api_key = api_key
+
+# # Create an OpenAIKeyClass object
+# openaikey = OpenAIKeyClass("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")
+# # Add your own OpenAI API key
+# client = OpenAI(
+#     # This is the default and can be omitted
+
+#     api_key=openaikey.get_key(),
+# )
+
+
+
+def call_openai_api(openaiobj, transcription):
+
+    response = openaiobj.chat.completions.create(
+        model="gpt-3.5-turbo",
+        temperature=0,
+        messages=[
+            {
+                "role": "system",
+                "content": "你是專業的會議紀錄製作員,請根據由語音辨識軟體將會議錄音所轉錄的逐字稿,也請注意逐字稿可能有錯,請以條列式,列出討論事項及結論,討論內容細節請略過,要用比較正式及容易閱讀的寫法,避免口語化"
+            },
+            {
+                "role": "user",
+                "content": transcription
+            }
+        ]
+    )
+    return response.choices[0].message.content
+
+
+def split_into_chunks(text, tokens=500):
+    encoding = tiktoken.encoding_for_model('gpt-3.5-turbo')
+    words = encoding.encode(text)
+    chunks = []
+    for i in range(0, len(words), tokens):
+        chunks.append(' '.join(encoding.decode(words[i:i + tokens])))
+    return chunks
+
+def process_chunks(openaikeystr, inputtext):
+    openaiobj = OpenAI(
+        # This is the default and can be omitted
+
+        api_key=openaikeystr,
+    )
+
+    text = inputtext
+    # openaikey.set_key(openaikeystr)
+    # print('process_chunk', openaikey.get_key())
+    chunks = split_into_chunks(text)
+    response = ''
+    for chunk in chunks:
+        response = response + call_openai_api(openaiobj, chunk)
+    return response
+    # # Processes chunks in parallel
+    # with ThreadPoolExecutor() as executor:
+    #     responses = list(executor.map(call_openai_api, [openaiobj, chunks]))
+    #     return responses
+import torch
+from transformers import pipeline
+from transformers.pipelines.audio_utils import ffmpeg_read
+import gradio as gr
+
+MODEL_NAME = "seiching/whisper-small-seiching"
+BATCH_SIZE = 8
+transcribe_text = "this is a test"
+
+device = 0 if torch.cuda.is_available() else "cpu"
+
+pipe = pipeline(
+    task="automatic-speech-recognition",
+    model=MODEL_NAME,
+    chunk_length_s=30,
+    device=device,
+)
+
 
 # Copied from https://github.com/openai/whisper/blob/c09a7ae299c4c34c5839a76380ae407e7d785914/whisper/utils.py#L50
 def format_timestamp(seconds: float, always_include_hours: bool = False, decimal_marker: str = "."):
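Two details in the hunk above are worth flagging. First, tiktoken's `encoding.decode(...)` already returns a string, so the `' '.join(...)` wrapper in `split_into_chunks` inserts a space between every character of the decoded chunk, which is especially harmful for the Chinese transcripts this Space produces. Second, the commented-out parallel variant passes the two-element list `[openaiobj, chunks]` to `executor.map`, which would map `call_openai_api` over that pair rather than over the chunks. (The commented block near the top also embeds a hard-coded `sk-` key; a key pasted into source like this should be treated as leaked and revoked.) A minimal corrected sketch of both points, reusing the commit's `call_openai_api`; the name `process_chunks_parallel` is mine, not the commit's:

from concurrent.futures import ThreadPoolExecutor
from functools import partial

import tiktoken

def split_into_chunks(text, tokens=500):
    # decode() already yields a string, so no join is needed
    encoding = tiktoken.encoding_for_model('gpt-3.5-turbo')
    ids = encoding.encode(text)
    return [encoding.decode(ids[i:i + tokens]) for i in range(0, len(ids), tokens)]

def process_chunks_parallel(openaiobj, text):
    # bind the client with partial() and map over the chunks themselves
    chunks = split_into_chunks(text)
    with ThreadPoolExecutor() as executor:
        summaries = executor.map(partial(call_openai_api, openaiobj), chunks)
    return '\n'.join(summaries)

For reference, the Chinese system prompt in `call_openai_api` tells the model: you are a professional minute-taker; given a transcript produced by speech recognition, which may contain errors, list the discussion items and conclusions as bullet points, skip the fine detail, and write formally rather than colloquially.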
@@ -48,6 +138,15 @@ def transcribe(file, task, return_timestamps):
         for chunk in timestamps
     ]
     text = "\n".join(str(feature) for feature in timestamps)
+    global transcribe_text
+    transcribe_text = text
+    # with open('asr_resul.txt', 'w') as f:
+    #     f.write(text)
+
+    # ainotes = process_chunks(text)
+    # with open("ainotes_result.txt", "a") as f:
+    #     f.write(ainotes)
+
     return text
 
 
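This hunk shares the transcript with the summary tab through the module-level `transcribe_text` global. That is fine for one user, but a Space serves every visitor from the same process, so two people transcribing concurrently would overwrite each other's transcript. A per-session alternative keeps the transcript in Gradio state instead; a sketch assuming the Gradio 3.x API that the rest of the file targets (component names are illustrative, not from the commit):

import gradio as gr

with gr.Blocks() as notes_demo:
    transcript_state = gr.State("")  # one copy per browser session, not per process

    audio = gr.Audio(source="upload", type="filepath")
    transcript_box = gr.Textbox(label="Transcript")
    run_btn = gr.Button("Transcribe")

    def do_transcribe(path):
        text = pipe(path)["text"]  # the commit's ASR pipeline
        return text, text          # display it and stash it in session state

    run_btn.click(do_transcribe, inputs=audio,
                  outputs=[transcript_box, transcript_state])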
@@ -63,15 +162,14 @@ mic_transcribe = gr.Interface(
     outputs="text",
     layout="horizontal",
     theme="huggingface",
-    title="
+    title="會議紀錄小幫手AINotes",
     description=(
-        "
-        f"
-        "
+        "可由麥克風錄音或上傳語音檔"
+        f" 使用這個模型 [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) 先做語音辨識再做會議紀錄摘要"
+        " 長度沒有限制"
     ),
     allow_flagging="never",
 )
-
 file_transcribe = gr.Interface(
     fn=transcribe,
     inputs=[
@@ -82,21 +180,54 @@ file_transcribe = gr.Interface(
     outputs="text",
     layout="horizontal",
     theme="huggingface",
-    title="
+    title="會議紀錄小幫手AINotes",
     description=(
-        "
-        f"
-        "
+        "可由麥克風錄音或上傳語音檔"
+        f" 使用這個模型 [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) 先做語音辨識再做會議紀錄摘要"
+        " 長度沒有限制"
     ),
-    examples=[
-
-
-    ],
+    # examples=[
+    #     ["./example.flac", "transcribe", False],
+    #     ["./example.flac", "transcribe", True],
+    # ],
+    cache_examples=True,
+    allow_flagging="never",
+)
+def writenotes(apikeystr):
+    # text = transcribe_text
+    # openaikey.set_key(inputkey)
+    # openaikey = OpenAIKeyClass(inputkey)
+    print('ok')
+    ainotestext = process_chunks(apikeystr, transcribe_text)
+    # ainotestext = ""
+    # with open('asr_resul.txt', 'w') as f:
+    #     # print(transcribe_text)
+    #     # f.write(inputkey)
+    #     f.write(transcribe_text)
+    # with open('ainotes.txt', 'w') as f:
+    #     f.write(ainotestext)
+    return ainotestext
+ainotes = gr.Interface(
+    fn=writenotes,
+    inputs=gr.Textbox(label="OPEN AI API KEY", placeholder="請輸入sk..."),
+    outputs="text",
+    layout="horizontal",
+    theme="huggingface",
+    title="會議紀錄小幫手AINotes",
+    description=(
+        "可由麥克風錄音或上傳語音檔"
+        f" 使用這個模型 [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) 先做語音辨識再做會議紀錄摘要"
+        " 長度沒有限制"
+    ),
+    # examples=[
+    #     ["./example.flac", "transcribe", False],
+    #     ["./example.flac", "transcribe", True],
+    # ],
     cache_examples=True,
     allow_flagging="never",
 )
 
 with demo:
-    gr.TabbedInterface([mic_transcribe,
+    gr.TabbedInterface([file_transcribe, mic_transcribe, ainotes], ["語音檔辨識", "麥克風語音檔辨識", "產生會議紀錄"])
 
 demo.launch(enable_queue=True)
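The new tab layout, in English: file transcription (語音檔辨識), microphone transcription (麥克風語音檔辨識), and minutes generation (產生會議紀錄). The shared title and description read roughly: "Meeting-minutes assistant AINotes: record from the microphone or upload an audio file; this model transcribes first and then summarizes into minutes, with no length limit." Note that `writenotes` forwards whatever is in the key textbox straight into `OpenAI(...)` and summarizes whatever the global currently holds; `transcribe_text` starts as the `"this is a test"` placeholder, so generating notes before transcribing silently summarizes the placeholder, and a bad key only fails deep inside the API call. A defensive sketch (my hardening, not the committed behavior):

def writenotes(apikeystr):
    # validate inputs before spending API calls
    key = (apikeystr or "").strip()
    if not key.startswith("sk-"):
        return "Please enter a valid OpenAI API key (sk-...)."
    if not transcribe_text or transcribe_text == "this is a test":
        return "No transcript yet - run speech recognition first."
    try:
        return process_chunks(key, transcribe_text)
    except Exception as err:
        # surface bad-key or rate-limit errors in the UI instead of a traceback
        return f"OpenAI request failed: {err}"

The stray `print('ok')` in the committed version is leftover debug output and could be dropped.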
requirements.txt
CHANGED
@@ -1,3 +1,4 @@
 --extra-index-url https://download.pytorch.org/whl/cu113
 torch
 transformers
+tiktoken
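One packaging gap: the commit message mentions both tiktoken and openai, and the new app.py does `from openai import OpenAI`, yet only `tiktoken` lands in requirements.txt. Unless the Space's base image already ships the package, the file presumably also needs the v1-style client the code calls (`client.chat.completions.create` is the openai>=1.0 interface), along the lines of:

--extra-index-url https://download.pytorch.org/whl/cu113
torch
transformers
tiktoken
openai>=1.0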