import torch
from transformers import pipeline
import gradio as gr
import os

# The Space's own OpenAI key comes from the environment. Falling back to the
# 'test' sentinel (an assumption; the original used os.environ['openaikey'])
# means a missing variable asks the user for a key instead of raising KeyError.
hugapikey = os.environ.get('openaikey', 'test')
MODEL_NAME = "seiching/whisper-small-seiching"
#MODEL_NAME = "openai/whisper-small"
BATCH_SIZE = 8  # audio chunks transcribed per forward pass

device = 0 if torch.cuda.is_available() else "cpu"

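# Whisper ASR pipeline; chunk_length_s=30 splits long recordings into 30-second
# windows so files of any length can be transcribed.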
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=device,
)

from openai import OpenAI
import tiktoken  # token counting, used to keep each chunk under the model's context limit



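# Two ChatCompletion helpers: call_openai_api extracts formal meeting resolutions
# from a transcript chunk, call_openai_summary produces a general summary.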
def call_openai_api(openaiobj, transcription):
    # System prompt (Chinese): act as a professional minute-taker; the ASR
    # transcript may contain errors; skip discussion details; list only the
    # resolutions confirmed by the chair; write formally, not colloquially.
    response = openaiobj.chat.completions.create(
        model="gpt-3.5-turbo",
        temperature=0,
        messages=[
            {
                "role": "system",
                "content": "你是專業的會議紀錄製作員,請根據由語音辨識軟體將會議錄音所轉錄的逐字稿,也請注意逐字稿可能有錯,討論內容細節請略過,請列出經主席確認的會議決議,並要用比較正式及容易閱讀的寫法,避免口語化"
            },
            {
                "role": "user",
                "content": transcription
            }
        ]
    )
    return response.choices[0].message.content


def call_openai_summary(openaiobj, transcription):
    # System prompt (Chinese): act as a professional document clerk and extract
    # the key points of the ASR transcript in a formal, readable style.
    response = openaiobj.chat.completions.create(
        model="gpt-3.5-turbo",
        temperature=0,
        messages=[
            {
                "role": "system",
                "content": "你是專業的文書處理員,請根據由語音辨識軟體將會議錄音所轉錄的逐字稿,摘錄重點,要用比較正式及容易閱讀的寫法,避免口語化"
            },
            {
                "role": "user",
                "content": transcription
            }
        ]
    )
    return response.choices[0].message.content



def split_into_chunks(text, tokens=3500):
    # Token-based chunking keeps each request safely under the gpt-3.5-turbo
    # context window.
    encoding = tiktoken.encoding_for_model('gpt-3.5-turbo')
    words = encoding.encode(text)
    chunks = []
    for i in range(0, len(words), tokens):
        # Decode each token slice directly; the original wrapped this in
        # ' '.join(...), which inserts a space between every character.
        chunks.append(encoding.decode(words[i:i + tokens]))
    return chunks
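
# Example: a 9,000-token transcript becomes three chunks (3,500 + 3,500 + 2,000 tokens).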

def process_chunks(openaikeystr, inputtext):
    # When the Space is deployed with its own key, ignore the user-supplied one;
    # the 'test' sentinel means "use the key typed into the UI".
    if hugapikey == 'test':
        realkey = openaikeystr
    else:
        realkey = hugapikey

    openaiobj = OpenAI(api_key=realkey)
    chunks = split_into_chunks(inputtext)
    response = ''
    # Summarise each chunk separately and label it 第N段 ("part N").
    for i, chunk in enumerate(chunks, start=1):
        response = response + '第' + str(i) + '段\n' + call_openai_api(openaiobj, chunk) + '\n\n'
    return response
# Default transcript, overwritten by transcribe(); shared with the notes tab.
transcribe_text = "this is a test"



# Copied from https://github.com/openai/whisper/blob/c09a7ae299c4c34c5839a76380ae407e7d785914/whisper/utils.py#L50
def format_timestamp(seconds: float, always_include_hours: bool = False, decimal_marker: str = "."):
    if seconds is not None:
        milliseconds = round(seconds * 1000.0)

        hours = milliseconds // 3_600_000
        milliseconds -= hours * 3_600_000

        minutes = milliseconds // 60_000
        milliseconds -= minutes * 60_000

        seconds = milliseconds // 1_000
        milliseconds -= seconds * 1_000

        hours_marker = f"{hours:02d}:" if always_include_hours or hours > 0 else ""
        return f"{hours_marker}{minutes:02d}:{seconds:02d}{decimal_marker}{milliseconds:03d}"
    else:
        # we have a malformed timestamp so just return it as is
        return seconds
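
# Example: format_timestamp(3661.5) returns "01:01:01.500".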


def transcribe(file, task, return_timestamps):
    global transcribe_text
    outputs = pipe(
        file,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": task, "language": "chinese"},
        return_timestamps=return_timestamps,
    )
    text = outputs["text"]
    if return_timestamps:
        timestamps = outputs["chunks"]
        timestamps = [
            f"[{format_timestamp(chunk['timestamp'][0])} -> {format_timestamp(chunk['timestamp'][1])}] {chunk['text']}"
            for chunk in timestamps
        ]
        text = "\n".join(timestamps)
    # Keep the latest transcript so the notes tab can summarise it later.
    transcribe_text = text
    return text


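# Three Gradio interfaces share one Blocks app: file upload ASR, microphone ASR,
# and AI note generation from a transcript.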
demo = gr.Blocks()

# Gradio 3.x components (the gr.inputs.* namespace is deprecated); in Gradio 4+
# gr.Audio takes sources=["microphone"] instead of source="microphone".
mic_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(source="microphone", type="filepath"),
        gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
        gr.Checkbox(value=False, label="Return timestamps"),
    ],
    outputs="text",
    title="會議紀錄小幫手AINotes",
    description=(
        "可由麥克風錄音或上傳語音檔"
        f" 使用這個模型 [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) 先做語音辨識再做會議紀錄摘要"
        " 長度沒有限制"
    ),
    allow_flagging="never",
)
file_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(source="upload", label="Audio file", type="filepath"),
        gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
        gr.Checkbox(value=False, label="Return timestamps"),
    ],
    outputs="text",
    title="會議紀錄小幫手AINotes",
    description=(
        "可由麥克風錄音或上傳語音檔"
        f" 使用這個模型 [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) 先做語音辨識再做會議紀錄摘要"
        " 長度沒有限制"
    ),
    allow_flagging="never",
)
def writenotes(apikeystr, inputscript):
    # Without the global declaration the original raised UnboundLocalError when
    # no transcript was pasted, and never updated the shared transcript.
    global transcribe_text
    # A pasted transcript (longer than 10 characters) overrides the one produced
    # by the transcription tabs.
    if len(inputscript) > 10:
        transcribe_text = inputscript
    return process_chunks(apikeystr, transcribe_text)
ainotes = gr.Interface(
    fn=writenotes,
    inputs=[
        gr.Textbox(label="OPEN AI API KEY", placeholder="請輸入sk..."),
        gr.Textbox(label="逐字稿", placeholder="若沒有做語音辨識,請輸入逐字稿"),
    ],
    outputs="text",
    title="會議紀錄小幫手AINotes",
    description=(
        "可由麥克風錄音或上傳語音檔若有逐字稿可以直接貼在逐字稿"
        f" 使用這個模型 [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) 先做語音辨識再做會議紀錄摘要"
        " 長度沒有限制"
    ),
    allow_flagging="never",
)

with demo:
    gr.TabbedInterface(
        [file_transcribe, mic_transcribe, ainotes],
        ["語音檔辨識", "麥克風語音檔辨識", "產生會議紀錄"],
    )

# queue() replaces the deprecated enable_queue=True launch flag.
demo.queue().launch()