Spaces:
Runtime error
Runtime error
MingLi
committed on
Commit
·
b465de4
1
Parent(s):
183f1cf
ver0.1
Browse files- .gitignore +1 -0
- Dockerfile +17 -0
- app.py +96 -0
- requirements.txt +5 -0
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
__pycache__
|
Dockerfile
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
# you will also find guides on how best to write your Dockerfile

FROM python:3.11

WORKDIR /code

# ffmpeg is required by app.py to convert uploaded m4a/mp3 files to wav.
# --no-install-recommends plus the list cleanup keeps the image smaller;
# runtime behavior is unchanged.
RUN apt-get update && apt-get install -y --no-install-recommends \
    ffmpeg \
    && rm -rf /var/lib/apt/lists/*

# Copy and install requirements first so this layer is cached across code-only changes.
COPY ./requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

COPY . .

# Hugging Face Spaces expects the HTTP server on port 7860.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py
ADDED
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import os
|
3 |
+
import zipfile
|
4 |
+
import tempfile
|
5 |
+
import subprocess
|
6 |
+
from transformers import pipeline
|
7 |
+
import torch
|
8 |
+
from zipfile import ZipFile
|
9 |
+
|
10 |
+
from fastapi import FastAPI
|
11 |
+
|
12 |
+
# FastAPI application; the Gradio UI is mounted onto it at the bottom of the file,
# and the Dockerfile serves it via `uvicorn app:app`.
app = FastAPI()

# Prefer the first CUDA GPU when available, otherwise run on CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# English-only Whisper "medium" checkpoint from the Hugging Face hub.
model_id = "openai/whisper-medium.en"

# Module-level ASR pipeline shared by all requests.
# chunk_length_s=30 enables chunked long-form transcription.
# NOTE(review): the model is downloaded/loaded at import time — startup is slow.
pipe = pipeline(
    "automatic-speech-recognition",
    model=model_id,
    chunk_length_s=30,
    device=device,
)
24 |
+
|
25 |
+
|
26 |
+
def support_gbk(zip_file: ZipFile):
    """Repair mojibake entry names in zips created with GBK-encoded filenames.

    ``zipfile`` decodes entry names that lack the UTF-8 flag as cp437, so a
    GBK-named entry (common for archives made on Chinese-locale Windows)
    surfaces as garbage. Re-encoding to the raw bytes via cp437 and decoding
    as GBK recovers the real name.

    Returns the same ``ZipFile`` with its name map rewritten in place.
    """
    name_to_info = zip_file.NameToInfo
    # Iterate over a copy because the mapping is mutated inside the loop.
    for name, info in name_to_info.copy().items():
        try:
            real_name = name.encode("cp437").decode("gbk")
        except UnicodeError:
            # BUG fix: the name is genuine UTF-8 (not representable in cp437)
            # or the raw bytes are not valid GBK — the original code crashed
            # here with UnicodeEncodeError/UnicodeDecodeError. Leave it as-is.
            continue
        if real_name != name:
            info.filename = real_name
            del name_to_info[name]
            name_to_info[real_name] = info
    return zip_file
36 |
+
|
37 |
+
|
38 |
+
def handel(f):
    """Gradio click handler: transcribe an uploaded audio file or zip of files.

    ``f`` is the gr.File upload (has a ``.name`` path) or falsy when nothing
    was uploaded. Zips are extracted (with GBK filename repair) into a
    temporary directory and every contained file is transcribed; a single
    audio file is transcribed directly. Returns the joined transcript text.

    Raises gr.Error when no file was uploaded.
    """
    if not f:
        raise gr.Error("请上传文件")
    if f.name.endswith(".zip"):
        # BUG fix: the original created TemporaryDirectory() without a `with`,
        # relying on GC finalization for cleanup; the context manager removes
        # the directory deterministically once transcription is done.
        with support_gbk(ZipFile(f.name, "r")) as archive, \
                tempfile.TemporaryDirectory() as tmpdir:
            archive.extractall(path=tmpdir)
            # Sort for a deterministic transcript order (os.walk order is
            # filesystem-dependent).
            return handel_files(
                sorted(
                    os.path.join(dirpath, filename)
                    for dirpath, _, filenames in os.walk(tmpdir)
                    for filename in filenames
                )
            )
    else:
        return handel_files([f.name])
54 |
+
|
55 |
+
|
56 |
+
def ffmpeg_convert(file_input, file_output):
    """Convert an audio file with ffmpeg, overwriting any existing output.

    Raises gr.Error when ffmpeg exits with a non-zero status.
    """
    result = subprocess.run(["ffmpeg", "-y", "-i", file_input, file_output])
    if result.returncode != 0:
        raise gr.Error("ffmpeg_convert 失败, 请检查文件格式是否正确")
59 |
+
|
60 |
+
|
61 |
+
def handel_files(f_ls):
    """Normalize each path in ``f_ls`` to a wav file and transcribe it.

    m4a/mp3 files are converted with ffmpeg; wav files are used as-is;
    anything else is skipped with a UI warning. Returns all transcripts
    joined by blank lines.
    """
    files = []
    for file in f_ls:
        if file.endswith(".m4a"):
            file_output = file.replace(".m4a", ".wav")
            ffmpeg_convert(file, file_output)
        elif file.endswith(".mp3"):
            file_output = file.replace(".mp3", ".wav")
            ffmpeg_convert(file, file_output)
        elif file.endswith(".wav"):
            # BUG fix: the original called ffmpeg_convert(file, file) here —
            # ffmpeg cannot read and write the same path and errors out.
            # A wav input needs no conversion at all.
            file_output = file
        else:
            # BUG fix: the original referenced an undefined `file_name`
            # (NameError) and then still appended the previous loop's
            # file_output. Warn and skip the unsupported file instead.
            gr.Warning(f"存在不合法文件{os.path.basename(file)},已跳过处理")
            continue
        files.append(file_output)
    ret = []
    for file in files:
        ret.append(whisper_handler(file))

    return "\n\n".join(ret)
|
81 |
+
|
82 |
+
|
83 |
+
def whisper_handler(file):
    """Run the module-level Whisper pipeline on one audio file.

    Shows a UI info toast with the file's base name, then returns the
    transcribed text.
    """
    base_name = os.path.basename(file)
    gr.Info(f"处理文件 - {base_name}")
    return pipe(file)["text"]
87 |
+
|
88 |
+
|
89 |
+
# Build the Gradio UI: one upload slot, a submit button, and a result box.
with gr.Blocks() as blocks:
    upload = gr.File(file_types=[".zip", ".mp3", ".wav", ".m4a"])
    submit = gr.Button(value="提交")
    result = gr.Textbox(label="结果")

    submit.click(handel, inputs=upload, outputs=result)

# Serve the Gradio UI at the root of the FastAPI app (see Dockerfile CMD).
app = gr.mount_gradio_app(app, blocks, path="/")
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
# UI framework (also provides gr.mount_gradio_app)
gradio
# Whisper ASR pipeline and its backend
transformers
torch
# ASGI app and server — the Dockerfile runs `uvicorn app:app`
fastapi
uvicorn[standard]