aka7774 committed on
Commit
d691b8c
·
verified ·
1 Parent(s): 8cd95c4

Upload 9 files

Browse files
Files changed (7) hide show
  1. app.py +9 -25
  2. fn.py +19 -0
  3. install.bat +56 -0
  4. install.py +5 -0
  5. main.py +43 -0
  6. requirements.txt +3 -0
  7. venv.sh +7 -0
app.py CHANGED
@@ -1,30 +1,14 @@
1
- import git
2
- git.Repo.clone_from('https://github.com/reazon-research/ReazonSpeech', 'ReazonSpeech')
3
- import pip, site, importlib
4
- pip.main(['install', 'ReazonSpeech/pkg/nemo-asr'])
5
- importlib.reload(site)
6
-
7
  import gradio as gr
8
- from reazonspeech.nemo.asr import audio_from_path, load_model, transcribe
9
-
10
- model = None
11
-
12
- def speech_to_text(audio_file):
13
- global model
14
 
15
- if not model:
16
- model = load_model()
17
 
18
- audio = audio_from_path(audio_file)
19
- ret = transcribe(model, audio)
20
-
21
- return ret.text
22
- # ret.segments
23
- # ret.subwords
24
-
25
- gr.Interface(
26
- fn=speech_to_text,
27
  inputs=[
28
- gr.Audio(source="upload", type="filepath"),
29
  ],
30
- outputs="text").launch()
 
 
 
 
"""Gradio front end: one audio upload in, two text fields out."""
import fn
import gradio as gr

# Invoked at import time, before the interface is constructed.
fn.load_model()

# The component hands the handler a file path rather than raw samples.
audio_input = gr.Audio(sources="upload", type="filepath")

# Two text outputs, matching the two values returned by fn.speech_to_text.
demo = gr.Interface(
    fn=fn.speech_to_text,
    inputs=[audio_input],
    outputs=["text", "text"],
)

if __name__ == '__main__':
    demo.launch()
fn.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Speech-to-text helpers built on the ReazonSpeech NeMo ASR package."""
# Imported first for its side effect: the install module clones ReazonSpeech
# and pip-installs its nemo-asr package so the import below can resolve.
import install
from reazonspeech.nemo.asr import audio_from_path, transcribe
from reazonspeech.nemo.asr import load_model as _load_asr_model

# Process-wide model cache, filled by load_model().
model = None


def load_model(*args, **kwargs):
    """Load the ASR model once and cache it in the module-level ``model``.

    Callers use ``fn.load_model()`` to warm the model up at startup. Without
    this wrapper that name was just the upstream loader, whose return value
    was thrown away, so the model was loaded a second time on first request.

    Any arguments are forwarded to the upstream loader on the first call
    only; later calls return the cached model and ignore them.

    Returns:
        The cached model object.
    """
    global model
    if model is None:
        model = _load_asr_model(*args, **kwargs)
    return model


def speech_to_text(audio_file, _model_size=None):
    """Transcribe an audio file.

    Args:
        audio_file: Passed straight to ``audio_from_path``.
        _model_size: Accepted but not used.

    Returns:
        A ``(text, text_with_timestamps)`` tuple. The second item has one
        line per segment: start seconds, end seconds and segment text,
        tab-separated, with the times formatted to two decimals.
    """
    asr_model = load_model()

    audio = audio_from_path(audio_file)
    ret = transcribe(asr_model, audio)

    text_with_timestamps = ''.join(
        f"{segment.start_seconds:.2f}\t{segment.end_seconds:.2f}\t{segment.text}\n"
        for segment in ret.segments
    )

    return ret.text, text_with_timestamps
install.bat ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
@echo off
rem Bootstrap a local install next to this script: clone the app, download a
rem standalone CPython, create a venv inside the app directory, then install
rem the Python dependencies into that venv.

rem -------------------------------------------
rem NOT guaranteed to work on Windows

set REPOS=https://huggingface.co/spaces/aka7774/reazonspeech
set APPDIR=reazonspeech
set VENV=venv

rem -------------------------------------------

rem %~dp0 is this script's directory (with trailing backslash). Paths built
rem from it are quoted at every use so a directory with spaces still works.
set "INSTALL_DIR=%~dp0"
cd /d "%INSTALL_DIR%"

:git_clone
rem First try whatever git is already on PATH.
git clone %REPOS% %APPDIR%
if exist "%APPDIR%" goto install_python

rem Clone did not produce the app directory: fetch PortableGit and retry.
rem The download target is absolute because bitsadmin rejects relative paths.
set DL_URL=https://github.com/git-for-windows/git/releases/download/v2.41.0.windows.3/PortableGit-2.41.0.3-64-bit.7z.exe
set "DL_DST=%INSTALL_DIR%PortableGit-2.41.0.3-64-bit.7z.exe"
curl -L -o "%DL_DST%" %DL_URL%
if not exist "%DL_DST%" bitsadmin /transfer dl %DL_URL% "%DL_DST%"
"%DL_DST%" -y
del "%DL_DST%"

rem Clone into the same directory name the rest of the script expects.
set "GIT=%INSTALL_DIR%PortableGit\bin\git"
"%GIT%" clone %REPOS% %APPDIR%

:install_python
rem 64-bit build, matching the 64-bit PortableGit above.
rem NOTE(review): the original fetched the i686 (32-bit) build; the ASR
rem dependencies are not expected to ship 32-bit Windows wheels - confirm.
set DL_URL=https://github.com/indygreg/python-build-standalone/releases/download/20240107/cpython-3.10.13+20240107-x86_64-pc-windows-msvc-shared-install_only.tar.gz
set "DL_DST=%INSTALL_DIR%python.tar.gz"
curl -L -o "%DL_DST%" %DL_URL%
if not exist "%DL_DST%" bitsadmin /transfer dl %DL_URL% "%DL_DST%"
tar -xzf "%DL_DST%"

rem The interpreter is expected under python\, so Scripts lives there too
rem (the original added a python310\Scripts directory instead).
set "PYTHON=%INSTALL_DIR%python\python.exe"
set "PATH=%PATH%;%INSTALL_DIR%python\Scripts"

:install_venv
cd "%APPDIR%"
"%PYTHON%" -m venv %VENV%
rem From here on use the venv's interpreter (path relative to the app dir).
set "PYTHON=%VENV%\Scripts\python.exe"

:install_pip
set DL_URL=https://bootstrap.pypa.io/get-pip.py
set "DL_DST=%INSTALL_DIR%get-pip.py"
curl -L -o "%DL_DST%" %DL_URL%
if not exist "%DL_DST%" bitsadmin /transfer dl %DL_URL% "%DL_DST%"
"%PYTHON%" "%DL_DST%"

"%PYTHON%" -m pip install gradio
"%PYTHON%" -m pip install -r requirements.txt

pause
install.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
"""Import-time bootstrap: fetch ReazonSpeech and install its nemo-asr package.

Importing this module clones the ReazonSpeech repository into ./ReazonSpeech
(only if that directory is not already there) and pip-installs the package
found under pkg/nemo-asr into the running interpreter's environment.
"""
import importlib
import os
import site
import subprocess
import sys

import git

REPO_URL = 'https://github.com/reazon-research/ReazonSpeech'
REPO_DIR = 'ReazonSpeech'

# Cloning into an existing, non-empty directory fails, which would make every
# start after the first one crash here. Reuse the checkout when present.
if not os.path.isdir(REPO_DIR):
    git.Repo.clone_from(REPO_URL, REPO_DIR)

# Run pip as a subprocess of this interpreter instead of calling pip.main(),
# which is not a supported API. check_call raises if the install fails, so a
# broken install surfaces here rather than as a later ImportError.
subprocess.check_call(
    [sys.executable, '-m', 'pip', 'install', 'ReazonSpeech/pkg/nemo-asr']
)

# Make the freshly installed package importable in this same process.
importlib.reload(site)
importlib.invalidate_caches()
main.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""FastAPI entry point.

Serves the Gradio demo under /gradio and exposes POST /transcribe, which
accepts an uploaded audio file and returns its transcription as JSON.
"""
import os
import sys
import time
import signal
import logging
import psutil
import io

from fastapi import FastAPI, Request, status, Form, UploadFile
from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field
from fastapi.exceptions import RequestValidationError
from fastapi.responses import JSONResponse

import fn
import gradio as gr
from app import demo

logger = logging.getLogger(__name__)

app = FastAPI()

# NOTE(review): every origin, method and header is allowed, with credentials.
# Confirm this is intended before exposing the service publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=['*'],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

gr.mount_gradio_app(app, demo, path="/gradio")

# Called once at startup, before any request is served.
fn.load_model()


@app.post("/transcribe")
async def transcribe_audio(file: UploadFile = Form(...)):
    """Transcribe an uploaded audio file.

    Args:
        file: The uploaded audio, sent as multipart form data.

    Returns:
        ``{"transcription": ..., "text_with_timestamps": ...}`` on success,
        or ``{"error": <message>}`` if anything fails.
    """
    try:
        file_content = await file.read()
        # NOTE(review): an in-memory stream is handed to fn.speech_to_text;
        # confirm the loader behind it accepts file-like objects for every
        # audio format that should be supported.
        file_stream = io.BytesIO(file_content)

        # Must be qualified: only the fn module is imported here, so the bare
        # name speech_to_text raised NameError on every request.
        text_only, text_with_timestamps = fn.speech_to_text(file_stream)

        return {"transcription": text_only, "text_with_timestamps": text_with_timestamps}
    except Exception as e:
        # Catch-all at the API boundary; the response shape is kept, but the
        # traceback is logged so failures are no longer silent.
        logger.exception("Transcription failed")
        return {"error": str(e)}
requirements.txt CHANGED
@@ -1,2 +1,5 @@
 
 
1
  Cython
2
  GitPython
 
 
1
+ fastapi
2
+ uvicorn
3
  Cython
4
  GitPython
5
+ python-multipart
venv.sh ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
#!/usr/bin/env bash
# Create a virtualenv in ./venv and install the app's dependencies into it.

# Stop at the first failing step, including a failure on either side of a pipe.
set -euo pipefail

python3 -m venv venv

# Install pip into the venv via get-pip.py. TLS verification is left enabled
# (no -k) because the downloaded script is executed directly; -f turns HTTP
# errors into a failure instead of piping an error page into Python.
curl -fsSL https://bootstrap.pypa.io/get-pip.py | venv/bin/python

venv/bin/python -m pip install gradio
venv/bin/python -m pip install -r requirements.txt