camparchimedes committed on
Commit
b8712f3
·
verified ·
1 Parent(s): aebda00

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -60
app.py CHANGED
@@ -1,5 +1,6 @@
 
1
  ### -----------------------------------------------------------------------
2
- ### (BASE, Revised) version_1.07 ALPHA, app.py
3
  ### -----------------------------------------------------------------------
4
 
5
  # -------------------------------------------------------------------------
@@ -16,96 +17,105 @@
16
  # limitations under the License.
17
  # -------------------------------------------------------------------------
18
 
19
- import spaces
20
- import gradio as gr
21
- from PIL import Image
22
- #from pydub import AudioSegment
23
- #from scipy.io import wavfile
24
-
25
  import os
26
  import re
 
27
  import time
28
- import warnings
29
- #import datetime
30
- #import pandas as pd
31
- #import csv
32
  import subprocess
33
- from pathlib import Path
 
34
  import tempfile
35
  from fpdf import FPDF
 
 
 
 
 
36
 
37
- import psutil
38
  from gpuinfo import GPUInfo
39
 
40
- #import numpy as np
41
- import torch
42
- #import torchaudio
43
- #import torchaudio.transforms as transforms
44
 
45
- from transformers import pipeline #AutoModel
46
 
47
- #import spacy
48
- #import networkx as nx
49
- #from sklearn.feature_extraction.text import TfidfVectorizer
50
- #from sklearn.metrics.pairwise import cosine_similarity
51
 
52
- warnings.filterwarnings("ignore")
 
53
 
54
- # ------------header section------------
55
- HEADER_INFO = """
56
- # SWITCHVOX ✨|🇳🇴 *Transkribering av lydfiler til norsk skrift*
57
- """.strip()
58
- LOGO = "https://cdn-lfs-us-1.huggingface.co/repos/fe/3b/fe3bd7c8beece8b087fddcc2278295e7f56c794c8dcf728189f4af8bddc585e1/24ad06a03a5bc66f3eba361b94e45ad17e46f98b76632f2d17faf8a0b4f9ab6b?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27banner_trans.png%3B+filename%3D%22banner_trans.png%22%3B&response-content-type=image%2Fpng&Expires=1725145079&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyNTE0NTA3OX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zL2ZlLzNiL2ZlM2JkN2M4YmVlY2U4YjA4N2ZkZGNjMjI3ODI5NWU3ZjU2Yzc5NGM4ZGNmNzI4MTg5ZjRhZjhiZGRjNTg1ZTEvMjRhZDA2YTAzYTViYzY2ZjNlYmEzNjFiOTRlNDVhZDE3ZTQ2Zjk4Yjc2NjMyZjJkMTdmYWY4YTBiNGY5YWI2Yj9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSomcmVzcG9uc2UtY29udGVudC10eXBlPSoifV19&Signature=PCB1BZeLzsZXnn4lRi1Fj50%7E0E4G39u6-GKLNLLgxhDyhXlqb3BJkR7IOgdyjuNyBh8Iz2d7QqhzNSsOlQmqR30cJLl6aDM5eJO01OlWXoD3Z0KcphoVBFyyrkoxe2oS8i2mdlbFRYn7oc%7EhyOcW46zR6HtqAB91iEydhEa5WTyz3C9nWasgMZevb0vRJtzwhplM9e-%7EbRrZTm2fMzkL14IGWpTpUOGBe93BDSAYbPhrZK1jvuY8p0Tmy1iEKVP3Zdzix5U5lrbxit5luitEhK8x6q2t63Gdv7F0CZvjQtTh7MYkB5GNiru8bTGKAgCdHGiZbG7VCGfhlX3UKvUTPg__&Key-Pair-Id=K24J24Z295AEI9"
59
- SIDEBAR_INFO = f"""
60
- <div align="center">
61
- <img src="{LOGO}" style="width: 100%; height: auto;"/>
62
- </div>
63
- """
64
 
65
- @spaces.GPU(duration=120)
66
- def transcribe(microphone, file_upload):
67
 
68
  file = microphone if microphone is not None else file_upload
69
  start_time = time.time()
70
 
71
  #--------------____________________________________________--------------"
72
 
73
- device = "cuda" if torch.cuda.is_available() else "cpu"
74
- pipe = pipeline("automatic-speech-recognition", model="NbAiLab/nb-whisper-large", device=device)
75
- text = pipe(file)["text"]
 
 
 
76
 
77
- #--------------____________________________________________--------------"
 
78
 
 
 
79
  end_time = time.time()
80
  output_time = end_time - start_time
 
 
81
  word_count = len(text.split())
82
 
83
- # --GPU metrics
84
  memory = psutil.virtual_memory()
85
 
86
- # Default GPU utilization and memory to 0 in case of an error
87
- gpu_utilization = 0
88
- gpu_memory = 0
89
- try:
90
- gpu_utilization, gpu_memory = GPUInfo.gpu_usage()
91
- gpu_utilization = gpu_utilization[0] if len(gpu_utilization) > 0 else 0
92
- gpu_memory = gpu_memory[0] if len(gpu_memory) > 0 else 0
93
- except Exception as e:
94
- print(f"Error retrieving GPU info: {e}")
95
-
96
  # --CPU metric
97
  cpu_usage = psutil.cpu_percent(interval=1)
 
 
 
98
 
99
  # --system info string
100
  system_info = f"""
101
  Processing time: {output_time:.2f} seconds.
102
  Number of words: {word_count}
 
 
 
 
103
  """
104
- # *Memory: {memory.total / (1024 * 1024 * 1024):.2f}GB, used: {memory.percent}%, available: {memory.available / (1024 * 1024 * 1024):.2f}GB.*
105
- # *GPU Utilization: {gpu_utilization}%, GPU Memory: {gpu_memory}*
106
- # *CPU Usage: {cpu_usage}%*
107
  return text, system_info
108
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  def save_to_pdf(text, summary):
110
  pdf = FPDF()
111
  pdf.add_page()
@@ -127,13 +137,14 @@ css = """
127
  #transcription_output textarea {
128
  background-color: #000000; /* black */
129
  color: #00FF00 !important; /* text color */
130
- font-size: 16px; /* font size */
131
  }
132
 
133
  #system_info_box textarea {
134
  background-color: #ffe0b3; /* orange */
135
  color: black !important; /* text color */
136
- font-size: 14px; /* font size */
 
137
  }
138
  """
139
 
@@ -159,12 +170,14 @@ with iface:
159
  with gr.Row():
160
  with gr.Column(scale=3):
161
  text_output = gr.Textbox(label="Transkribert Tekst", elem_id="transcription_output")
162
- system_info = gr.Textbox(label="Antall sekunder, ord:", elem_id="system_info_box")
 
 
163
 
164
  with gr.Tabs():
165
  with gr.TabItem("Download PDF"):
166
  pdf_text_only = gr.Button("Last ned pdf med resultat")
167
- pdf_output = gr.File(label="/.docx?")
168
 
169
  pdf_text_only.click(fn=lambda text: save_to_pdf(text, ""), inputs=[text_output], outputs=[pdf_output])
170
 
@@ -180,9 +193,14 @@ with iface:
180
  </a>
181
  </div>
182
  ''')
183
-
184
- transcribe_btn.click(fn=transcribe, inputs=[microphone, upload], outputs=[text_output, system_info])
 
 
 
 
 
185
 
186
 
187
 
188
- iface.launch(share=True, debug=True)
 
1
+ #!/usr/bin/env python
2
  ### -----------------------------------------------------------------------
3
+ ### (test_BASE, Revised) version_1.07 ALPHA, app.py
4
  ### -----------------------------------------------------------------------
5
 
6
  # -------------------------------------------------------------------------
 
17
  # limitations under the License.
18
  # -------------------------------------------------------------------------
19
 
 
 
 
 
 
 
20
  import os
21
  import re
22
+ import uuid
23
  import time
24
+ import psutil
 
 
 
25
  import subprocess
26
+ from tqdm import tqdm
27
+
28
  import tempfile
29
  from fpdf import FPDF
30
+ from pathlib import Path
31
+
32
+ import numpy as np
33
+ import torch
34
+ from transformers import pipeline
35
 
 
36
  from gpuinfo import GPUInfo
37
 
38
+ import gradio as gr
 
 
 
39
 
 
40
 
41
+ ###############################################################################
42
+ # Configuration.
43
+ ###############################################################################
 
44
 
45
+ #if not torch.cuda.is_available():
46
+ #DESCRIPTION += "\n<p>⚠️Running on CPU, This may not work on CPU.</p>"
47
 
48
+ CACHE_EXAMPLES = torch.device('cuda') and os.getenv("CACHE_EXAMPLES", "0") == "1"
49
+ #CACHE_EXAMPLES = torch.cuda.is_available() and os.getenv("CACHE_EXAMPLES", "0") == "1"
50
+ #USE_TORCH_COMPILE = os.getenv("USE_TORCH_COMPILE", "0") == "1"
51
+ #ENABLE_CPU_OFFLOAD = os.getenv("ENABLE_CPU_OFFLOAD", "0") == "1"
52
+
53
+ device = torch.device('cuda')
54
+ #device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
 
 
55
 
56
+ #@spaces.GPU
57
+ def transcribe(microphone, file_upload, progress=gr.Progress(track_tqdm=True)):
58
 
59
  file = microphone if microphone is not None else file_upload
60
  start_time = time.time()
61
 
62
  #--------------____________________________________________--------------"
63
 
64
+ #if torch.cuda.is_available():
65
+ #with torch.no_grad():
66
+ #pipe = pipeline("automatic-speech-recognition", model="NbAiLab/nb-whisper-large", device=device)
67
+
68
+ with torch.no_grad():
69
+ pipe = pipeline("automatic-speech-recognition", model="NbAiLab/nb-whisper-large", device=device)
70
 
71
+
72
+ text = pipe(file)["text"]
73
 
74
+ #--------------____________________________________________--------------"
75
+
76
  end_time = time.time()
77
  output_time = end_time - start_time
78
+
79
+ # --Word count
80
  word_count = len(text.split())
81
 
82
+ # --Memory metrics
83
  memory = psutil.virtual_memory()
84
 
 
 
 
 
 
 
 
 
 
 
85
  # --CPU metric
86
  cpu_usage = psutil.cpu_percent(interval=1)
87
+
88
+ # --GPU metric
89
+ gpu_utilization, gpu_memory = GPUInfo.gpu_usage()
90
 
91
  # --system info string
92
  system_info = f"""
93
  Processing time: {output_time:.2f} seconds.
94
  Number of words: {word_count}
95
+ Memory used: {memory.percent}%
96
+ GPU Utilization: {gpu_utilization}%
97
+ GPU Memory: {gpu_memory}
98
+ CPU Usage: {cpu_usage}%
99
  """
100
+ #--------------____________________________________________--------------"
101
+
 
102
  return text, system_info
103
 
104
+
105
+ ###############################################################################
106
+ # Interface.
107
+ ###############################################################################
108
+
109
+ HEADER_INFO = """
110
+ # SWITCHVOX ✨|🇳🇴 *Transkribering av lydfiler til Norsk skrift.*
111
+ """.strip()
112
+ LOGO = "https://cdn-lfs-us-1.huggingface.co/repos/fe/3b/fe3bd7c8beece8b087fddcc2278295e7f56c794c8dcf728189f4af8bddc585e1/24ad06a03a5bc66f3eba361b94e45ad17e46f98b76632f2d17faf8a0b4f9ab6b?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27banner_trans.png%3B+filename%3D%22banner_trans.png%22%3B&response-content-type=image%2Fpng&Expires=1725145079&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyNTE0NTA3OX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zL2ZlLzNiL2ZlM2JkN2M4YmVlY2U4YjA4N2ZkZGNjMjI3ODI5NWU3ZjU2Yzc5NGM4ZGNmNzI4MTg5ZjRhZjhiZGRjNTg1ZTEvMjRhZDA2YTAzYTViYzY2ZjNlYmEzNjFiOTRlNDVhZDE3ZTQ2Zjk4Yjc2NjMyZjJkMTdmYWY4YTBiNGY5YWI2Yj9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSomcmVzcG9uc2UtY29udGVudC10eXBlPSoifV19&Signature=PCB1BZeLzsZXnn4lRi1Fj50%7E0E4G39u6-GKLNLLgxhDyhXlqb3BJkR7IOgdyjuNyBh8Iz2d7QqhzNSsOlQmqR30cJLl6aDM5eJO01OlWXoD3Z0KcphoVBFyyrkoxe2oS8i2mdlbFRYn7oc%7EhyOcW46zR6HtqAB91iEydhEa5WTyz3C9nWasgMZevb0vRJtzwhplM9e-%7EbRrZTm2fMzkL14IGWpTpUOGBe93BDSAYbPhrZK1jvuY8p0Tmy1iEKVP3Zdzix5U5lrbxit5luitEhK8x6q2t63Gdv7F0CZvjQtTh7MYkB5GNiru8bTGKAgCdHGiZbG7VCGfhlX3UKvUTPg__&Key-Pair-Id=K24J24Z295AEI9"
113
+ SIDEBAR_INFO = f"""
114
+ <div align="center">
115
+ <img src="{LOGO}" style="width: 100%; height: auto;"/>
116
+ </div>
117
+ """
118
+
119
  def save_to_pdf(text, summary):
120
  pdf = FPDF()
121
  pdf.add_page()
 
137
  #transcription_output textarea {
138
  background-color: #000000; /* black */
139
  color: #00FF00 !important; /* text color */
140
+ font-size: 18px; /* font size */
141
  }
142
 
143
  #system_info_box textarea {
144
  background-color: #ffe0b3; /* orange */
145
  color: black !important; /* text color */
146
+ font-size: 16px; /* font size */
147
+ font-weight: bold; /* bold font */
148
  }
149
  """
150
 
 
170
  with gr.Row():
171
  with gr.Column(scale=3):
172
  text_output = gr.Textbox(label="Transkribert Tekst", elem_id="transcription_output")
173
+ with gr.Column(scale=1):
174
+ system_info = gr.Textbox(label="Antall sekunder, ord:", elem_id="system_info_box")
175
+
176
 
177
  with gr.Tabs():
178
  with gr.TabItem("Download PDF"):
179
  pdf_text_only = gr.Button("Last ned pdf med resultat")
180
+ pdf_output = gr.File(label="/.pdf")
181
 
182
  pdf_text_only.click(fn=lambda text: save_to_pdf(text, ""), inputs=[text_output], outputs=[pdf_output])
183
 
 
193
  </a>
194
  </div>
195
  ''')
196
+ transcribe_btn.click(
197
+ fn=transcribe,
198
+ inputs=[microphone, upload],
199
+ outputs=[text_output, system_info]
200
+ )
201
+
202
+ #transcribe_btn.click(fn=transcribe, inputs=[microphone, upload], outputs=[text_output, system_info])
203
 
204
 
205
 
206
+ iface.launch(share=True,debug=True, enable_queue=True)