Spaces:

camparchimedes
/

nb

Build error

App Files

camparchimedes committed on Aug 25, 2024

Commit

85002a1

verified ·

1 Parent(s): 2e6be6b

Update app.py

Browse files

Files changed (1) hide show

app.py +59 -16

app.py CHANGED Viewed

@@ -15,10 +15,13 @@
 #---------------------------------------------------------------------------------------------------------------------------------------------
-import time
 import os
 import re
 import warnings
 from pydub import AudioSegment
 import pandas as pd
@@ -36,14 +39,20 @@ from sklearn.metrics.pairwise import cosine_similarity
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 import gradio as gr
 from fpdf import FPDF
 from PIL import Image
-title = """# Welcome to 🌟Switch Work webapp for transkribering av lydfiler til norsk skrift. Språkmodell: NbAiLab/nb-whisper-large, Ekstra: oppsummering, pdf-download
-✨ Query Attention, a context window of 16,384 tokens with a sliding window attention of 4,096 tokens, and was trained using the Fill-in-the-Middle objective on 4+ trillion tokens. The model was trained with NVIDIA NeMo™ Framework using the NVIDIA Eos Supercomputer built with NVIDIA DGX H100 systems. You can build with this endpoint using✨StarCoder available here : [bigcode/starcoder2-15b](https://huggingface.co/bigcode/starcoder2-15b). You can also use ✨StarCoder by cloning this space. Simply click here: <a style="display:inline-block" href="https://huggingface.co/spaces/Tonic/starcoder2?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14" alt="Duplicate Space"></a></h3>
-Join us : 🌟TeamTonic🌟 is always making cool demos! Join our active builder's 🛠️community 👻 [![Join us on Discord](https://img.shields.io/discord/1109943800132010065?label=Discord&logo=discord&style=flat-square)](https://discord.gg/GWpVpekp) On 🤗Huggingface:[MultiTransformer](https://huggingface.co/MultiTransformer) Math 🔍 [introspector](https://huggingface.co/introspector) On 🌐Github: [Tonic-AI](https://github.com/tonic-ai) & contribute to🌟 [SciTonic](https://github.com/Tonic-AI/scitonic)🤗Big thanks to Yuvi Sharma and all the folks at huggingface for the community grant 🤗
-"""
 warnings.filterwarnings("ignore")
@@ -51,32 +60,56 @@ def convert_to_wav(audio_file):
     audio = AudioSegment.from_file(audio_file, format="m4a")
     wav_file = "temp.wav"
     audio.export(wav_file, format="wav")
     return wav_file
 #:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
-asr = pipeline("automatic-speech-recognition", "NbAiLabBeta/nb-whisper-large")
 def transcribe_audio(audio_file, batch_size=4):
     if audio_file.endswith(".m4a"):
         audio_file = convert_to_wav(audio_file)
     start_time = time.time()
-    outputs = asr(audio_file, chunk_length_s=28, batch_size=batch_size, return_timestamps=False, generate_kwargs={'num_beams': 5, 'task': 'transcribe', 'language': 'no'}, skip_special_tokens=True)
-    text = outputs["text", skip_special_tokens=True]
     end_time = time.time()
     output_time = end_time - start_time
     word_count = len(text.split())
     result = f"Transcription: {text.strip()}\n\nTime taken: {output_time:.2f} seconds\nNumber of words: {word_count}"
     return text.strip(), result
-#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
 # Clean and preprocess text
 def clean_text(text):
     text = re.sub(r'https?:\/\/.*[\r\n]*', '', text)
@@ -187,16 +220,26 @@ def save_to_pdf(text, summary):
 iface = gr.Blocks()
 with iface:
-    gr.HTML('<img src="https://huggingface.co/spaces/camparchimedes/ola_s-audioshop/blob/main/pic09w9678yhit.png" alt="Image" style="width: 100%; height: auto;"></a>')
-    gr.Markdown(title)
     with gr.Tabs():
         with gr.TabItem("Transcription"):
             audio_input = gr.Audio(type="filepath")
-            text_output = gr.Textbox(label="Transcription")
-            result_output = gr.Textbox(label="Details")
             transcribe_button = gr.Button("Transcribe")
             transcribe_button.click(fn=transcribe_audio, inputs=[audio_input], outputs=[text_output, result_output])

 #---------------------------------------------------------------------------------------------------------------------------------------------
 import os
 import re
 import warnings
+import time
+import datetime
+import subprocess
+from pathlib import Path
 from pydub import AudioSegment
 import pandas as pd
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+from gpuinfo import GPUInfo
 import gradio as gr
 from fpdf import FPDF
 from PIL import Image
+HEADER_INFO = """
+    # WEB APP ✨| Norwegian WHISPER Model
+Switch Work [Transkribering av lydfiler til norsk skrift]
+""".strip()
+LOGO = "https://huggingface.co/spaces/camparchimedes/ola_s-audioshop/blob/main/pic09w9678yhit.png"
+SIDEBAR_INFO = f"""
+<div align=center>
+<img src="{LOGO}" width=100/>"""
 warnings.filterwarnings("ignore")
     audio = AudioSegment.from_file(audio_file, format="m4a")
     wav_file = "temp.wav"
     audio.export(wav_file, format="wav")
     return wav_file
+#def convert_to_wav(filepath):
+    #_,file_ending = os.path.splitext(f'{filepath}')
+    #audio_file = filepath.replace(file_ending, ".wav")
+    #print("starting conversion to wav")
+    #os.system(f'ffmpeg -i "{filepath}" -ar 16000 -ac 1 -c:a pcm_s16le "{audio_file}"')
+    #return audio_file
 #:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
+device = "cuda" if torch.cuda.is_available() else "cpu"
+pipe = pipeline(
+    task="automatic-speech-recognition",
+    model="NbAiLab/nb-whisper-large",
+    chunk_length_s=30,
+    device=device,
+)
 def transcribe_audio(audio_file, batch_size=4):
     if audio_file.endswith(".m4a"):
         audio_file = convert_to_wav(audio_file)
     start_time = time.time()
+    outputs = pipe(audio_file, batch_size=batch_size, return_timestamps=False, generate_kwargs={'num_beams': 5, 'task': 'transcribe', 'language': 'no'}) # skip_special_tokens=True
+    text = outputs["text"]
     end_time = time.time()
     output_time = end_time - start_time
     word_count = len(text.split())
     result = f"Transcription: {text.strip()}\n\nTime taken: {output_time:.2f} seconds\nNumber of words: {word_count}"
     return text.strip(), result
+    memory = psutil.virtual_memory()
+    gpu_utilization, gpu_memory = GPUInfo.gpu_usage()
+    gpu_utilization = gpu_utilization[0] if len(gpu_utilization) > 0 else 0
+    gpu_memory = gpu_memory[0] if len(gpu_memory) > 0 else 0
+    system_info = f"""
+    *Memory: {memory.total / (1024 * 1024 * 1024):.2f}GB, used: {memory.percent}%, available: {memory.available / (1024 * 1024 * 1024):.2f}GB.*
+    *Processing time: {time_diff:.5} seconds.*
+    *GPU Utilization: {gpu_utilization}%, GPU Memory: {gpu_memory}"""
+#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
 # Clean and preprocess text
 def clean_text(text):
     text = re.sub(r'https?:\/\/.*[\r\n]*', '', text)
 iface = gr.Blocks()
+PLACEHOLDER = """
+<div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
+   <img src=""https://huggingface.co/spaces/camparchimedes/ola_s-audioshop/blob/main/pic09w9678yhit.png" alt="" style="width: 100%; height: auto; opacity: 0.93;  ">
+   <h1 style="font-size: 28px; margin-bottom: 2px; opacity: 0.55;">Switch Work | Verktæysett no.1</h1>
+   <p style="font-size: 18px; margin-bottom: 2px; opacity: 0.65;">En webapp for transkribering av lydfiler til norsk skrift. Språkmodell: NbAiLab/nb-whisper-large, Ekstra: oppsummering, pdf-download</p>
+</div>
+"""
 with iface:
+    #gr.HTML('<img src="https://huggingface.co/spaces/camparchimedes/ola_s-audioshop/blob/main/pic09w9678yhit.png" alt="" style="width: 100%; height: auto; opacity: 0.55; >')
+    #gr.Markdown("**Switch Work webapp for transkribering av lydfiler til norsk skrift. Språkmodell: NbAiLab/nb-whisper-large, Ekstra: oppsummering, pdf-download**")
+    gr.Image({LOGO})
+    gr.HTML(SIDEBAR_INFO)
+    gr.Markdown(HEADER_INFO)
     with gr.Tabs():
         with gr.TabItem("Transcription"):
             audio_input = gr.Audio(type="filepath")
+            text_output = gr.Textbox(label="Text")
+            result_output = gr.Textbox(label="Transcription Details")
             transcribe_button = gr.Button("Transcribe")
             transcribe_button.click(fn=transcribe_audio, inputs=[audio_input], outputs=[text_output, result_output])