dwb2023 committed
Commit 755f3a2 · verified · 1 Parent(s): 2a21a22

Update app.py

Files changed (1)
  1. app.py +11 -2
app.py CHANGED
@@ -1,6 +1,6 @@
 import gradio as gr
 import yt_dlp as youtube_dl
-from transformers import pipeline, WhisperForConditionalGeneration, WhisperTokenizer
+from transformers import pipeline, BitsAndBytesConfig, WhisperForConditionalGeneration, WhisperTokenizer
 from transformers.pipelines.audio_utils import ffmpeg_read
 import torch
 from huggingface_hub import CommitScheduler
@@ -21,8 +21,17 @@ YT_LENGTH_LIMIT_S = 4800  # 1 hour 20 minutes
 
 device = 0 if torch.cuda.is_available() else "cpu"
 
+
+# Quantization
+bnb_config = BitsAndBytesConfig(
+    load_in_4bit=True
+)
+
 # Load the model
-model = WhisperForConditionalGeneration.from_pretrained(MODEL_NAME)
+model = WhisperForConditionalGeneration.from_pretrained(
+    MODEL_NAME, load_in_4bit=True, device_map="auto"
+)
+
 tokenizer = WhisperTokenizer.from_pretrained(MODEL_NAME)
 
 # Initialize the pipeline with the quantized model
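
Note that the commit defines bnb_config but then passes load_in_4bit=True directly to from_pretrained, so the config object itself is never used. Below is a minimal sketch of the equivalent wiring that routes the config through quantization_config and feeds the quantized model into the ASR pipeline the final comment refers to. The checkpoint name, the feature extractor, and the chunk_length_s value are illustrative assumptions, not taken from the commit.

    # Sketch only, not part of the commit. Requires bitsandbytes and accelerate,
    # and a CUDA device for 4-bit loading.
    from transformers import (
        BitsAndBytesConfig,
        WhisperFeatureExtractor,
        WhisperForConditionalGeneration,
        WhisperTokenizer,
        pipeline,
    )

    MODEL_NAME = "openai/whisper-large-v3"  # assumed; the app defines its own MODEL_NAME

    # Same 4-bit setting as the diff's bnb_config
    bnb_config = BitsAndBytesConfig(load_in_4bit=True)

    # Pass the config explicitly instead of the bare load_in_4bit flag
    model = WhisperForConditionalGeneration.from_pretrained(
        MODEL_NAME,
        quantization_config=bnb_config,
        device_map="auto",
    )
    tokenizer = WhisperTokenizer.from_pretrained(MODEL_NAME)
    feature_extractor = WhisperFeatureExtractor.from_pretrained(MODEL_NAME)

    # Initialize the pipeline with the quantized model
    asr = pipeline(
        "automatic-speech-recognition",
        model=model,
        tokenizer=tokenizer,
        feature_extractor=feature_extractor,
        chunk_length_s=30,  # assumed chunking value, not from the diff
    )

Passing quantization_config also keeps the setup forward-compatible, since newer transformers releases deprecate the bare load_in_4bit argument to from_pretrained in favour of the config object.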