yellowcandle committed on
Commit
4b18df1
·
unverified ·
1 Parent(s): 344a72e

Tried to add YouTube video upload

Browse files
Files changed (1) hide show
  1. app.py +27 -6
app.py CHANGED
@@ -1,9 +1,33 @@
1
  import spaces
2
  import gradio as gr
3
- # Use a pipeline as a high-level helper
 
 
4
  import torch
5
  from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline, AutoModelForCausalLM, AutoTokenizer
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  @spaces.GPU(duration=60)
8
  def transcribe_audio(audio, model_id):
9
  if audio is None:
@@ -36,7 +60,6 @@ def transcribe_audio(audio, model_id):
36
  result = pipe(audio)
37
  return result["text"]
38
 
39
- # @spaces.GPU(duration=180)
40
  def proofread(text):
41
  if text is None:
42
  return "Please provide the transcribed text for proofreading."
@@ -50,15 +73,13 @@ def proofread(text):
50
  tokenizer = AutoTokenizer.from_pretrained("hfl/llama-3-chinese-8b-instruct-v3")
51
  model.to(device)
52
 
53
- # Perform proofreading using the model
54
  input_text = prompt + text
55
  input_ids = tokenizer.encode(input_text, return_tensors="pt").to(device)
56
- output = model.generate(input_ids, max_length=len(input_ids[0])+50, num_return_sequences=1, temperature=0.7)
57
  proofread_text = tokenizer.decode(output[0], skip_special_tokens=True)
58
 
59
  return proofread_text
60
 
61
-
62
  with gr.Blocks() as demo:
63
  gr.Markdown("""
64
  # Audio Transcription and Proofreading
@@ -70,7 +91,7 @@ with gr.Blocks() as demo:
70
  with gr.Row():
71
  with gr.Column():
72
  audio = gr.Audio(sources="upload", type="filepath")
73
- video = gr.Video(sources="upload")
74
  model_dropdown = gr.Dropdown(choices=["openai/whisper-large-v3", "alvanlii/whisper-small-cantonese"], value="openai/whisper-large-v3")
75
 
76
  transcribe_button = gr.Button("Transcribe")
 
1
  import spaces
2
  import gradio as gr
3
+ import os
4
+ import logging
5
+ from pytube import YouTube
6
  import torch
7
  from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline, AutoModelForCausalLM, AutoTokenizer
8
 
9
def get_text(url, max_bytes=30000000):
    """Download the audio track of a YouTube video and return its transcript.

    Args:
        url: Address of the YouTube video. ``None`` or an empty string is
            treated as "no input".
        max_bytes: Largest downloaded audio file, in bytes, that will be
            transcribed. Defaults to the previously hard-coded 30000000.

    Returns:
        The transcript text with surrounding whitespace stripped, or a
        human-readable message when no URL was given, the video exposes no
        audio-only stream, or the download is larger than ``max_bytes``.
    """
    # Guard clause: nothing to download, so answer with a message instead of
    # silently falling through and returning None.
    if not url:
        return "Please provide a video URL."

    yt = YouTube(url)
    stream = yt.streams.filter(only_audio=True).first()
    if stream is None:
        # first() yields None when the filter matched no stream; calling
        # .download on it would raise AttributeError.
        return "No audio stream was found for this video."
    out_file = stream.download(output_path=".")

    file_stats = os.stat(out_file)
    logging.info(f'Size of audio file in Bytes: {file_stats.st_size}')

    if file_stats.st_size > max_bytes:
        # Do not leave the rejected download behind on disk.
        os.remove(out_file)
        message = 'Videos for transcription on this space are limited to about 1.5 hours. Sorry about this limit but some joker thought they could stop this tool from working by transcribing many extremely long videos. Please visit https://steve.digital to contact me about this space.'
        logging.error(message)
        return message

    # Only the file extension changes here; the audio data is not re-encoded.
    base, _ = os.path.splitext(out_file)
    new_file = base + '.mp3'
    os.rename(out_file, new_file)

    try:
        # NOTE(review): `model` is not defined in the visible part of this
        # file; this call presumably expects a module-level object exposing
        # `.transcribe(path)` that returns a dict with a 'text' key - confirm.
        result = model.transcribe(new_file)
        return result['text'].strip()
    finally:
        # Remove the temporary audio file whether or not transcription worked.
        os.remove(new_file)
30
+
31
  @spaces.GPU(duration=60)
32
  def transcribe_audio(audio, model_id):
33
  if audio is None:
 
60
  result = pipe(audio)
61
  return result["text"]
62
 
 
63
  def proofread(text):
64
  if text is None:
65
  return "Please provide the transcribed text for proofreading."
 
73
  tokenizer = AutoTokenizer.from_pretrained("hfl/llama-3-chinese-8b-instruct-v3")
74
  model.to(device)
75
 
 
76
  input_text = prompt + text
77
  input_ids = tokenizer.encode(input_text, return_tensors="pt").to(device)
78
+ output = model.generate(input_ids, max_length=len(input_ids[0]) + 50, num_return_sequences=1, temperature=0.7)
79
  proofread_text = tokenizer.decode(output[0], skip_special_tokens=True)
80
 
81
  return proofread_text
82
 
 
83
  with gr.Blocks() as demo:
84
  gr.Markdown("""
85
  # Audio Transcription and Proofreading
 
91
  with gr.Row():
92
  with gr.Column():
93
  audio = gr.Audio(sources="upload", type="filepath")
94
+ input_text_url = gr.Textbox(label="Video URL")
95
  model_dropdown = gr.Dropdown(choices=["openai/whisper-large-v3", "alvanlii/whisper-small-cantonese"], value="openai/whisper-large-v3")
96
 
97
  transcribe_button = gr.Button("Transcribe")