SaladSlayer00 committed on
Commit
2bc9534
·
1 Parent(s): ceaf90a

the new app with translation support, youtube and summarization, pdf, tts, examples

Browse files
Files changed (2) hide show
  1. app.py +112 -16
  2. requirements.txt +10 -2
app.py CHANGED
@@ -1,11 +1,37 @@
1
- from transformers import pipeline
2
  import gradio as gr
 
 
 
3
  from huggingface_hub import HfFolder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
 
 
 
 
 
5
 
6
- token = HfFolder.get_token()
7
- pipe = pipeline(model="SaladSlayer00/another_local")
8
- def transcribe(rec=None, file=None):
 
 
 
 
9
  if rec is not None:
10
  audio = rec
11
  elif file is not None:
@@ -13,19 +39,89 @@ def transcribe(rec=None, file=None):
13
  else:
14
  return "Provide a recording or a file."
15
 
16
- text = pipe(audio)["text"]
17
- return text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
- iface = gr.Interface(
21
- fn=transcribe,
22
- inputs=[
23
- gr.Audio(type="filepath")
24
- ],
25
- outputs="text",
26
- title="Whisper Small Italian",
27
- description="Realtime demo for Italian speech recognition using a fine-tuned Whisper model.",
28
- )
 
 
29
 
30
 
31
- iface.launch()
 
 
1
  import gradio as gr
2
+ import pytube as pt
3
+ from transformers import pipeline
4
+ import os
5
  from huggingface_hub import HfFolder
6
+ from gtts import gTTS
7
+ from fpdf import FPDF
8
+ from pdfminer.high_level import extract_text
9
+
10
+
11
+ # Initialize pipelines for transcription, summarization, and translation
12
+ transcription_pipe = pipeline(model="SaladSlayer00/another_local", token=HfFolder.get_token())
13
+ summarizer = pipeline("summarization", model="it5/it5-efficient-small-el32-news-summarization")
14
+ translator = pipeline("translation", model="Helsinki-NLP/opus-mt-it-en")
15
+
16
def process_audio(file_path):
    """Transcribe an audio file, then summarize and translate the transcript.

    Args:
        file_path: Path of the audio file handed to the transcription pipeline.

    Returns:
        A ``(text, summary, translation)`` tuple of strings: the transcript,
        its summary and its translation.
    """
    text = transcription_pipe(file_path)["text"]
    if not text.strip():
        # Nothing was transcribed, so there is nothing to summarize or translate.
        return text, "", ""

    # truncation=True clips transcripts that exceed the model's input window
    # instead of letting the over-long input reach the encoder.
    summary = summarizer(
        text, min_length=25, max_length=50, truncation=True
    )[0]["summary_text"]
    translation = translator(text, truncation=True)[0]["translation_text"]
    return text, summary, translation
21
 
22
def download_youtube_audio(yt_url):
    """Download the first audio-only stream of a YouTube video.

    Args:
        yt_url: URL of the YouTube video.

    Returns:
        Path of the downloaded file, as returned by pytube.

    Raises:
        ValueError: If the video exposes no audio-only stream.
    """
    import uuid  # local import: only needed to build a unique file name

    yt = pt.YouTube(yt_url)
    stream = yt.streams.filter(only_audio=True).first()
    if stream is None:
        raise ValueError(f"No audio-only stream available for {yt_url!r}")

    # A per-call file name keeps concurrent requests from overwriting (and
    # later deleting) each other's download.
    return stream.download(filename=f"temp_audio_{uuid.uuid4().hex}.mp3")
27
 
28
def youtube_transcription(yt_url):
    """Download a YouTube video's audio and run it through ``process_audio``.

    Args:
        yt_url: URL of the YouTube video.

    Returns:
        Whatever ``process_audio`` returns for the downloaded file.
    """
    audio_path = download_youtube_audio(yt_url)
    try:
        return process_audio(audio_path)
    finally:
        # Remove the download even when processing fails, so failed requests
        # do not leave audio files behind.
        os.remove(audio_path)
33
+
34
def transcribe_and_process(rec=None, file=None):
    """Process a recording or an uploaded file, whichever is provided.

    Args:
        rec: Path of a recorded clip; takes precedence when not ``None``.
        file: Path of an uploaded file, used when ``rec`` is ``None``.

    Returns:
        The ``process_audio`` result for the selected input. When neither
        input is given, a 3-tuple whose first item is a hint message and whose
        other items are empty strings.
    """
    audio = rec if rec is not None else file
    if audio is None:
        # The click handler is wired to three output boxes, so the fallback
        # must also yield three values.
        return "Provide a recording or a file.", "", ""

    return process_audio(audio)
43
+
44
def save_text_to_pdf(text, filename="output.pdf"):
    """Write ``text`` to a one-font PDF document.

    Args:
        text: Text to place in the document.
        filename: Path of the PDF file to write.

    Returns:
        ``filename``, unchanged.
    """
    pdf = FPDF()
    pdf.add_page()

    # Arial, regular weight, 12 pt.
    pdf.set_font("Arial", size=12)

    # The built-in fonts only cover latin-1; swap anything outside that range
    # for "?" so unusual characters cannot abort PDF generation.
    printable = text.encode("latin-1", "replace").decode("latin-1")
    pdf.multi_cell(0, 10, printable)

    pdf.output(filename)
    return filename
61
+
62
+
63
def pdf_to_text(file_path):
    """Read the text of a PDF aloud and save the speech to an audio file.

    Despite its name, this function returns an audio file path, not text.

    Args:
        file_path: Path of the PDF to read.

    Returns:
        Path of the generated speech file.

    Raises:
        ValueError: If no text could be extracted from the PDF.
    """
    text = extract_text(file_path)
    if not text.strip():
        raise ValueError(f"No text found in {file_path!r}; nothing to speak.")

    # gTTS writes MP3 data, so the file carries an .mp3 extension.
    audio_file = "tts_audio.mp3"
    gTTS(text=text, lang='en', slow=False).save(audio_file)
    return audio_file
69
+
70
def audio_to_pdf(file_path):
    """Translate an audio file, save the translation as a PDF and voice it.

    Args:
        file_path: Path of the audio file to process.

    Returns:
        A ``(translation, pdf_file, tts_audio_file)`` tuple: the translated
        text, the path of the PDF holding it, and the path of the speech
        generated from that PDF.
    """
    # Only the translation is used; transcript and summary are discarded.
    _transcript, _summary, translation = process_audio(file_path)
    pdf_file = save_text_to_pdf(translation)
    # The speech is generated from the PDF that was just written.
    tts_audio_file = pdf_to_text(pdf_file)
    return translation, pdf_file, tts_audio_file
75
+
76
def pdf_to_audio(file_path):
    """Convert the text of a PDF to speech and save it to an audio file.

    Args:
        file_path: Path of the PDF to read.

    Returns:
        Path of the generated speech file.

    Raises:
        ValueError: If no text could be extracted from the PDF.
    """
    text = extract_text(file_path)
    if not text.strip():
        raise ValueError(f"No text found in {file_path!r}; nothing to speak.")

    # gTTS writes MP3 data, so the file carries an .mp3 extension.
    audio_file = "output_audio.mp3"
    gTTS(text=text, lang='en', slow=False).save(audio_file)
    return audio_file
82
+
83
# Gradio Blocks UI with three tabs; each tab wires an input widget and a
# button to one processing function and shows that function's three results.
# NOTE(review): this listing has lost its indentation, so which widgets sit
# inside each gr.Row() cannot be determined from here.
+ app = gr.Blocks()
84
+
85
+ with app:
86
+ gr.Markdown("### Whisper Small Italian Transcription, Summarization, and Translation")
87
+ gr.Markdown("Talk, upload an audio file or enter a YouTube URL for processing.")
88
+
89
# Tab "Audio Processing": four example clips, then an audio input whose file
# path is passed to transcribe_and_process; the results fill three text boxes.
# NOTE(review): the example paths are absolute (/content/newspace/...);
# confirm they exist wherever the app is deployed.
+ with gr.Tab("Audio Processing"):
90
+ gr.Markdown("### Example Audio Files")
91
+ gr.Audio("/content/newspace/examples/La_Casa.mp3", label="Short Audio 1")
92
+ gr.Audio("/content/newspace/examples/La_Neve.mp3", label="Short Audio 2")
93
+ gr.Audio("/content/newspace/examples/La_Lettera.mp3", label="Long Audio 3")
94
+ gr.Audio("/content/newspace/examples/Le_Feste.mp3", label="Long Audio 4")
95
+ with gr.Row():
96
+ audio_input = gr.Audio(label="Upload Audio or Record", type="filepath")
97
+ audio_process_button = gr.Button("Process Audio")
98
+ audio_transcription, audio_summary, audio_translation = gr.Textbox(label="Transcription"), gr.Textbox(label="Summary"), gr.Textbox(label="Translation")
99
+ audio_process_button.click(fn=transcribe_and_process, inputs=audio_input, outputs=[audio_transcription, audio_summary, audio_translation])
100
 
101
+
102
# Tab "YouTube Processing": four example links, then a URL text box passed to
# youtube_transcription; the results fill three text boxes.
+ with gr.Tab("YouTube Processing"):
103
+ gr.Markdown("### Example YouTube URLs")
104
+ gr.Markdown("1. [The House](https://www.youtube.com/watch?v=Is6nHH43rnQ)")
105
+ gr.Markdown("2. [Introduction](https://www.youtube.com/watch?v=l_p0UVsdc6A)")
106
+ gr.Markdown("3. [Where Are You From?](https://www.youtube.com/watch?v=4QobTwKT_Xc)")
107
+ gr.Markdown("4. [The Colors](https://www.youtube.com/watch?v=HsSLwV1yEjc)")
108
+ with gr.Row():
109
+ yt_input = gr.Textbox(label="YouTube URL")
110
+ yt_process_button = gr.Button("Process YouTube Video")
111
+ yt_transcription, yt_summary, yt_translation = gr.Textbox(label="Transcription"), gr.Textbox(label="Summary"), gr.Textbox(label="Translation")
112
+ yt_process_button.click(fn=youtube_transcription, inputs=yt_input, outputs=[yt_transcription, yt_summary, yt_translation])
113
 
# Tab "Italian Audio to English PDF": the same four example clips, then an
# audio input passed to audio_to_pdf; the results fill a text box, a file
# download and an audio player.
# audio_input is rebound to a new widget here; the click handler registered in
# the first tab was already given the earlier widget.
+ with gr.Tab("Italian Audio to English PDF"):
115
+ gr.Markdown("### Example Audio Files")
116
+ gr.Audio("/content/newspace/examples/La_Casa.mp3", label="Short Audio 1")
117
+ gr.Audio("/content/newspace/examples/La_Neve.mp3", label="Short Audio 2")
118
+ gr.Audio("/content/newspace/examples/La_Lettera.mp3", label="Long Audio 3")
119
+ gr.Audio("/content/newspace/examples/Le_Feste.mp3", label="Long Audio 4")
120
+ with gr.Row():
121
+ audio_input = gr.Audio(label="Upload Italian Audio", type="filepath")
122
+ translate_process_button = gr.Button("Translate and Save as PDF")
123
+ translation_textbox, pdf_download, tts_audio = gr.Textbox(label="Translation"), gr.File(label="Download PDF"), gr.Audio(label="TTS Audio")
124
+ translate_process_button.click(fn=audio_to_pdf, inputs=audio_input, outputs=[translation_textbox, pdf_download, tts_audio])
125
 
126
 
127
# Launch the app; the parentheses around the call are redundant.
+ (app.launch())
requirements.txt CHANGED
@@ -1,3 +1,11 @@
1
- transformers
2
  gradio
3
- torch
 
 
 
 
 
 
 
 
 
 
 
1
  gradio
2
+ sentencepiece
3
+ pytube
4
+ soundfile
5
+ datasets
6
+ transformers
7
+ torch
8
+ gtts
9
+ pdfminer
10
+ pdfminer-six
11
+ fpdf