# Captured from a Hugging Face Spaces page (Space status at capture: Runtime error).
# File size: 5,908 Bytes - a37aa0d
# (The web viewer's line-number gutter, 1-171, was page residue and is omitted.)
import gradio as gr
from TTS.api import TTS
from transformers import T5ForConditionalGeneration, T5Tokenizer, pipeline
import feedparser
import re
# Supported languages: two-letter language code -> display name.
# The codes are what the speech model receives; the names are what the UI shows.
language_map = dict(en="English", fr="French")
# Built-in RSS feeds: display name (as listed in the UI checkbox group) -> feed URL.
rss_feed_map = {
    "NY Times": "https://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml",
    "Fox News": "https://moxie.foxnews.com/google-publisher/latest.xml",
    "Yahoo! News": "https://www.yahoo.com/news/rss",
    "France 24": "https://www.france24.com/fr/rss",
    "France Info": "https://www.francetvinfo.fr/titres.rss",
}
def get_rss_feeds(default_choices, custom_choices):
    """Merge the selected built-in feeds with user-supplied feed URLs.

    Args:
        default_choices: Names of built-in feeds; each must be a key of
            ``rss_feed_map``. ``None`` is treated as "nothing selected".
        custom_choices: Free text holding one feed URL per line. ``None`` is
            treated as empty text.

    Returns:
        A list of unique feed URLs in first-seen order, custom URLs first.

    Raises:
        KeyError: If a name in ``default_choices`` is not in ``rss_feed_map``.
    """
    # Blank lines and surrounding whitespace (e.g. a trailing newline in the
    # textbox) are dropped so they are not later rejected as invalid URLs.
    stripped_lines = (line.strip() for line in (custom_choices or "").splitlines())
    custom_rss_feeds = [line for line in stripped_lines if line]
    default_rss_feeds = [rss_feed_map[key] for key in (default_choices or ())]
    # dict.fromkeys de-duplicates while keeping a deterministic order.
    return list(dict.fromkeys(custom_rss_feeds + default_rss_feeds))
# RSS feeds
def is_url(string):
    """Report whether ``string`` looks like an http(s) or ftp(s) URL.

    The host may be a dotted domain name, ``localhost`` or a dotted-quad IP,
    optionally followed by a port and a path/query. Matching is
    case-insensitive.

    Returns:
        ``True`` when the whole string matches the URL pattern, else ``False``.
    """
    fragments = (
        r'^(?:http|ftp)s?://',  # scheme
        r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|',  # domain name, or
        r'localhost|',  # localhost, or
        r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})',  # IPv4 address
        r'(?::\d+)?',  # optional port
        r'(?:/?|[/?]\S+)$',  # optional path / query
    )
    matcher = re.compile("".join(fragments), re.IGNORECASE)
    if matcher.match(string) is None:
        return False
    return True
def fetch_news(rss_feed):
    """Download one RSS feed and return the titles of its entries.

    Args:
        rss_feed: URL of the feed; it must satisfy ``is_url``.

    Returns:
        A list with the title of every entry that has a non-empty title, in
        feed order.

    Raises:
        ValueError: If ``rss_feed`` is not a valid URL.
    """
    if not is_url(rss_feed):
        raise ValueError(f"{rss_feed} is not a valid RSS feed.")
    feed = feedparser.parse(rss_feed)
    # Attribute access (entry.title) raises for an entry that carries no
    # title, so use a tolerant lookup and leave such entries out.
    titles = (entry.get("title") for entry in feed.entries)
    return [title for title in titles if title]
def fetch_news_multiple_urls(rss_feeds):
    """Collect the headlines of several feeds into one flat list.

    Feeds are fetched in the given order with ``fetch_news``; any exception it
    raises propagates to the caller.
    """
    headlines = []
    for feed_url in rss_feeds:
        headlines.extend(fetch_news(feed_url))
    return headlines
# Language identification
# Name of the checkpoint handed to the `pipeline` factory below.
model_ckpt = "papluca/xlm-roberta-base-language-detection"
# Module-level text-classification pipeline, created once at import time;
# `language_id` calls it once per input string.
pipe = pipeline("text-classification", model=model_ckpt)
def language_id(strings: list[str]) -> list[tuple[str, str]]:
    """Pair each string with the display name of its detected language.

    Args:
        strings: Texts to classify with the module-level ``pipe``.

    Returns:
        ``(string, language_name)`` tuples in input order, where
        ``language_name`` is a value of ``language_map``. Strings whose
        detected label is not a key of ``language_map`` are left out, because
        the rest of the app cannot translate them.
    """
    identified = []
    for string in strings:
        label = pipe(string, top_k=1, truncation=True)[0]['label']
        language = language_map.get(label)
        if language is None:
            # The detector is presumably trained on more languages than the
            # app supports; a direct language_map[label] lookup would raise
            # KeyError and abort the whole request.
            print(f"skipping text in unsupported language '{label}': {string}")
            continue
        identified.append((string, language))
    return identified
# Translation
# The T5 tokenizer and model are loaded once at import time and reused by
# every `translate` call.
model_name = "t5-small"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)
def translate(source_text_with_id, target_language):
    """Translate one text into ``target_language`` with the T5 model.

    Args:
        source_text_with_id: ``(text, language_name)`` pair, for example
            ``('text', 'French')``.
        target_language: Display name of the wanted language; must be a value
            of ``language_map``.

    Returns:
        The translated text, or the original text unchanged when it is
        already in ``target_language``.

    Raises:
        ValueError: If the source or target language is not a value of
            ``language_map``.
    """
    text, source_language = source_text_with_id[0], source_text_with_id[1]
    supported_languages = language_map.values()
    # Explicit raises instead of asserts: asserts vanish under `python -O`.
    if source_language not in supported_languages:
        raise ValueError(f"{source_language} language is not supported.")
    if target_language not in supported_languages:
        raise ValueError(f"{target_language} language is not supported.")
    # Nothing to translate; feeding a "translate X to X" prompt to the model
    # would only degrade the text.
    if source_language == target_language:
        return text
    source_text = f"translate {source_language} to {target_language}: " + text
    # Tokenize the prompt into a tensor of input ids.
    input_ids = tokenizer.encode(source_text, return_tensors="pt")
    # Beam-search generation, capped at 100 output tokens.
    translated_ids = model.generate(input_ids=input_ids, max_length=100, num_beams=4, early_stopping=True)
    # Decode the first generated sequence back to plain text.
    return tokenizer.decode(translated_ids[0], skip_special_tokens=True)
def translate_multiple(source_texts_with_id, target_language):
    """Translate every ``(text, language_name)`` pair into ``target_language``.

    Returns the translations as a list, in the same order as the input pairs.
    """
    translations = []
    for text_with_id in source_texts_with_id:
        translations.append(translate(text_with_id, target_language))
    return translations
# Speech generation
# Module-level text-to-speech model, created once at import time; `read_news`
# calls its `tts_to_file` method with a speaker sample and a language code.
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
def read_news(text,input,output,language):
    """Synthesize ``text`` into an audio file using the module-level ``tts``.

    Args:
        text: Text to speak.
        input: Path of the speaker sample passed to the model as
            ``speaker_wav``. (The name shadows the builtin but is kept so
            existing callers keep working.)
        output: Path the audio is written to.
        language: Language code; must be a key of ``language_map``.

    Returns:
        ``output``, once the file has been written.

    Raises:
        ValueError: If ``language`` is not a key of ``language_map``.
    """
    # Explicit raise instead of an assert: asserts vanish under `python -O`.
    if language not in language_map:
        raise ValueError(f"{language} language is not supported.")
    print("speech generation starting")
    tts.tts_to_file(
        text=text,
        file_path=output,
        speaker_wav=input,
        language=language,
    )
    print("speech generation done")
    return output
# Gradio interface
def process(radio_value, textbox_value, audio_value, checkbox_value):
    """Gradio click handler: turn the selected feeds into a spoken newsletter.

    Args:
        radio_value: Display name of the output language (a value of
            ``language_map``), or ``None`` when nothing is selected.
        textbox_value: Custom feed URLs, one per line.
        audio_value: File path of the uploaded voice sample, or ``None``.
        checkbox_value: Names of the selected built-in feeds.

    Returns:
        Path of the generated audio file.

    Raises:
        gr.Error: With a user-facing message when the language, the voice
            sample or the feeds are missing, a feed URL is invalid, or no
            headline could be collected.
    """
    import os
    import tempfile

    inputs = {
        "language": radio_value,
        "rss_feed_urls": textbox_value,
        "audio": audio_value,
        "selected_feeds": checkbox_value
    }
    print("Inputs to Gradio Blocks:")
    print(inputs)

    # Validate up front so the user gets a clear message instead of a failure
    # deep inside translation or speech synthesis.
    language = next((key for key, val in language_map.items() if val == radio_value), None)
    print("language=", language)
    if language is None:
        raise gr.Error("Please choose the language of the output.")
    if not audio_value:
        raise gr.Error("Please upload a sample audio of the voice to imitate.")

    rss_feeds = get_rss_feeds(checkbox_value, textbox_value)
    print("rss_feeds=", rss_feeds)
    if not rss_feeds:
        raise gr.Error("Please select or add at least one RSS feed.")

    try:
        news = fetch_news_multiple_urls(rss_feeds)
    except ValueError as err:
        # fetch_news rejects malformed URLs with ValueError; show it in the UI.
        raise gr.Error(str(err)) from err
    print("news=", news[:2])

    news_with_language_id = language_id(news)
    print("news_with_language_id=", news_with_language_id[:2])
    translated_news = translate_multiple(news_with_language_id, radio_value)
    print("translated_news=", translated_news[:2])

    all_news = ' '.join(translated_news)
    print("all_news=", all_news[:80])
    if not all_news.strip():
        raise gr.Error("No headlines could be fetched from the selected RSS feeds.")

    # One file per request: a fixed "output.wav" would be overwritten when two
    # requests run at the same time. The file is intentionally left on disk so
    # it can be served for download.
    handle, output_path = tempfile.mkstemp(prefix="newsletter_", suffix=".wav")
    os.close(handle)
    return read_news(all_news, audio_value, output_path, language)
# UI layout: one row per control, then the event wiring.
with gr.Blocks() as demo:
    gr.Markdown("Customize your newsletter and then click **Fetch News** to download the audio output.")
    with gr.Row():
        # Output language; the choices are the display names in `language_map`.
        radio = gr.Radio(
            label='Choose the language of the output',
            info="If the output language doesn't match the language of an RSS feed, an AI model will take care of translation",
            choices=["English", "French"]
        )
    with gr.Row():
        # Extra feed URLs typed by the user, one per line.
        textbox = gr.Textbox(
            placeholder='https://www.francetvinfo.fr/titres.rss',
            label='Add custom RSS feeds to your newsletter',
            info='The provided urls needed to be written each in a separate line'
        )
    with gr.Row():
        # Voice sample; type='filepath' hands `process` a path on disk.
        audio = gr.Audio(
            label="Upload a sample audio of someone speaking. The voice of the output will match the voice of the input.",
            type='filepath'
        )
    with gr.Row():
        # Built-in feeds; the choices are the keys of `rss_feed_map`.
        checkboxgroup = gr.CheckboxGroup(
            ["NY Times", "Fox News", "Yahoo! News", "France 24", "France Info"],
            label="RSS feeds",
            info="Default RSS feeds"
        )
    with gr.Row():
        btn = gr.Button(value='Fetch News')
    with gr.Row():
        out = gr.DownloadButton("📂 Click to download file")
    # Clicking the button runs `process`; the inputs are passed in this order
    # and the returned file path becomes the download button's value.
    btn.click(
        fn=process,
        inputs=[radio, textbox, audio, checkboxgroup],
        outputs=out
    )

# Start the web server (the stray trailing "|" from the page capture is removed).
demo.launch(debug=True)