Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,6 @@
|
|
|
|
1 |
import gradio as gr
|
2 |
-
from transformers import AutoProcessor, AutoModelForCausalLM
|
3 |
from PIL import Image
|
4 |
import torch
|
5 |
from gtts import gTTS
|
@@ -8,6 +9,9 @@ import requests
|
|
8 |
import nltk.tree
|
9 |
import re
|
10 |
|
|
|
|
|
|
|
11 |
# Carregar o modelo de português do spaCy
|
12 |
nlp = spacy.load("pt_core_news_sm")
|
13 |
|
@@ -192,14 +196,10 @@ def reordenar_sentenca(sentenca):
|
|
192 |
# Carregar os modelos
|
193 |
processor = AutoProcessor.from_pretrained("histlearn/microsoft-git-portuguese-neuro-simbolic")
|
194 |
model = AutoModelForCausalLM.from_pretrained("histlearn/microsoft-git-portuguese-neuro-simbolic")
|
195 |
-
translation_model_name = 'Helsinki-NLP/opus-mt-tc-big-en-pt'
|
196 |
-
translation_tokenizer = MarianTokenizer.from_pretrained(translation_model_name)
|
197 |
-
translation_model = MarianMTModel.from_pretrained(translation_model_name)
|
198 |
|
199 |
# Configurar o dispositivo (GPU ou CPU)
|
200 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
201 |
model.to(device)
|
202 |
-
translation_model.to(device)
|
203 |
|
204 |
# Funções auxiliares
|
205 |
def prepare_image(image_path):
|
@@ -219,11 +219,6 @@ def generate_caption(pixel_values):
|
|
219 |
)
|
220 |
return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
221 |
|
222 |
-
def translate_to_portuguese(text):
|
223 |
-
inputs = translation_tokenizer(text, return_tensors="pt", truncation=True).to(device)
|
224 |
-
translated_ids = translation_model.generate(inputs["input_ids"], max_length=50, num_beams=4, early_stopping=True)
|
225 |
-
return translation_tokenizer.batch_decode(translated_ids, skip_special_tokens=True)[0]
|
226 |
-
|
227 |
def text_to_speech_gtts(text, lang='pt'):
|
228 |
tts = gTTS(text=text, lang=lang)
|
229 |
tts.save("output.mp3")
|
@@ -232,8 +227,7 @@ def text_to_speech_gtts(text, lang='pt'):
|
|
232 |
# Função principal para processar a imagem e gerar a voz
|
233 |
def process_image(image):
|
234 |
_, pixel_values = prepare_image(image)
|
235 |
-
|
236 |
-
caption_pt = translate_to_portuguese(caption_en)
|
237 |
caption_pt = reordenar_sentenca(caption_pt)
|
238 |
audio_file = text_to_speech_gtts(caption_pt)
|
239 |
return caption_pt, audio_file
|
|
|
1 |
+
import os
|
2 |
import gradio as gr
|
3 |
+
from transformers import AutoProcessor, AutoModelForCausalLM
|
4 |
from PIL import Image
|
5 |
import torch
|
6 |
from gtts import gTTS
|
|
|
9 |
import nltk.tree
|
10 |
import re
|
11 |
|
12 |
+
# Load the spaCy Portuguese model, downloading it only when it is missing.
# Trying the load first means a normal start-up neither spawns a subprocess
# nor touches the network.
import importlib
import subprocess
import sys

try:
    nlp = spacy.load("pt_core_news_sm")
except OSError:
    # spacy.load raises OSError when the model package is not installed.
    # Run the download with the interpreter executing this app (a bare
    # "python" on PATH may belong to a different environment), without a
    # shell, and fail loudly if the download does not succeed.
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", "pt_core_news_sm"],
        check=True,
    )
    # Make the freshly installed package visible to this running process.
    importlib.invalidate_caches()
    nlp = spacy.load("pt_core_news_sm")
|
17 |
|
|
|
196 |
# Load the image-captioning processor and model from the same checkpoint.
_CAPTION_CHECKPOINT = "histlearn/microsoft-git-portuguese-neuro-simbolic"
processor = AutoProcessor.from_pretrained(_CAPTION_CHECKPOINT)
model = AutoModelForCausalLM.from_pretrained(_CAPTION_CHECKPOINT)

# Pick the device: use the GPU when CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
model.to(device)
|
|
|
203 |
|
204 |
# Funções auxiliares
|
205 |
def prepare_image(image_path):
|
|
|
219 |
)
|
220 |
return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
221 |
|
|
|
|
|
|
|
|
|
|
|
222 |
def text_to_speech_gtts(text, lang='pt'):
|
223 |
tts = gTTS(text=text, lang=lang)
|
224 |
tts.save("output.mp3")
|
|
|
227 |
# Função principal para processar a imagem e gerar a voz
|
228 |
def process_image(image):
    """Generate a caption for an image and convert that caption to speech.

    The image is handed to ``prepare_image``; the pixel values it returns are
    captioned by ``generate_caption``; the caption is passed through
    ``reordenar_sentenca`` and finally given to ``text_to_speech_gtts``.

    Args:
        image: The value forwarded unchanged to ``prepare_image``.

    Returns:
        A ``(caption, audio)`` tuple: the reordered caption text and the
        value returned by ``text_to_speech_gtts`` for that caption.
    """
    # prepare_image returns a pair; only its second element is needed here.
    _unused, pixels = prepare_image(image)
    caption = reordenar_sentenca(generate_caption(pixels))
    speech = text_to_speech_gtts(caption)
    return caption, speech
|