histlearn committed · verified
Commit 0314b2b · 1 Parent(s): a5b8d10

Update app.py

Files changed (1)
  1. app.py +6 -12
app.py CHANGED
@@ -1,5 +1,6 @@
+import os
 import gradio as gr
-from transformers import AutoProcessor, AutoModelForCausalLM, MarianMTModel, MarianTokenizer
+from transformers import AutoProcessor, AutoModelForCausalLM
 from PIL import Image
 import torch
 from gtts import gTTS
@@ -8,6 +9,9 @@ import requests
 import nltk.tree
 import re
 
+# Download the spaCy Portuguese model
+os.system("python -m spacy download pt_core_news_sm")
+
 # Load the spaCy Portuguese model
 nlp = spacy.load("pt_core_news_sm")
 
@@ -192,14 +196,10 @@ def reordenar_sentenca(sentenca):
 # Load the models
 processor = AutoProcessor.from_pretrained("histlearn/microsoft-git-portuguese-neuro-simbolic")
 model = AutoModelForCausalLM.from_pretrained("histlearn/microsoft-git-portuguese-neuro-simbolic")
-translation_model_name = 'Helsinki-NLP/opus-mt-tc-big-en-pt'
-translation_tokenizer = MarianTokenizer.from_pretrained(translation_model_name)
-translation_model = MarianMTModel.from_pretrained(translation_model_name)
 
 # Configure the device (GPU or CPU)
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model.to(device)
-translation_model.to(device)
 
 # Helper functions
 def prepare_image(image_path):
@@ -219,11 +219,6 @@ def generate_caption(pixel_values):
     )
     return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
 
-def translate_to_portuguese(text):
-    inputs = translation_tokenizer(text, return_tensors="pt", truncation=True).to(device)
-    translated_ids = translation_model.generate(inputs["input_ids"], max_length=50, num_beams=4, early_stopping=True)
-    return translation_tokenizer.batch_decode(translated_ids, skip_special_tokens=True)[0]
-
 def text_to_speech_gtts(text, lang='pt'):
     tts = gTTS(text=text, lang=lang)
     tts.save("output.mp3")
@@ -232,8 +227,7 @@ def text_to_speech_gtts(text, lang='pt'):
 # Main function to process the image and generate speech
 def process_image(image):
     _, pixel_values = prepare_image(image)
-    caption_en = generate_caption(pixel_values)
-    caption_pt = translate_to_portuguese(caption_en)
+    caption_pt = generate_caption(pixel_values)
     caption_pt = reordenar_sentenca(caption_pt)
     audio_file = text_to_speech_gtts(caption_pt)
     return caption_pt, audio_file
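
After this change the inference path reduces to captioning directly in Portuguese with the fine-tuned GIT model, with no MarianMT translation step. The sketch below shows that path end to end; the caption_image helper, the use of the processor for image preprocessing (the body of prepare_image is not shown in this diff), and the generation settings (max_length=50, num_beams=4) are illustrative assumptions, not code taken from this repository.

# Minimal sketch of the simplified pipeline, assuming the processor handles image
# preprocessing and using illustrative generation settings.
import torch
from PIL import Image
from transformers import AutoProcessor, AutoModelForCausalLM

processor = AutoProcessor.from_pretrained("histlearn/microsoft-git-portuguese-neuro-simbolic")
model = AutoModelForCausalLM.from_pretrained("histlearn/microsoft-git-portuguese-neuro-simbolic")
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

def caption_image(image_path: str) -> str:
    """Generate a Portuguese caption for a single image (hypothetical helper)."""
    image = Image.open(image_path).convert("RGB")
    pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(device)
    generated_ids = model.generate(pixel_values=pixel_values, max_length=50, num_beams=4)
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

In app.py, the returned string would then go through reordenar_sentenca and text_to_speech_gtts, as in process_image above.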
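The new startup step shells out to python -m spacy download on every launch. A possible alternative, assuming spaCy's programmatic spacy.cli.download is available in the deployed environment, is to download pt_core_news_sm only when spacy.load cannot find it:

# Sketch of a download-if-missing guard; load_portuguese_model is a hypothetical helper,
# not part of this commit.
import spacy
from spacy.cli import download

def load_portuguese_model(name: str = "pt_core_news_sm"):
    # Try the installed package first; download only if spaCy cannot find it.
    try:
        return spacy.load(name)
    except OSError:
        download(name)
        return spacy.load(name)

nlp = load_portuguese_model()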