histlearn committed
Commit 6b05787 · verified · 1 Parent(s): 17febdd

Delete app.py

Files changed (1)
  1. app.py +0 -253
app.py DELETED
@@ -1,253 +0,0 @@
import os
import gradio as gr
from transformers import AutoProcessor, AutoModelForCausalLM
from PIL import Image
import torch
from gtts import gTTS
import spacy
import requests
import nltk.tree
import re

# Download the Portuguese spaCy model
os.system("python -m spacy download pt_core_news_sm")

# Load the Portuguese spaCy model
nlp = spacy.load("pt_core_news_sm")

# Key for the LX-Parser web service
key = "eb159d39469d84f0ff47167a4d89cada"

# Grammar-manipulation functions

def invert_adj_n(doc, tags):
    # Swap each adjective (A) with the noun (N) that immediately follows it
    frase = []
    already = False
    for i in range(len(doc)):
        if already:
            already = False
            continue
        if doc[i].tag_ != "PUNCT" and tags[i] == "A" and i + 1 < len(tags) and tags[i + 1] == "N":
            frase.append(doc[i + 1].text)
            frase.append(doc[i].text)
            already = True
        else:
            frase.append(doc[i].text)
    return frase
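
# Hypothetical example (not in the original file): for a doc built from
# "a amarela casa" with tags ["ART", "A", "N"], invert_adj_n returns
# ["a", "casa", "amarela"].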

def adjust_adj(doc, tags):
    # Insert "e" ("and") between two consecutive adjectives
    frase = []
    for i in range(len(doc)):
        frase.append(doc[i].text)
        if tags[i] == "A" and i + 1 < len(tags) and tags[i + 1] == "A":
            frase.append("e")
    return frase
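
# Hypothetical example: "casa grande bonita" with tags ["N", "A", "A"]
# becomes ["casa", "grande", "e", "bonita"].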

def adjust_art(doc, tags):
    # Replace the article "a" according to the gender and number of the next token
    frase = []
    for i in range(len(doc)):
        text = doc[i].text
        if tags[i] == "ART" and text.lower() == "a" and i + 1 < len(doc):
            gender = doc[i + 1].morph.get("Gender")
            number = doc[i + 1].morph.get("Number")
            if gender and number:
                if gender[0] == "Masc" and number[0] == "Sing":
                    frase.append("um")
                elif gender[0] == "Fem" and number[0] == "Sing":
                    frase.append("uma")
                elif gender[0] == "Masc" and number[0] != "Sing":
                    frase.append("os")
                else:
                    frase.append("as")
            else:
                frase.append(text)
        else:
            frase.append(text)
    return frase
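
# Hypothetical example: in "a carro", spaCy marks "carro" as Masc/Sing, so the
# article is rewritten and the result is ["um", "carro"].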

def create_sentence(doc, tags, frase):
    # Capitalize sentence-initial tokens and attach punctuation to the
    # preceding token
    tmp = frase
    for i in range(len(doc)):
        text = doc[i].text
        if doc[i].is_sent_start:
            tmp[i] = tmp[i].capitalize()
        if doc[i].tag_ == "PUNCT":
            tmp[i - 1] += text
            tmp[i] = ""  # blank the merged token so it is not emitted twice
    return tmp
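
# Hypothetical example: ["a", "casa", "."] becomes ["A", "casa.", ""]; the
# empty slot is dropped when the final sentence is joined in reordenar_sentenca.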

def get_productions(texto):
    # Ask the LX-Parser web service for a parse and keep the lexical productions
    fmt = 'parentheses'
    url = "https://portulanclarin.net/workbench/lx-parser/api/"
    request_data = {
        'method': 'parse',
        'jsonrpc': '2.0',
        'id': 0,
        'params': {
            'text': texto,
            'format': fmt,
            'key': key,
        },
    }
    response = requests.post(url, json=request_data)
    response_data = response.json()
    if "error" in response_data:
        print("Error:", response_data["error"])
        return []
    result = response_data["result"]
    productions = []
    tree = nltk.tree.Tree.fromstring(result)
    for production in tree.productions():
        # Keep only rules that rewrite to a quoted terminal (lexical rules)
        if re.findall(r"'.*'", str(production)):
            productions.append(str(production))
    return productions
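
# Illustrative sketch of the assumed service output (not from the original
# file): for "a casa amarela" the parser returns a parenthesized tree such as
# "(S (NP (ART a) (N casa) (A amarela)))", from which get_productions keeps
# the lexical rules ["ART -> 'a'", "N -> 'casa'", "A -> 'amarela'"].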

def get_tags(productions):
    # Extract the left-hand-side tag from each production string
    tags = []
    for item in productions:
        if isinstance(item, str):
            tags.append(item[:item.find(' ->')])
        else:
            tags.append(item)
    # Filter in one pass; removing items while iterating the same list skips elements
    return [tag for tag in tags if not (isinstance(tag, str) and "'" in tag)]
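
# Hypothetical example: ["ART -> 'a'", "N -> 'casa'"] yields ["ART", "N"].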

def reordenar_sentenca(sentenca):
    # Normalize a generated caption: make sure it starts with an article,
    # reorder adjective-noun pairs, join consecutive adjectives with "e",
    # and fix article agreement
    if not sentenca.strip():
        return sentenca
    sentenca = sentenca.lower()
    sentence = get_productions(sentenca)
    tags = get_tags(sentence)
    doc = nlp(sentenca)
    # Bail out before indexing tags if the parser returned nothing
    if not sentence:
        return sentenca.strip()
    if tags[0] != "ART":
        sentenca = "A " + sentenca.strip()
        sentence = get_productions(sentenca)
        tags = get_tags(sentence)
        doc = nlp(sentenca)
        if not sentence:
            return sentenca.strip()
    if len(tags) > 2 and tags[1] == "N" and tags[2] == "N":
        # Link two consecutive nouns with "de", swapping their order
        aux = sentenca.split()
        tmp = aux[1]
        aux[1] = aux[2]
        aux.insert(2, "de")
        aux[3] = tmp
        sentenca = " ".join(aux)
        sentence = get_productions(sentenca)
        tags = get_tags(sentence)
        doc = nlp(sentenca)
    # Invert adjective-noun pairs until the sentence stops changing
    frase = invert_adj_n(list(doc), tags)
    nova_sentenca = ' '.join(frase)
    productions = get_productions(nova_sentenca)
    tags = get_tags(productions)
    doc = nlp(nova_sentenca)
    while nova_sentenca != sentenca:
        frase = invert_adj_n(doc, tags)
        sentenca = nova_sentenca
        nova_sentenca = ' '.join(frase)
        productions = get_productions(nova_sentenca)
        tags = get_tags(productions)
        doc = nlp(nova_sentenca)
    # Join consecutive adjectives with "e" until the sentence stops changing
    frase = adjust_adj(doc, tags)
    nova_sentenca = ' '.join(frase)
    productions = get_productions(nova_sentenca)
    tags = get_tags(productions)
    doc = nlp(nova_sentenca)
    while nova_sentenca != sentenca:
        frase = adjust_adj(doc, tags)
        sentenca = nova_sentenca
        nova_sentenca = ' '.join(frase)
        productions = get_productions(nova_sentenca)
        tags = get_tags(productions)
        doc = nlp(nova_sentenca)
    # Fix article agreement, then capitalization and punctuation
    frase = adjust_art(doc, tags)
    sentenca = ' '.join(frase)
    productions = get_productions(sentenca)
    tags = get_tags(productions)
    doc = nlp(sentenca)
    frase = create_sentence(doc, tags, frase)
    return " ".join(token for token in frase if token)
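
# Hypothetical end-to-end example (parser tags assumed): a raw caption like
# "amarela casa" is prefixed with an article ("a amarela casa"), the
# adjective-noun pair is inverted ("a casa amarela"), and article agreement
# plus capitalization yield "Uma casa amarela".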

# Load the models
processor = AutoProcessor.from_pretrained("histlearn/microsoft-git-portuguese-neuro-simbolic")
model = AutoModelForCausalLM.from_pretrained("histlearn/microsoft-git-portuguese-neuro-simbolic")

# Select the device (GPU or CPU)
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# Helper functions
def prepare_image(image_path):
    # Open the image and convert it into the pixel tensor the model expects
    image = Image.open(image_path).convert("RGB")
    inputs = processor(images=image, return_tensors="pt").to(device)
    return image, inputs.pixel_values

def generate_caption(pixel_values):
    # Generate a Portuguese caption with beam search
    model.eval()
    with torch.no_grad():
        generated_ids = model.generate(
            pixel_values=pixel_values,
            max_length=50,
            num_beams=4,
            early_stopping=True,
            no_repeat_ngram_size=2
        )
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

def text_to_speech_gtts(text, lang='pt'):
    # Synthesize speech with gTTS and save it next to the app
    tts = gTTS(text=text, lang=lang)
    tts.save("output.mp3")
    return "output.mp3"

# Main function: process an image and generate the spoken caption
def process_image(image):
    _, pixel_values = prepare_image(image)
    caption_pt = generate_caption(pixel_values)
    caption_pt = reordenar_sentenca(caption_pt)
    audio_file = text_to_speech_gtts(caption_pt)
    return caption_pt, audio_file

# Paths to the example images
example_image_paths = [
    "main/example1.jpeg",
    "main/example2.jpeg",
    "main/example3.jpeg"
]

# Gradio interface
iface = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="filepath"),
    outputs=[gr.Textbox(), gr.Audio(type="filepath")],
    examples=example_image_paths,
    title="Image to Voice",
    description="Gera uma descrição em português e a converte em voz a partir de uma imagem."
)

if __name__ == "__main__":
    iface.launch()
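
# Minimal programmatic usage sketch (assumes one of the example images above
# exists on disk; not part of the original file):
#   caption, audio_path = process_image("main/example1.jpeg")
#   print(caption)     # normalized Portuguese caption
#   print(audio_path)  # "output.mp3"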