Space status: Build error

Commit dc2ea44 (parent: 3c21144): Update app.py

app.py (CHANGED)
@@ -7,14 +7,15 @@ import requests
 from io import BytesIO
 
 # Carregar o modelo Qwen-VL e o tokenizer
-tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-VL-Chat", trust_remote_code=True)
-model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-VL-Chat",
+tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-VL-Chat-Int4", trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-VL-Chat-Int4", load_in_4bit=True, device_map="auto", trust_remote_code=True).eval()
 
 def generate_predictions(image_input, text_input):
     # Inverter a imagem para corrigir o negativo
     user_image_path = "/tmp/user_input_test_image.jpg"
     Image.fromarray((255 - (image_input * 255).astype('uint8'))).save(user_image_path)
 
+
     # Preparar as entradas
     query = tokenizer.from_list_format([
         {'image': user_image_path},
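The heart of the commit is the swap from the full-precision Qwen/Qwen-VL-Chat checkpoint to the GPTQ-quantized Qwen/Qwen-VL-Chat-Int4. For context, below is a minimal loading-and-query sketch following the Qwen-VL-Chat-Int4 model card, which requires optimum and auto-gptq and loads the checkpoint without an explicit load_in_4bit flag (the GPTQ config ships with the model); the extra load_in_4bit=True added in this commit may be redundant, or may conflict on some transformers versions, which is worth checking against the Space's "Build error" status.

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# Sketch per the Qwen-VL-Chat-Int4 model card; from_list_format and chat
# are custom methods shipped with the checkpoint via trust_remote_code=True.
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-VL-Chat-Int4", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen-VL-Chat-Int4", device_map="auto", trust_remote_code=True
).eval()

query = tokenizer.from_list_format([
    {'image': '/tmp/user_input_test_image.jpg'},  # same path app.py writes to
    {'text': 'Describe the image.'},              # hypothetical prompt
])
response, history = model.chat(tokenizer, query=query, history=None)
print(response)
```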
@@ -23,9 +24,6 @@ def generate_predictions(image_input, text_input):
     inputs = tokenizer(query, return_tensors='pt')
     inputs = inputs.to(model.device)
 
-    # Correção: Converter todos os tensores no dicionário 'inputs' para HalfTensor (float16)
-    inputs = {key: value.to(torch.float16) for key, value in inputs.items()}
-
     # Gerar a legenda
     pred = model.generate(**inputs)
     full_response = tokenizer.decode(pred.cpu()[0], skip_special_tokens=False)
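Dropping the blanket float16 cast is a real fix, not just cleanup: the dict returned by the tokenizer includes the integer input_ids tensor, and the removed comprehension (commented "Correção", i.e. "fix") converted it to float16 along with everything else, which breaks the embedding lookup inside generate. The quantized model manages its own dtypes. If a cast were ever needed again, a guarded version that only touches floating-point tensors would be safer; a sketch, not part of this commit:

```python
import torch

# Hypothetical guarded cast: convert only floating-point tensors
# (e.g. pixel values) and leave integer token IDs untouched.
inputs = {
    key: value.to(torch.float16) if value.is_floating_point() else value
    for key, value in inputs.items()
}
```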
@@ -45,6 +43,7 @@ def generate_predictions(image_input, text_input):
     return image_with_boxes, frontend_response  # Retornando a resposta formatada para o frontend
 
 # Criar interface Gradio
+# Create Gradio interface
 iface = gr.Interface(
     fn=generate_predictions,
     inputs=[
@@ -66,4 +65,4 @@ iface = gr.Interface(
         - **High Resolution**: Utilizes 448*448 resolution for fine-grained recognition and understanding.
     """,
 )
-iface.launch()
+iface.launch()
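The diff elides the inputs= list and most of the interface description. Purely as an illustration of how the visible pieces fit together, here is a hypothetical minimal wiring; the component choices and labels are assumptions, not the Space's actual code:

```python
import gradio as gr

# Hypothetical minimal wiring; the real app.py declares more options and a
# longer markdown description (only partially visible in this diff).
iface = gr.Interface(
    fn=generate_predictions,
    inputs=[
        gr.Image(label="Input image"),  # assumed component
        gr.Textbox(label="Prompt"),     # assumed component
    ],
    outputs=[
        gr.Image(label="Detections"),   # image_with_boxes
        gr.Textbox(label="Response"),   # frontend_response
    ],
)
iface.launch()
```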