artificialguybr committed
Commit dc2ea44 · 1 Parent(s): 3c21144

Update app.py

Files changed (1)
  1. app.py +5 -6
app.py CHANGED
@@ -7,14 +7,15 @@ import requests
 from io import BytesIO
 
 # Load the Qwen-VL model and tokenizer
-tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-VL-Chat", trust_remote_code=True)
-model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-VL-Chat", load_in_4bit=True, device_map="auto", trust_remote_code=True).eval()
+tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-VL-Chat-Int4", trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-VL-Chat-Int4", load_in_4bit=True, device_map="auto", trust_remote_code=True).eval()
 
 def generate_predictions(image_input, text_input):
     # Invert the image to correct the negative
     user_image_path = "/tmp/user_input_test_image.jpg"
     Image.fromarray((255 - (image_input * 255).astype('uint8'))).save(user_image_path)
 
+
     # Prepare the inputs
     query = tokenizer.from_list_format([
         {'image': user_image_path},
@@ -23,9 +24,6 @@ def generate_predictions(image_input, text_input):
     inputs = tokenizer(query, return_tensors='pt')
     inputs = inputs.to(model.device)
 
-    # Fix: convert every tensor in the 'inputs' dictionary to HalfTensor (float16)
-    inputs = {key: value.to(torch.float16) for key, value in inputs.items()}
-
     # Generate the caption
     pred = model.generate(**inputs)
     full_response = tokenizer.decode(pred.cpu()[0], skip_special_tokens=False)
@@ -45,6 +43,7 @@ def generate_predictions(image_input, text_input):
     return image_with_boxes, frontend_response  # Returning the formatted response for the frontend
 
 # Criar interface Gradio
+# Create Gradio interface
 iface = gr.Interface(
     fn=generate_predictions,
     inputs=[
@@ -66,4 +65,4 @@ iface = gr.Interface(
     - **High Resolution**: Utilizes 448*448 resolution for fine-grained recognition and understanding.
     """,
 )
-iface.launch()
+iface.launch()
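For reference, a minimal sketch of the load-and-generate path as it stands after this commit, assembled from the diff above. The image path and prompt are placeholder values, and the Gradio/PIL plumbing from app.py is omitted; `from_list_format` is the custom method exposed by the Qwen-VL tokenizer when loaded with `trust_remote_code=True`.

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# Pre-quantized Int4 checkpoint introduced by this commit.
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-VL-Chat-Int4", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen-VL-Chat-Int4",
    load_in_4bit=True,        # kept as in the diff
    device_map="auto",
    trust_remote_code=True,
).eval()

# Build a multimodal query in Qwen-VL's list format: an image path plus text.
query = tokenizer.from_list_format([
    {'image': '/tmp/user_input_test_image.jpg'},   # placeholder image path
    {'text': 'Describe this image.'},              # placeholder prompt
])

# Tokenize and move to the model's device; the blanket float16 cast of the
# inputs dict is dropped in this commit, so the token ids stay integer-typed.
inputs = tokenizer(query, return_tensors='pt').to(model.device)
pred = model.generate(**inputs)
print(tokenizer.decode(pred.cpu()[0], skip_special_tokens=False))
```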