J-LAB committed (verified)
Commit 9ec0fd2 · 1 Parent(s): c279f7e

Update app.py

Files changed (1): app.py (+41 -19)
app.py CHANGED
@@ -1,12 +1,13 @@
 import gradio as gr
 from transformers import AutoProcessor, AutoModelForCausalLM
 import spaces
-
 import io
+import base64  # Add the base64 library for decoding
 from PIL import Image
 import subprocess
 subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
-#
+
+# Load the model and the processor
 model_id = 'J-LAB/Florence-vl3'
 model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True).to("cuda").eval()
 processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
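Note: the hunk above keeps the runtime `pip install` of flash-attn on every startup. A guarded variant (a sketch, reusing the app's exact command and env flag, not part of this commit) would skip the install when the package is already importable:

```python
import importlib.util
import subprocess

# Sketch: install flash-attn only when it is not already importable,
# with the same command and FLASH_ATTENTION_SKIP_CUDA_BUILD flag as app.py.
if importlib.util.find_spec("flash_attn") is None:
    subprocess.run(
        'pip install flash-attn --no-build-isolation',
        env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
        shell=True,
    )
```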
@@ -32,22 +33,35 @@ def run_example(task_prompt, image):
     )
     return parsed_answer
 
+# Process images, now with Base64 support
 def process_image(image, task_prompt):
-    image = Image.fromarray(image)  # Convert NumPy array to PIL Image
+    # Check whether the image is a base64 string
+    if isinstance(image, str) and image.startswith("data:image"):
+        # Extract the base64 part of the string
+        base64_image = image.split(",")[1]
+        # Decode the base64 image
+        image = Image.open(io.BytesIO(base64.b64decode(base64_image)))
+    elif isinstance(image, bytes):
+        image = Image.open(io.BytesIO(image))
+    else:
+        image = Image.fromarray(image)  # Convert a NumPy array to a PIL image, if applicable
+
+    # Map the task prompts
     if task_prompt == 'Product Caption':
         task_prompt = '<MORE_DETAILED_CAPTION>'
     elif task_prompt == 'OCR':
         task_prompt = '<OCR>'
 
+    # Run the example with the processed image and the task prompt
     results = run_example(task_prompt, image)
 
-    # Remove the key and get the text value
+    # Extract the generated text from the results
    if results and task_prompt in results:
         output_text = results[task_prompt]
     else:
         output_text = ""
 
-    # Convert newline characters to HTML line breaks
+    # Convert newlines to HTML line breaks
     output_text = output_text.replace("\n\n", "<br><br>").replace("\n", "<br>")
 
     return output_text
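The new Base64 branch can be checked without loading the model. A minimal round-trip sketch (assuming only Pillow, with a tiny generated PNG standing in for a real upload):

```python
import base64
import io
from PIL import Image

# Build a tiny in-memory PNG and wrap it in a data URL,
# mimicking what an API caller would send to process_image.
buf = io.BytesIO()
Image.new("RGB", (8, 8), "red").save(buf, format="PNG")
data_url = "data:image/png;base64," + base64.b64encode(buf.getvalue()).decode("utf-8")

# Same decode logic as the branch added above.
if isinstance(data_url, str) and data_url.startswith("data:image"):
    payload = data_url.split(",")[1]
    decoded = Image.open(io.BytesIO(base64.b64decode(payload)))

assert decoded.size == (8, 8)  # the image survived the round trip
```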
@@ -76,16 +90,14 @@ document.querySelector('button').addEventListener('click', function() {
 });
 """
 
-single_task_list =[
-    'Product Caption', 'OCR'
-]
+single_task_list = ['Product Caption', 'OCR']
 
 with gr.Blocks(css=css) as demo:
     gr.Markdown(DESCRIPTION)
     with gr.Tab(label="Product Image Select"):
         with gr.Row():
             with gr.Column():
-                input_img = gr.Image(label="Input Picture")
+                input_img = gr.Image(label="Input Picture", tool="editor", source="upload", type="pil")  # Accepts PIL images
                 task_prompt = gr.Dropdown(choices=single_task_list, label="Task Prompt", value="Product Caption")
                 submit_btn = gr.Button(value="Submit")
             with gr.Column():
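Note: `tool=` and `source=` are Gradio 3.x parameters of `gr.Image`; Gradio 4.x removed them in favor of a `sources` list. If the Space were upgraded, the rough equivalent would be (a sketch, not tested against this app):

```python
import gradio as gr

# Hypothetical Gradio 4.x equivalent of the component added above:
# `source="upload"` becomes `sources=["upload"]`, and `tool` is dropped.
input_img = gr.Image(label="Input Picture", sources=["upload"], type="pil")
```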
@@ -96,20 +108,30 @@ with gr.Blocks(css=css) as demo:
     To use this model via API, you can follow the example code below:
 
     ```python
-    !pip install gradio_client
-    from gradio_client import Client, handle_file
-
-    client = Client("J-LAB/Fluxi-IA")
-    result = client.predict(
-        image=handle_file('https://raw.githubusercontent.com/gradio-app/gradio/main/test/test_files/bus.png'),
-        api_name="/process_image"
-    )
-    print(result)
+    import base64
+    from PIL import Image
+    import io
+    import requests
+
+    # Convert the image to base64
+    image_path = 'path_to_image.png'
+    with open(image_path, 'rb') as image_file:
+        image_base64 = base64.b64encode(image_file.read()).decode('utf-8')
+
+    # Prepare the payload
+    payload = {
+        "image": f"data:image/png;base64,{image_base64}",
+        "task_prompt": "Product Caption"
+    }
+
+    response = requests.post("http://your-space-url-here", json=payload)
+    print(response.json())
     ```
+
     """)
 
     submit_btn.click(process_image, [input_img, task_prompt], [output_text])
 
     demo.load(lambda: None, inputs=None, outputs=None, js=js)
 
-demo.launch(debug=True)
+demo.launch(debug=True)
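A note on the new in-app API example: a stock Gradio Space serves predictions through the Gradio API rather than a bare POST endpoint, so the `requests.post` snippet assumes custom routing. A sketch of the same call via `gradio_client` (the Space id comes from the old example, and `/process_image` is inferred from the click handler since no explicit `api_name` is set; whether the Image component passes the raw data URL through unchanged depends on the Gradio version):

```python
import base64
from gradio_client import Client

# Build the data-URL string that the updated process_image accepts.
# 'path_to_image.png' is the placeholder path from the in-app example.
with open('path_to_image.png', 'rb') as f:
    data_url = "data:image/png;base64," + base64.b64encode(f.read()).decode('utf-8')

client = Client("J-LAB/Fluxi-IA")  # Space id taken from the previous in-app example
result = client.predict(
    data_url,            # image, as a base64 data URL
    "Product Caption",   # task_prompt
    api_name="/process_image",
)
print(result)
```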