maximilian committed on
Commit
36a6fc9
1 Parent(s): d65aa05

initial commit

Browse files
Files changed (5) hide show
  1. app.py +101 -0
  2. image1.jpg +0 -0
  3. image2.jpg +0 -0
  4. pre-requirements.txt +1 -0
  5. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoProcessor, AutoModelForCausalLM
3
+ import spaces
4
+ from PIL import Image
5
+ import io
6
+ import subprocess
7
import os

# Install flash-attn at start-up without compiling its CUDA kernels.
# `env=` REPLACES the child's environment rather than extending it, so the
# flag is merged into a copy of the current environment; otherwise PATH,
# HOME and any virtualenv variables would be missing for the pip process.
subprocess.run(
    "pip install flash-attn --no-build-isolation",
    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
    shell=True,
)
8
+
9
# Checkpoint ids for which a model and a processor are loaded at import time.
_CHECKPOINTS = ["maxiw/Florence-2-ScreenQA-base"]

# Checkpoint id -> model, moved to the CUDA device and switched to eval mode.
models = {
    checkpoint: AutoModelForCausalLM.from_pretrained(checkpoint, trust_remote_code=True).to("cuda").eval()
    for checkpoint in _CHECKPOINTS
}

# Checkpoint id -> matching processor (same keys as `models`).
processors = {
    checkpoint: AutoProcessor.from_pretrained(checkpoint, trust_remote_code=True)
    for checkpoint in _CHECKPOINTS
}


# Markdown heading rendered at the top of the demo page.
DESCRIPTION = "# [Florence-2-ScreenQA Demo](https://huggingface.co/maxiw/Florence-2-ScreenQA-base)"
19
+
20
+
21
@spaces.GPU
def run_example(task_prompt, image, text_input=None, model_id="maxiw/Florence-2-ScreenQA-base"):
    """Generate and post-process an answer for one image.

    Args:
        task_prompt: Task token that starts the prompt; also passed as
            ``task`` to the processor's post-processing step.
        image: Input image; its ``width`` and ``height`` attributes are read.
        text_input: Optional text appended directly after ``task_prompt``.
        model_id: Key into the module-level ``models`` / ``processors`` dicts.

    Returns:
        The entry stored under ``"<SQA>"`` when the post-processed result
        contains it, otherwise the post-processed result unchanged.
    """
    model = models[model_id]
    processor = processors[model_id]

    prompt = task_prompt if text_input is None else task_prompt + text_input
    encoded = processor(text=prompt, images=image, return_tensors="pt").to("cuda")

    # Deterministic decoding: beam search with 3 beams, sampling disabled.
    generation_kwargs = {
        "input_ids": encoded["input_ids"],
        "pixel_values": encoded["pixel_values"],
        "max_new_tokens": 1024,
        "early_stopping": False,
        "do_sample": False,
        "num_beams": 3,
    }
    output_ids = model.generate(**generation_kwargs)

    # Special tokens are kept in the decoded text handed to post-processing.
    decoded = processor.batch_decode(output_ids, skip_special_tokens=False)
    result = processor.post_process_generation(
        decoded[0],
        task=task_prompt,
        image_size=(image.width, image.height),
    )
    return result["<SQA>"] if "<SQA>" in result else result
47
+
48
+
49
def process_image(image, task_prompt, text_input=None, model_id="maxiw/Florence-2-ScreenQA-base"):
    """Gradio callback: answer a question about an uploaded screenshot.

    Args:
        image: Image as a NumPy array (converted with ``Image.fromarray``),
            or ``None`` when no picture was provided.
        task_prompt: Task name chosen in the UI; only ``"ScreenQA"`` is handled.
        text_input: Question text forwarded to ``run_example``.
        model_id: Key of the model to use, forwarded to ``run_example``.

    Returns:
        The result of ``run_example`` for the ``"<SQA>"`` task, or an empty
        string when ``task_prompt`` is not recognised.

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        # Report the problem explicitly instead of failing inside
        # Image.fromarray when the image is missing.
        raise gr.Error("Please provide an input picture.")

    if task_prompt != "ScreenQA":
        print("Unknown task prompt")
        # The callback feeds exactly one output component, so return one
        # value (the previous two-element tuple did not match that).
        return ""

    pil_image = Image.fromarray(image)  # Convert NumPy array to PIL Image
    return run_example("<SQA>", pil_image, text_input, model_id=model_id)
58
+
59
# Custom stylesheet passed to gr.Blocks: rules for the element with id "output".
css = """
#output {
height: 500px;
overflow: auto;
border: 1px solid #ccc;
}
"""


# Task names offered in the "Task Prompt" dropdown.
single_task_list = ["ScreenQA"]
71
+
72
+
73
# Build the UI: inputs in the left column, the answer in the right column,
# cached examples underneath, then start the app.
with gr.Blocks(css=css) as demo:
    gr.Markdown(DESCRIPTION)
    with gr.Tab(label="Florence-2 Input"):
        with gr.Row():
            with gr.Column():
                input_img = gr.Image(label="Input Picture")
                model_selector = gr.Dropdown(
                    choices=list(models),
                    label="Model",
                    value="maxiw/Florence-2-ScreenQA-base",
                )
                task_prompt = gr.Dropdown(
                    choices=single_task_list,
                    label="Task Prompt",
                    value="ScreenQA",
                )
                text_input = gr.Textbox(label="Question")
                submit_btn = gr.Button(value="Submit")
            with gr.Column():
                output_text = gr.Textbox(label="Output Text")

        # Each row is (image file, task name, question), matching `inputs` below.
        example_rows = [
            ["image1.jpg", "ScreenQA", "What is the version of the settings?"],
            ["image1.jpg", "ScreenQA", "What is the state of use lower resolution images?"],
            ["image2.jpg", "ScreenQA", "How much is the discount for the product?"],
        ]
        gr.Examples(
            examples=example_rows,
            inputs=[input_img, task_prompt, text_input],
            outputs=[output_text],
            fn=process_image,
            cache_examples=True,
            label="Try examples",
        )

        # The model selector is only passed on manual submit; the examples
        # above omit it, so process_image uses its default model_id for them.
        submit_btn.click(
            fn=process_image,
            inputs=[input_img, task_prompt, text_input, model_selector],
            outputs=[output_text],
        )

demo.launch(debug=True)
image1.jpg ADDED
image2.jpg ADDED
pre-requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ pip>=23.0.0
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ spaces
2
+ transformers
3
+ timm