Nitin00043 committed
Commit d2a9dfb · verified · 1 Parent(s): bb3e221

Update app.py

Files changed (1)
  1. app.py +42 -42
app.py CHANGED
@@ -1,42 +1,42 @@
- # import torch
- # from transformers import Pix2StructForConditionalGeneration, Pix2StructProcessor
- # import gradio as gr
- # from PIL import Image
-
- # # Load model and processor
- # model_name = "google/pix2struct-docvqa-large"
- # model = Pix2StructForConditionalGeneration.from_pretrained(model_name)
- # processor = Pix2StructProcessor.from_pretrained(model_name)
-
- # def process_image(image_path):
- #     try:
- #         # Load the image
- #         image = Image.open(image_path).convert("RGB")
-
- #         # Prepare the input
- #         inputs = processor(images=image, text="What does this image say?", return_tensors="pt")
-
- #         # Generate prediction
- #         output = model.generate(**inputs)
-
- #         # Decode the output
- #         solution = processor.decode(output[0], skip_special_tokens=True)
- #         return solution
-
- #     except Exception as e:
- #         return f"Error processing image: {str(e)}"
-
- # def predict(image):
- #     """Handles image input for Gradio."""
- #     return process_image(image)
-
- # # Gradio app
- # iface = gr.Interface(
- #     fn=predict,
- #     inputs=gr.Image(type="filepath"),
- #     outputs="text",
- #     title="Image Text Solution"
- # )
-
- # if __name__ == "__main__":
- #     iface.launch()
 
+ import torch
+ from transformers import Pix2StructForConditionalGeneration, Pix2StructProcessor
+ import gradio as gr
+ from PIL import Image
+
+ # Load model and processor
+ model_name = "google/pix2struct-docvqa-large"
+ model = Pix2StructForConditionalGeneration.from_pretrained(model_name)
+ processor = Pix2StructProcessor.from_pretrained(model_name)
+
+ def process_image(image_path):
+     try:
+         # Load the image
+         image = Image.open(image_path).convert("RGB")
+
+         # Prepare the input
+         inputs = processor(images=image, text="What does this image say?", return_tensors="pt")
+
+         # Generate prediction
+         output = model.generate(**inputs)
+
+         # Decode the output
+         solution = processor.decode(output[0], skip_special_tokens=True)
+         return solution
+
+     except Exception as e:
+         return f"Error processing image: {str(e)}"
+
+ def predict(image):
+     """Handles image input for Gradio."""
+     return process_image(image)
+
+ # Gradio app
+ iface = gr.Interface(
+     fn=predict,
+     inputs=gr.Image(type="filepath"),
+     outputs="text",
+     title="Image Text Solution"
+ )
+
+ if __name__ == "__main__":
+     iface.launch()
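
The commit simply re-enables the code above, so the quickest sanity check is to call the same function the Gradio interface wraps. A minimal sketch, assuming the file above is saved as app.py in the working directory and that sample_doc.png is any document image on disk (both names are placeholders, not part of the commit):

    from app import predict  # importing app.py also downloads and loads the Pix2Struct model

    print(predict("sample_doc.png"))  # same code path gr.Interface invokes on an uploaded file

Because iface.launch() is guarded by if __name__ == "__main__", importing app only loads the model and builds the interface; it does not start the Gradio server.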