Nitin00043 committed on
Commit ddc7f6f · verified · 1 Parent(s): d2a9dfb

Update app.py

Files changed (1)
  1. app.py +42 -42
app.py CHANGED
@@ -1,42 +1,42 @@
-import torch
-from transformers import Pix2StructForConditionalGeneration, Pix2StructProcessor
-import gradio as gr
-from PIL import Image
-
-# Load model and processor
-model_name = "google/pix2struct-docvqa-large"
-model = Pix2StructForConditionalGeneration.from_pretrained(model_name)
-processor = Pix2StructProcessor.from_pretrained(model_name)
-
-def process_image(image_path):
-    try:
-        # Load the image
-        image = Image.open(image_path).convert("RGB")
-
-        # Prepare the input
-        inputs = processor(images=image, text="What does this image say?", return_tensors="pt")
-
-        # Generate prediction
-        output = model.generate(**inputs)
-
-        # Decode the output
-        solution = processor.decode(output[0], skip_special_tokens=True)
-        return solution
-
-    except Exception as e:
-        return f"Error processing image: {str(e)}"
-
-def predict(image):
-    """Handles image input for Gradio."""
-    return process_image(image)
-
-# Gradio app
-iface = gr.Interface(
-    fn=predict,
-    inputs=gr.Image(type="filepath"),
-    outputs="text",
-    title="Image Text Solution"
-)
-
-if __name__ == "__main__":
-    iface.launch()
+# import torch
+# from transformers import Pix2StructForConditionalGeneration, Pix2StructProcessor
+# import gradio as gr
+# from PIL import Image
+
+# # Load model and processor
+# model_name = "google/pix2struct-docvqa-large"
+# model = Pix2StructForConditionalGeneration.from_pretrained(model_name)
+# processor = Pix2StructProcessor.from_pretrained(model_name)
+
+# def process_image(image_path):
+#     try:
+#         # Load the image
+#         image = Image.open(image_path).convert("RGB")
+
+#         # Prepare the input
+#         inputs = processor(images=image, text="What does this image say?", return_tensors="pt")
+
+#         # Generate prediction
+#         output = model.generate(**inputs)
+
+#         # Decode the output
+#         solution = processor.decode(output[0], skip_special_tokens=True)
+#         return solution
+
+#     except Exception as e:
+#         return f"Error processing image: {str(e)}"
+
+# def predict(image):
+#     """Handles image input for Gradio."""
+#     return process_image(image)
+
+# # Gradio app
+# iface = gr.Interface(
+#     fn=predict,
+#     inputs=gr.Image(type="filepath"),
+#     outputs="text",
+#     title="Image Text Solution"
+# )
+
+# if __name__ == "__main__":
+#     iface.launch()
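
For reference, a minimal standalone sketch of the document-QA call that app.py wraps in a Gradio interface. It reuses the same google/pix2struct-docvqa-large checkpoint and prompt from the file above; the input path "sample.png" is a hypothetical placeholder for a local document image.

from PIL import Image
from transformers import Pix2StructForConditionalGeneration, Pix2StructProcessor

# Same checkpoint and processor as in app.py
model_name = "google/pix2struct-docvqa-large"
model = Pix2StructForConditionalGeneration.from_pretrained(model_name)
processor = Pix2StructProcessor.from_pretrained(model_name)

# "sample.png" is a hypothetical local document image
image = Image.open("sample.png").convert("RGB")

# Ask the same question app.py uses and decode the generated answer
inputs = processor(images=image, text="What does this image say?", return_tensors="pt")
output = model.generate(**inputs)
print(processor.decode(output[0], skip_special_tokens=True))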