paudelanil committed on
Commit
abd170d
·
verified ·
1 Parent(s): 652b51a

change app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -4
app.py CHANGED
@@ -3,13 +3,46 @@ from transformers import VisionEncoderDecoderModel, TrOCRProcessor
3
  from PIL import Image
4
 
5
  # Load the model and processor from Hugging Face
6
- model = VisionEncoderDecoderModel.from_pretrained("paudelanil/denvagari-TrOCR")
7
- processor = TrOCRProcessor.from_pretrained("paudelanil/denvagari-TrOCR")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  def predict(image):
10
  # Preprocess the image
11
  image = Image.open(image).convert("RGB")
12
- pixel_values = processor(image, return_tensors="pt").pixel_values
 
13
 
14
  # Generate text from the image
15
  generated_ids = model.generate(pixel_values)
@@ -27,4 +60,4 @@ interface = gr.Interface(
27
  )
28
 
29
  # Launch the interface
30
- interface.launch()
 
3
  from PIL import Image
4
 
5
  # Load the model and processor from Hugging Face
6
+ model = VisionEncoderDecoderModel.from_pretrained("paudelanil/trocr-devanagari")
7
+ processor = TrOCRProcessor.from_pretrained("paudelanil/trocr-devanagari")
8
+
9
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
10
+ model.to(device)
11
def preprocess_image(image, target_size=(224, 224)):
    """Letterbox a PIL image onto a white canvas of ``target_size``.

    The image is scaled so that it fits entirely inside ``target_size``
    without changing its aspect ratio, then pasted centred on a white RGB
    background of exactly ``target_size``.

    Args:
        image: A PIL image (anything exposing ``size`` and ``resize``).
        target_size: ``(width, height)`` of the returned canvas. Defaults to
            ``(224, 224)``, the size this function always used.

    Returns:
        A new RGB PIL image of ``target_size`` containing the resized input.

    Raises:
        ValueError: If the input image has a zero width or height.
    """
    target_width, target_height = target_size
    width, height = image.size

    # A degenerate image has no aspect ratio; fail with a clear message
    # instead of an opaque ZeroDivisionError further down.
    if width <= 0 or height <= 0:
        raise ValueError(
            f"Cannot preprocess an empty image of size {image.size}"
        )

    # Decide which side limits the scale. Cross-multiplying compares the two
    # aspect ratios without float division; for a square target this is the
    # same "width greater than height" test as before.
    if width * target_height > height * target_width:
        new_width = target_width
        # Integer arithmetic avoids float rounding that could truncate one
        # pixel short; max(1, ...) keeps extremely wide images resizable.
        new_height = max(1, height * target_width // width)
    else:
        new_height = target_height
        new_width = max(1, width * target_height // height)

    # Resize the image
    resized_img = image.resize((new_width, new_height))

    # Split the leftover space evenly so the content is centred
    pad_left = (target_width - new_width) // 2
    pad_top = (target_height - new_height) // 2

    # White background
    pad_image = Image.new('RGB', (target_width, target_height), (255, 255, 255))
    pad_image.paste(resized_img, (pad_left, pad_top))

    return pad_image
39
+
40
 
41
  def predict(image):
42
  # Preprocess the image
43
  image = Image.open(image).convert("RGB")
44
+ image = preprocess_image(image)
45
+ pixel_values = processor(image, return_tensors="pt").pixel_values.to(device)
46
 
47
  # Generate text from the image
48
  generated_ids = model.generate(pixel_values)
 
60
  )
61
 
62
  # Launch the interface
63
+ interface.launch(share=True)