paudelanil committed
Commit f4f10a9 · verified · 1 Parent(s): 211ccfb

Update app.py

Files changed (1)
  1. app.py +44 -6
app.py CHANGED
@@ -3,22 +3,60 @@ from transformers import VisionEncoderDecoderModel, TrOCRProcessor,AutoTokenizer
 from PIL import Image
 import torch
 
+def preprocess_image(image):
+    # Resize while maintaining aspect ratio
+    target_size = (224, 224)
+    original_size = image.size
+
+    # Calculate the new size while maintaining aspect ratio
+    aspect_ratio = original_size[0] / original_size[1]
+    if aspect_ratio > 1:  # Width is greater than height
+        new_width = target_size[0]
+        new_height = int(target_size[0] / aspect_ratio)
+    else:  # Height is greater than width
+        new_height = target_size[1]
+        new_width = int(target_size[1] * aspect_ratio)
+
+    # Resize the image
+    resized_img = image.resize((new_width, new_height))
+
+    # Calculate padding values
+    padding_width = target_size[0] - new_width
+    padding_height = target_size[1] - new_height
+
+    # Apply padding to center the resized image
+    pad_left = padding_width // 2
+    pad_top = padding_height // 2
+    pad_image = Image.new('RGB', target_size, (255, 255, 255))  # White background
+    pad_image.paste(resized_img, (pad_left, pad_top))
+    return pad_image
+
+
+# Load model directly
+from transformers import AutoTokenizer, AutoModel,ViTFeatureExtractor,TrOCRProcessor,VisionEncoderDecoderModel
 
 tokenizer = AutoTokenizer.from_pretrained("paudelanil/trocr-devanagari")
-model = VisionEncoderDecoderModel.from_pretrained("paudelanil/trocr-devanagari")
-feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224-in21k')
+model1 = VisionEncoderDecoderModel.from_pretrained("paudelanil/trocr-devanagari")
+feature_extractor1 = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224-in21k')
+
+processor1 = TrOCRProcessor(feature_extractor=feature_extractor1, tokenizer=tokenizer)
+
+
+# tokenizer = AutoTokenizer.from_pretrained("paudelanil/trocr-devanagari")
+# model = VisionEncoderDecoderModel.from_pretrained("paudelanil/trocr-devanagari")
+# feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224-in21k')
 device = 'cuda' if torch.cuda.is_available() else 'cpu'
 
-model.to(device)
+model1.to(device)
 def predict(image):
     # Preprocess the image
     image = Image.open(image).convert("RGB")
     image = preprocess_image(image)
-    pixel_values = processor(image, return_tensors="pt").pixel_values.to(device)
+    pixel_values = processor1(image, return_tensors="pt").pixel_values.to(device)
 
     # Generate text from the image
-    generated_ids = model.generate(pixel_values)
-    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    generated_ids = model1.generate(pixel_values)
+    generated_text = processor1.batch_decode(generated_ids, skip_special_tokens=True)[0]
 
     return generated_text
 
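
The new preprocess_image letterboxes inputs: it scales the longer side to 224 while keeping the aspect ratio, then pastes the result onto a white 224×224 canvas before the ViT feature extractor sees the image. A minimal sketch of that behaviour, assuming preprocess_image from the updated app.py is in scope (the 640×360 black test image is hypothetical, not from the commit):

from PIL import Image

# Hypothetical landscape input; any non-square size exercises the padding path.
sample = Image.new("RGB", (640, 360), (0, 0, 0))

padded = preprocess_image(sample)
print(padded.size)              # expected: (224, 224)
print(padded.getpixel((0, 0)))  # expected: (255, 255, 255), white padding above the pasted image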
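
Inference now goes through processor1, a TrOCRProcessor built from the google/vit-base-patch16-224-in21k feature extractor plus the paudelanil/trocr-devanagari tokenizer, and through model1.generate. The hunk ends at line 62, so the UI wiring of app.py is not visible here; a hedged sketch of how predict could be exposed in a Space, assuming a Gradio front end (the Interface call, component choice, and title are assumptions, not part of this commit):

import gradio as gr

# predict(image) calls Image.open on its argument, so hand it the upload as a file path.
demo = gr.Interface(
    fn=predict,                        # predict from the updated app.py
    inputs=gr.Image(type="filepath"),
    outputs="text",
    title="TrOCR Devanagari",          # assumed title, not from the commit
)

if __name__ == "__main__":
    demo.launch()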