firetac committed on
Commit a0004ee · verified · 1 Parent(s): 47260a8

Upload 13 files

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ processed_images/processed_image.jpg filter=lfs diff=lfs merge=lfs -text
ImagesProcessing.py ADDED
@@ -0,0 +1,42 @@
+ import cv2
+ import matplotlib.pyplot as plt
+ from super_image import EdsrModel, ImageLoader
+ from PIL import Image
+
+ def preprocess_image(image_path):
+     # Load the image and convert it to grayscale
+     img = cv2.imread(image_path)
+     img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+     return img
+
+ def show_image(img):
+     plt.imshow(img, cmap='gray')
+     plt.axis('off')
+     plt.show()
+
+ def save_processed_image(img):
+     output_path = "Projects/HandwritingOCR/processed_images/processed_image.jpg"
+     cv2.imwrite(output_path, img)
+     return output_path
+
+ # Disabled draft that draws OCR word boxes; requires pytesseract if re-enabled.
+ '''def createBoundingBox(img):
+     ocr_data = pytesseract.image_to_data(img, output_type=pytesseract.Output.DICT)
+     n_boxes = len(ocr_data['level'])
+     for i in range(n_boxes):
+         if ocr_data['level'][i] == 3:
+             (x, y, w, h) = (ocr_data['left'][i], ocr_data['top'][i], ocr_data['width'][i], ocr_data['height'][i])
+             cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 5)
+     plt.imshow(img, cmap='gray')
+     plt.axis('off')
+     plt.show()
+ '''
+
+ def super_resolution(img):
+     # Upscale the image 2x with the pretrained EDSR model
+     model = EdsrModel.from_pretrained('eugenesiow/edsr-base', scale=2)
+     pil_img = Image.fromarray(img).convert('RGB')  # EDSR expects 3 channels; the input is grayscale
+     inputs = ImageLoader.load_image(pil_img)
+     preds = model(inputs)
+
+     ImageLoader.save_image(preds, 'Projects/HandwritingOCR/processed_images/processed_image.jpg')
+
+ def process_image(image_path):
+     img = preprocess_image(image_path)
+     super_resolution(img)
+
+ if __name__ == "__main__":
+     image_path = "Projects/HandwritingOCR/captured_images/captured_image.jpg"
+     process_image(image_path)
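
Note: the upscaling step can be sanity-checked on its own before wiring it into the pipeline. A minimal sketch, assuming the same eugenesiow/edsr-base checkpoint and a hypothetical input file sample.jpg:

    import cv2
    from PIL import Image
    from super_image import EdsrModel, ImageLoader

    # Hypothetical test image; any small JPEG works.
    img = cv2.cvtColor(cv2.imread("sample.jpg"), cv2.COLOR_BGR2GRAY)
    model = EdsrModel.from_pretrained('eugenesiow/edsr-base', scale=2)
    inputs = ImageLoader.load_image(Image.fromarray(img).convert('RGB'))
    preds = model(inputs)
    ImageLoader.save_image(preds, 'upscaled.jpg')  # written at 2x the input resolution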
OCRmodel.py ADDED
@@ -0,0 +1,138 @@
+ import warnings
+ from urllib3.exceptions import NotOpenSSLWarning
+
+ warnings.filterwarnings("ignore", category=NotOpenSSLWarning)
+ warnings.filterwarnings("ignore", category=FutureWarning)
+ warnings.filterwarnings("ignore", category=UserWarning, module='torch')
+ warnings.filterwarnings("ignore", category=UserWarning, module='transformers')
+ import os
+ import numpy as np
+ import torch
+ import torchvision.transforms as T
+ from PIL import Image
+ from torchvision.transforms.functional import InterpolationMode
+ from transformers import AutoModel, AutoTokenizer
+ import matplotlib.pyplot as plt
+
+ IMAGENET_MEAN = (0.485, 0.456, 0.406)
+ IMAGENET_STD = (0.229, 0.224, 0.225)
+
+ #model_name = "5CD-AI/Vintern-1B-v2"
+ model_name = "5CD-AI/Vintern-1B-v3_5"
+ device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
+
+ def build_transform(input_size):
+     MEAN, STD = IMAGENET_MEAN, IMAGENET_STD
+     transform = T.Compose([
+         T.Lambda(lambda img: img.convert('RGB') if img.mode != 'RGB' else img),
+         T.Resize((input_size, input_size), interpolation=InterpolationMode.BICUBIC),
+         T.ToTensor(),
+         T.Normalize(mean=MEAN, std=STD)
+     ])
+     return transform
+
+ def find_closest_aspect_ratio(aspect_ratio, target_ratios, width, height, image_size):
+     best_ratio_diff = float('inf')
+     best_ratio = (1, 1)
+     area = width * height
+     for ratio in target_ratios:
+         target_aspect_ratio = ratio[0] / ratio[1]
+         ratio_diff = abs(aspect_ratio - target_aspect_ratio)
+         if ratio_diff < best_ratio_diff:
+             best_ratio_diff = ratio_diff
+             best_ratio = ratio
+         elif ratio_diff == best_ratio_diff:
+             if area > 0.5 * image_size * image_size * ratio[0] * ratio[1]:
+                 best_ratio = ratio
+     return best_ratio
+
+ def dynamic_preprocess(image, min_num=1, max_num=12, image_size=448, use_thumbnail=False):
+     orig_width, orig_height = image.size
+     aspect_ratio = orig_width / orig_height
+
+     # enumerate candidate tile grids (i columns x j rows) with min_num..max_num tiles
+     target_ratios = set(
+         (i, j) for n in range(min_num, max_num + 1) for i in range(1, n + 1) for j in range(1, n + 1) if
+         i * j <= max_num and i * j >= min_num)
+     target_ratios = sorted(target_ratios, key=lambda x: x[0] * x[1])
+
+     # find the closest aspect ratio to the target
+     target_aspect_ratio = find_closest_aspect_ratio(
+         aspect_ratio, target_ratios, orig_width, orig_height, image_size)
+
+     # calculate the target width and height
+     target_width = image_size * target_aspect_ratio[0]
+     target_height = image_size * target_aspect_ratio[1]
+     blocks = target_aspect_ratio[0] * target_aspect_ratio[1]
+
+     # resize the image
+     resized_img = image.resize((target_width, target_height))
+     processed_images = []
+     for i in range(blocks):
+         box = (
+             (i % (target_width // image_size)) * image_size,
+             (i // (target_width // image_size)) * image_size,
+             ((i % (target_width // image_size)) + 1) * image_size,
+             ((i // (target_width // image_size)) + 1) * image_size
+         )
+         # split the image
+         split_img = resized_img.crop(box)
+         processed_images.append(split_img)
+     assert len(processed_images) == blocks
+     if use_thumbnail and len(processed_images) != 1:
+         thumbnail_img = image.resize((image_size, image_size))
+         processed_images.append(thumbnail_img)
+     return processed_images
+
+ def load_image(image_file, input_size=448, max_num=12):
+     image = Image.open(image_file).convert('RGB')
+     transform = build_transform(input_size=input_size)
+     images = dynamic_preprocess(image, image_size=input_size, use_thumbnail=True, max_num=max_num)
+     pixel_values = [transform(image) for image in images]
+     pixel_values = torch.stack(pixel_values)
+     return pixel_values
+
+ def truncate_tokens(tokens, max_length):
+     if len(tokens) > max_length:
+         tokens = tokens[:max_length]
+     return tokens
+
+ def OCRing(image_URL):
+     test_image = image_URL
+     pixel_values = load_image(test_image, max_num=6).to(torch.bfloat16).to(device)
+     generation_config = dict(max_new_tokens=512, do_sample=False, num_beams=3, repetition_penalty=3.5)
+
+     # Prompt (Vietnamese): "Only output the characters present in the text, nothing more."
+     question = '<image>\n Chỉ xuất ra kí tự có trong văn bản, không thêm bớt.'
+
+     response = model.chat(tokenizer, pixel_values, question, generation_config)
+     print(f'User: {question}\nAssistant: {response}')
+     return response
+
+ try:
+     model = AutoModel.from_pretrained(
+         model_name,
+         torch_dtype=torch.bfloat16,
+         low_cpu_mem_usage=True,
+         trust_remote_code=True,
+         use_flash_attn=False,
+     ).eval().to(device)
+ except Exception:
+     # Fall back for checkpoints whose remote code does not accept use_flash_attn
+     model = AutoModel.from_pretrained(
+         model_name,
+         torch_dtype=torch.bfloat16,
+         low_cpu_mem_usage=True,
+         trust_remote_code=True
+     ).eval().to(device)
+
+ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, use_fast=False)
+
+ if __name__ == "__main__":
+     test_image = "Projects/HandwritingOCR/captured_images/captured_image.jpg"
+     pixel_values = load_image(test_image, max_num=6).to(torch.bfloat16).to(device)
+     generation_config = dict(max_new_tokens=512, do_sample=False, num_beams=3, repetition_penalty=3.5)
+
+     # Prompt (Vietnamese): "Input: image, Output: only the characters present in the image, nothing more."
+     question = '<image>\n Input: ảnh, Output: Chỉ xuất ra những kí tự có trong ảnh, không thêm bớt.'
+
+     response = model.chat(tokenizer, pixel_values, question, generation_config)
+     print(f'User: {question}\nAssistant: {response}')
+ # run this in the terminal first: export PYTORCH_ENABLE_MPS_FALLBACK=1
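
Note: dynamic_preprocess splits a page into up to max_num 448x448 tiles whose grid best matches the page's aspect ratio, plus one global thumbnail. A small sketch of the resulting tile count, assuming dynamic_preprocess is in scope (importing OCRmodel directly would also load the model at module level) and a hypothetical portrait page:

    from PIL import Image

    # Hypothetical portrait scan, aspect ratio ~0.71, close to a 2x3 tile grid
    page = Image.new('RGB', (900, 1260), 'white')
    tiles = dynamic_preprocess(page, max_num=6, image_size=448, use_thumbnail=True)
    print(len(tiles))  # 7: a 2x3 grid of 448x448 crops plus the thumbnail

load_image then transforms and stacks these tiles into a (num_tiles, 3, 448, 448) tensor for model.chat.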
app.py ADDED
@@ -0,0 +1,76 @@
+ from flask import Flask, render_template, request, jsonify
+ import cv2
+ import numpy as np
+ import os
+ import base64
+ import OCRmodel as ocr
+ import ImagesProcessing as ip
+
+ app = Flask(__name__)
+
+ # Create the directory for saved images if it does not exist yet
+ save_dir = "Projects/HandwritingOCR/captured_images"
+ if not os.path.exists(save_dir):
+     os.makedirs(save_dir)
+ ocr_process = None
+
+ @app.route('/')
+ def home():
+     return render_template('home.html')
+
+ @app.route('/index')
+ def index():
+     return render_template('index.html')
+
+ @app.route('/other')
+ def other():
+     return render_template('other.html')
+
+ @app.route('/capture', methods=['POST'])
+ def capture():
+     data = request.json
+     image_data = data['image']
+     image_data = image_data.split(",")[1]  # strip the data-URL prefix
+     image_data = np.frombuffer(base64.b64decode(image_data), np.uint8)
+     image = cv2.imdecode(image_data, cv2.IMREAD_COLOR)
+     image_path = os.path.join(save_dir, "captured_image.jpg")
+     cv2.imwrite(image_path, image)
+     return jsonify({"message": "Image saved successfully!"})
+
+ @app.route('/save_pasted_image', methods=['POST'])
+ def save_pasted_image():
+     data = request.json
+     image_data = data['image']
+     image_data = image_data.split(",")[1]
+     image_data = np.frombuffer(base64.b64decode(image_data), np.uint8)
+     image = cv2.imdecode(image_data, cv2.IMREAD_COLOR)
+     image_path = os.path.join(save_dir, "pasted_image.jpg")
+     cv2.imwrite(image_path, image)
+     return jsonify({"message": "Pasted image saved successfully!"})
+
+ @app.route('/camocr', methods=['POST'])
+ def camocr():
+     image_path = os.path.join(save_dir, "captured_image.jpg")
+     result = DoOCR(image_path)
+     return jsonify({"result": result})
+
+ @app.route('/imgocr', methods=['POST'])
+ def imgocr():
+     image_path = os.path.join(save_dir, "pasted_image.jpg")
+     result = DoOCR(image_path)
+     return jsonify({"result": result})
+
+ def processImage(image_path):
+     ip.process_image(image_path)
+     #output_path = ip.save_processed_image(img)
+     #return output_path
+
+ def DoOCR(image_path):
+     processImage(image_path)
+     output_path = "Projects/HandwritingOCR/processed_images/processed_image.jpg"
+     return ocr.OCRing(output_path)
+
+ if __name__ == '__main__':
+     app.run(debug=True)
+ # run: export PYTORCH_ENABLE_MPS_FALLBACK=1 in the terminal first
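
Note: the routes can be smoke-tested without a browser via Flask's built-in test client. A sketch, assuming a hypothetical test file sample.jpg (importing app also pulls in OCRmodel, which loads the model, so the first run is slow):

    import base64
    from app import app

    with open("sample.jpg", "rb") as f:  # hypothetical test image
        data_url = "data:image/jpeg;base64," + base64.b64encode(f.read()).decode()

    client = app.test_client()
    resp = client.post('/capture', json={'image': data_url})
    print(resp.get_json())  # {'message': 'Image saved successfully!'}
    # resp = client.post('/camocr')  # runs preprocessing + OCR on the saved image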
captured_images/captured_image.jpg ADDED
captured_images/pasted_image.jpg ADDED
main.py ADDED
@@ -0,0 +1,4 @@
+ # run: export PYTORCH_ENABLE_MPS_FALLBACK=1 in the terminal first
+ import OCRmodel as ocr
+ image_path = '/Users/lequanhuy/Documents/Code/Visual Code/Projects/HandwritingOCR/captured_images/captured_image.jpg'
+ print(ocr.OCRing(image_path))
processed_images/processed_image.jpg ADDED

Git LFS Details

  • SHA256: e1d592dbec49c3e83f1ead7e113b9c456d7101775f45e3b1c66668b3411c156c
  • Pointer size: 131 Bytes
  • Size of remote file: 164 kB
static/css/style.css ADDED
@@ -0,0 +1,94 @@
+ body {
+     font-family: 'Roboto', sans-serif;
+     background-color: #f0f0f0;
+     margin: 0;
+     padding: 0;
+     display: flex;
+     justify-content: center;
+     align-items: center;
+     height: 100%;
+ }
+
+ .container {
+     background-color: #fff;
+     padding: 30px;
+     border-radius: 10px;
+     box-shadow: 0 0 20px rgba(0, 0, 0, 0.1);
+     text-align: center;
+     width: 90%;
+     max-width: 900px;
+ }
+
+ h1 {
+     color: #333;
+     margin-bottom: 20px;
+     font-size: 2.5em;
+ }
+
+ p {
+     color: #666;
+     font-size: 1.2em;
+ }
+
+ .button-container {
+     margin-bottom: 20px;
+ }
+
+ button {
+     background-color: #007bff;
+     color: #fff;
+     border: none;
+     padding: 15px 30px;
+     margin: 10px;
+     border-radius: 5px;
+     cursor: pointer;
+     font-size: 1em;
+     transition: background-color 0.3s ease;
+ }
+
+ button:hover {
+     background-color: #0056b3;
+ }
+
+ .paste-container {
+     margin-top: 20px;
+ }
+
+ #paste-box {
+     border: 2px dashed #007bff;
+     border-radius: 5px;
+     padding: 20px;
+     min-height: 150px;
+     cursor: text;
+     background-color: #f9f9f9;
+     transition: background-color 0.3s ease;
+ }
+
+ #paste-box:focus {
+     background-color: #e9f7ff;
+ }
+
+ #pasted-image {
+     border: 2px solid #007bff;
+     border-radius: 5px;
+     max-width: 100%;
+     margin-top: 20px;
+ }
+
+ .ocr-result {
+     background-color: #e9ecef;
+     padding: 20px;
+     border-radius: 5px;
+     border: 1px solid #ced4da;
+     color: #495057;
+     font-size: 1em;
+     text-align: left;
+     white-space: pre-wrap;
+     margin-top: 20px;
+ }
+
+ .loading {
+     font-size: 1.5em;
+     color: #007bff;
+     margin-top: 20px;
+ }
static/js/other.js ADDED
@@ -0,0 +1,93 @@
+ let loadingInterval;
+
+ function handlePaste(event) {
+     const items = (event.clipboardData || event.originalEvent.clipboardData).items;
+     for (const item of items) {
+         if (item.type.indexOf("image") === 0) {
+             const blob = item.getAsFile();
+             const reader = new FileReader();
+             reader.onload = function(event) {
+                 const img = document.getElementById("pasted-image");
+                 img.src = event.target.result;
+                 img.style.display = "block";
+             };
+             reader.readAsDataURL(blob);
+         }
+     }
+ }
+
+ function saveImage() {
+     const img = document.getElementById("pasted-image");
+     if (img.src) {
+         fetch('/save_pasted_image', {
+             method: 'POST',
+             headers: {
+                 'Content-Type': 'application/json'
+             },
+             body: JSON.stringify({ image: img.src })
+         })
+         .then(response => response.json())
+         .then(data => {
+             console.log(data.message);
+             alert("Image saved successfully!");
+         })
+         .catch(console.error);
+     } else {
+         alert("No image to save!");
+     }
+ }
+
+ function performOCR() {
+     clearOCRResult();
+     showLoading();
+     disableButton();
+     fetch('/imgocr', {
+         method: 'POST',
+         headers: {
+             'Content-Type': 'application/json'
+         }
+     })
+     .then(response => response.json())
+     .then(data => {
+         document.getElementById('ocr-result').innerText = data.result;
+         hideLoading();
+         enableButton();
+     })
+     .catch(error => {
+         console.error(error);
+         hideLoading();
+         enableButton();
+     });
+ }
+
+ function showLoading() {
+     const loadingElement = document.getElementById('loading');
+     loadingElement.style.display = 'block';
+     let dots = 0;
+     loadingInterval = setInterval(() => {
+         dots = (dots + 1) % 4;
+         loadingElement.innerText = 'Loading' + '.'.repeat(dots);
+     }, 500);
+ }
+
+ function hideLoading() {
+     clearInterval(loadingInterval);
+     const loadingElement = document.getElementById('loading');
+     loadingElement.style.display = 'none';
+ }
+
+ function clearOCRResult() {
+     document.getElementById('ocr-result').innerText = '';
+ }
+
+ function disableButton() {
+     const button = document.querySelector('button[onclick="performOCR()"]');
+     button.disabled = true;
+     button.style.backgroundColor = '#cccccc';
+ }
+
+ function enableButton() {
+     const button = document.querySelector('button[onclick="performOCR()"]');
+     button.disabled = false;
+     button.style.backgroundColor = '#007bff';
+ }
static/js/script.js ADDED
@@ -0,0 +1,88 @@
+ let video;
+ let loadingInterval;  // was an implicit global; declared for strict-mode safety
+
+ function startWebcam() {
+     video = document.getElementById('webcam');
+     navigator.mediaDevices.getUserMedia({ video: true })
+         .then(stream => {
+             video.srcObject = stream;
+         })
+         .catch(console.error);
+ }
+
+ function captureImage() {
+     const canvas = document.createElement('canvas');
+     canvas.width = video.videoWidth;
+     canvas.height = video.videoHeight;
+     const context = canvas.getContext('2d');
+     context.drawImage(video, 0, 0, canvas.width, canvas.height);
+     const imageData = canvas.toDataURL('image/jpeg');
+     fetch('/capture', {
+         method: 'POST',
+         headers: {
+             'Content-Type': 'application/json'
+         },
+         body: JSON.stringify({ image: imageData })
+     })
+     .then(response => response.json())
+     .then(data => {
+         console.log(data.message);
+         document.getElementById('captured-image').src = imageData;
+         document.getElementById('captured-image').style.display = 'block';
+     })
+     .catch(console.error);
+ }
+
+ function performOCR() {
+     clearOCRResult();
+     showLoading();
+     disableButton();
+     fetch('/camocr', {
+         method: 'POST',
+         headers: {
+             'Content-Type': 'application/json'
+         }
+     })
+     .then(response => response.json())
+     .then(data => {
+         document.getElementById('ocr-result').innerText = data.result;
+         hideLoading();
+         enableButton();
+     })
+     .catch(error => {
+         console.error(error);
+         hideLoading();
+         enableButton();
+     });
+ }
+
+ function showLoading() {
+     const loadingElement = document.getElementById('loading');
+     loadingElement.style.display = 'block';
+     let dots = 0;
+     loadingInterval = setInterval(() => {
+         dots = (dots + 1) % 4;
+         loadingElement.innerText = 'Loading' + '.'.repeat(dots);
+     }, 500);
+ }
+
+ function hideLoading() {
+     clearInterval(loadingInterval);
+     const loadingElement = document.getElementById('loading');
+     loadingElement.style.display = 'none';
+ }
+
+ function clearOCRResult() {
+     document.getElementById('ocr-result').innerText = '';
+ }
+
+ function disableButton() {
+     const button = document.querySelector('button[onclick="performOCR()"]');
+     button.disabled = true;
+     button.style.backgroundColor = '#cccccc';
+ }
+
+ function enableButton() {
+     const button = document.querySelector('button[onclick="performOCR()"]');
+     button.disabled = false;
+     button.style.backgroundColor = '#007bff';
+ }
templates/home.html ADDED
@@ -0,0 +1,18 @@
+ <!DOCTYPE html>
+ <html lang="en">
+ <head>
+     <meta charset="UTF-8">
+     <meta name="viewport" content="width=device-width, initial-scale=1.0">
+     <title>Home</title>
+     <link rel="stylesheet" href="/static/css/style.css">
+ </head>
+ <body>
+     <div class="container">
+         <h1>Welcome to VN OCR</h1>
+         <div class="button-container">
+             <button type="button" onclick="window.location.href='/index'">Go to Webcam Capture</button>
+             <button type="button" onclick="window.location.href='/other'">Go to Image OCR</button>
+         </div>
+     </div>
+ </body>
+ </html>
templates/index.html ADDED
@@ -0,0 +1,29 @@
+ <!DOCTYPE html>
+ <html lang="en">
+ <head>
+     <meta charset="UTF-8">
+     <meta name="viewport" content="width=device-width, initial-scale=1.0">
+     <title>Webcam Capture</title>
+     <link rel="stylesheet" href="/static/css/style.css">
+ </head>
+ <body>
+     <div class="container">
+         <h1>VN OCR</h1>
+         <div class="button-container">
+             <button type="button" onclick="startWebcam()">Start Webcam</button>
+             <button type="button" onclick="captureImage()">Capture Image</button>
+             <button type="button" onclick="performOCR()">Perform OCR</button>
+             <button type="button" onclick="window.location.href='/'">Go to Home</button>
+         </div>
+         <div id="webcam-container">
+             <video id="webcam" autoplay playsinline width="640" height="480"></video>
+         </div>
+         <div id="image-container">
+             <img id="captured-image" src="" alt="Captured Image" style="display: none;">
+         </div>
+         <div id="ocr-result" class="ocr-result"></div>
+         <div id="loading" class="loading" style="display: none;">Loading</div>
+     </div>
+     <script src="/static/js/script.js"></script>
+ </body>
+ </html>
templates/other.html ADDED
@@ -0,0 +1,29 @@
+ <!DOCTYPE html>
+ <html lang="en">
+ <head>
+     <meta charset="UTF-8">
+     <meta name="viewport" content="width=device-width, initial-scale=1.0">
+     <title>Image OCR</title>
+     <link rel="stylesheet" href="/static/css/style.css">
+ </head>
+ <body>
+     <div class="container">
+         <h1>Image OCR</h1>
+         <div class="button-container">
+             <button type="button" onclick="window.location.href='/'">Go to Home</button>
+             <button type="button" onclick="performOCR()">Perform OCR</button>
+         </div>
+         <div class="paste-container">
+             <h2>Paste your image here</h2>
+             <div id="paste-box" contenteditable="true" onpaste="handlePaste(event)">
+                 <p></p>
+             </div>
+             <img id="pasted-image" src="" alt="Pasted Image" style="display: none;">
+             <button type="button" onclick="saveImage()">Save Image</button>
+         </div>
+         <div id="ocr-result" class="ocr-result"></div>
+         <div id="loading" class="loading" style="display: none;">Loading</div>
+     </div>
+     <script src="/static/js/other.js"></script>
+ </body>
+ </html>