Delete HandwritingOCR
- HandwritingOCR/.DS_Store +0 -0
- HandwritingOCR/ImagesProcessing.py +0 -42
- HandwritingOCR/OCRmodel.py +0 -138
- HandwritingOCR/app.py +0 -76
- HandwritingOCR/captured_images/captured_image.jpg +0 -0
- HandwritingOCR/captured_images/pasted_image.jpg +0 -0
- HandwritingOCR/main.py +0 -4
- HandwritingOCR/processed_images/processed_image.jpg +0 -3
- HandwritingOCR/static/css/style.css +0 -94
- HandwritingOCR/static/js/other.js +0 -93
- HandwritingOCR/static/js/script.js +0 -88
- HandwritingOCR/templates/home.html +0 -18
- HandwritingOCR/templates/index.html +0 -29
- HandwritingOCR/templates/other.html +0 -29
HandwritingOCR/.DS_Store
DELETED
Binary file (6.15 kB)
HandwritingOCR/ImagesProcessing.py
DELETED
@@ -1,42 +0,0 @@
-import cv2
-import matplotlib.pyplot as plt
-from super_image import EdsrModel, ImageLoader
-from PIL import Image
-def preprocess_image(image_path):
-    img = cv2.imread(image_path)
-    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-    return img
-def show_image(img):
-    plt.imshow(img, cmap='gray')
-    plt.axis('off')
-    plt.show()
-def save_processed_image(img):
-    output_path = "Projects/HandwritingOCR/processed_images/processed_image.jpg"
-    cv2.imwrite(output_path, img)
-    return output_path
-'''def createBoundingBox(img):
-    ocr_data = pytesseract.image_to_data(img, output_type=pytesseract.Output.DICT)
-    n_boxes = len(ocr_data['level'])
-    for i in range(n_boxes):
-        if ocr_data['level'][i] == 3:
-            (x, y, w, h) = (ocr_data['left'][i], ocr_data['top'][i], ocr_data['width'][i], ocr_data['height'][i])
-            cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 5)
-    plt.imshow(img, cmap='gray')
-    plt.axis('off')
-    plt.show()
-'''
-
-def super_resolution(img):
-    model = EdsrModel.from_pretrained('eugenesiow/edsr-base', scale=2)
-    pil_img = Image.fromarray(img)
-    inputs = ImageLoader.load_image(pil_img)
-    preds = model(inputs)
-
-    ImageLoader.save_image(preds, 'Projects/HandwritingOCR/processed_images/processed_image.jpg')
-def process_image(image_path):
-    img = preprocess_image(image_path)
-    super_resolution(img)
-
-if __name__ == "__main__":
-    image_path = "Projects/HandwritingOCR/captured_images/captured_image.jpg"
-    process_image(image_path)
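Aside: every path in the module above ("Projects/HandwritingOCR/...") is resolved against the current working directory, so the pipeline only works when the process is launched from the parent of Projects. A minimal sketch of a more robust variant, anchoring output to the module's own location; the BASE_DIR and OUTPUT_PATH names are illustrative, not part of the deleted code:

# Sketch only: resolve the output path against this file's location rather
# than the current working directory. BASE_DIR and OUTPUT_PATH are
# hypothetical names, not from the original module.
from pathlib import Path

import cv2

BASE_DIR = Path(__file__).resolve().parent
OUTPUT_PATH = BASE_DIR / "processed_images" / "processed_image.jpg"

def save_processed_image(img):
    # Create the target directory on first use, then write the image.
    OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
    cv2.imwrite(str(OUTPUT_PATH), img)
    return str(OUTPUT_PATH)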
HandwritingOCR/OCRmodel.py
DELETED
@@ -1,138 +0,0 @@
-import warnings
-from urllib3.exceptions import NotOpenSSLWarning
-
-warnings.filterwarnings("ignore", category=NotOpenSSLWarning)
-warnings.filterwarnings("ignore", category=FutureWarning)
-warnings.filterwarnings("ignore", category=UserWarning, module='torch')
-warnings.filterwarnings("ignore", category=UserWarning, module='transformers')
-import os
-import numpy as np
-import torch
-import torchvision.transforms as T
-from PIL import Image
-from torchvision.transforms.functional import InterpolationMode
-from transformers import AutoModel, AutoTokenizer
-import matplotlib.pyplot as plt
-
-IMAGENET_MEAN = (0.485, 0.456, 0.406)
-IMAGENET_STD = (0.229, 0.224, 0.225)
-
-#model_name = "5CD-AI/Vintern-1B-v2"
-model_name = "5CD-AI/Vintern-1B-v3_5"
-device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
-
-def build_transform(input_size):
-    MEAN, STD = IMAGENET_MEAN, IMAGENET_STD
-    transform = T.Compose([
-        T.Lambda(lambda img: img.convert('RGB') if img.mode != 'RGB' else img),
-        T.Resize((input_size, input_size), interpolation=InterpolationMode.BICUBIC),
-        T.ToTensor(),
-        T.Normalize(mean=MEAN, std=STD)
-    ])
-    return transform
-
-def find_closest_aspect_ratio(aspect_ratio, target_ratios, width, height, image_size):
-    best_ratio_diff = float('inf')
-    best_ratio = (1, 1)
-    area = width * height
-    for ratio in target_ratios:
-        target_aspect_ratio = ratio[0] / ratio[1]
-        ratio_diff = abs(aspect_ratio - target_aspect_ratio)
-        if ratio_diff < best_ratio_diff:
-            best_ratio_diff = ratio_diff
-            best_ratio = ratio
-        elif ratio_diff == best_ratio_diff:
-            if area > 0.5 * image_size * image_size * ratio[0] * ratio[1]:
-                best_ratio = ratio
-    return best_ratio
-
-def dynamic_preprocess(image, min_num=1, max_num=12, image_size=448, use_thumbnail=False):
-    orig_width, orig_height = image.size
-    aspect_ratio = orig_width / orig_height
-
-    # calculate the existing image aspect ratio
-    target_ratios = set(
-        (i, j) for n in range(min_num, max_num + 1) for i in range(1, n + 1) for j in range(1, n + 1) if
-        i * j <= max_num and i * j >= min_num)
-    target_ratios = sorted(target_ratios, key=lambda x: x[0] * x[1])
-
-    # find the closest aspect ratio to the target
-    target_aspect_ratio = find_closest_aspect_ratio(
-        aspect_ratio, target_ratios, orig_width, orig_height, image_size)
-
-    # calculate the target width and height
-    target_width = image_size * target_aspect_ratio[0]
-    target_height = image_size * target_aspect_ratio[1]
-    blocks = target_aspect_ratio[0] * target_aspect_ratio[1]
-
-    # resize the image
-    resized_img = image.resize((target_width, target_height))
-    processed_images = []
-    for i in range(blocks):
-        box = (
-            (i % (target_width // image_size)) * image_size,
-            (i // (target_width // image_size)) * image_size,
-            ((i % (target_width // image_size)) + 1) * image_size,
-            ((i // (target_width // image_size)) + 1) * image_size
-        )
-        # split the image
-        split_img = resized_img.crop(box)
-        processed_images.append(split_img)
-    assert len(processed_images) == blocks
-    if use_thumbnail and len(processed_images) != 1:
-        thumbnail_img = image.resize((image_size, image_size))
-        processed_images.append(thumbnail_img)
-    return processed_images
-
-def load_image(image_file, input_size=448, max_num=12):
-    image = Image.open(image_file).convert('RGB')
-    transform = build_transform(input_size=input_size)
-    images = dynamic_preprocess(image, image_size=input_size, use_thumbnail=True, max_num=max_num)
-    pixel_values = [transform(image) for image in images]
-    pixel_values = torch.stack(pixel_values)
-    return pixel_values
-
-def truncate_tokens(tokens, max_length):
-    if len(tokens) > max_length:
-        tokens = tokens[:max_length]
-    return tokens
-
-def OCRing(image_URL):
-    test_image = image_URL
-    pixel_values = load_image(test_image, max_num=6).to(torch.bfloat16).to(device)
-    generation_config = dict(max_new_tokens=512, do_sample=False, num_beams=3, repetition_penalty=3.5)
-
-    question = '<image>\n Chỉ xuất ra kí tự có trong văn bản, không thêm bớt.'
-
-    response = model.chat(tokenizer, pixel_values, question, generation_config)
-    print(f'User: {question}\nAssistant: {response}')
-    return response
-
-try:
-    model = AutoModel.from_pretrained(
-        model_name,
-        torch_dtype=torch.bfloat16,
-        low_cpu_mem_usage=True,
-        trust_remote_code=True,
-        use_flash_attn=False,
-    ).eval().to(device)
-except:
-    model = AutoModel.from_pretrained(
-        model_name,
-        torch_dtype=torch.bfloat16,
-        low_cpu_mem_usage=True,
-        trust_remote_code=True
-    ).eval().to(device)
-
-tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, use_fast=False)
-
-if __name__ == "__main__":
-    test_image = "Projects/HandwritingOCR/captured_images/captured_image.jpg"
-    pixel_values = load_image(test_image, max_num=6).to(torch.bfloat16).to(device)
-    generation_config = dict(max_new_tokens=512, do_sample=False, num_beams=3, repetition_penalty=3.5)
-
-    question = '<image>\n Input: ảnh, Output: Chỉ xuất ra những kí tự có trong ảnh, không thêm bớt.'
-
-    response = model.chat(tokenizer, pixel_values, question, generation_config)
-    print(f'User: {question}\nAssistant: {response}')
-#run this command in the terminal: export PYTORCH_ENABLE_MPS_FALLBACK=1
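For the record, both Vietnamese prompts ask the model to output only the characters present in the text/image, adding and omitting nothing. The trailing comment is load-bearing on Apple Silicon: some ops invoked by Vintern's remote code lack MPS kernels, and PyTorch raises unless the fallback flag lets them run on CPU. A hedged sketch of setting it in-process instead of in the shell; it must happen before torch is first imported:

# Sketch: in-process equivalent of `export PYTORCH_ENABLE_MPS_FALLBACK=1`.
# This must run before the first `import torch` in the interpreter, or the
# MPS backend initializes without the fallback enabled.
import os

os.environ.setdefault("PYTORCH_ENABLE_MPS_FALLBACK", "1")

import torch

device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")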
HandwritingOCR/app.py
DELETED
@@ -1,76 +0,0 @@
-from flask import Flask, render_template, request, jsonify
-import cv2
-import numpy as np
-import os
-import base64
-import OCRmodel as ocr
-import ImagesProcessing as ip
-app = Flask(__name__)
-
-# Create the folder for saved images if it does not exist yet
-save_dir = "Projects/HandwritingOCR/captured_images"
-if not os.path.exists(save_dir):
-    os.makedirs(save_dir)
-ocr_process = None
-
-@app.route('/')
-def home():
-    return render_template('home.html')
-
-@app.route('/index')
-def index():
-    return render_template('index.html')
-
-@app.route('/other')
-def other():
-    return render_template('other.html')
-
-@app.route('/capture', methods=['POST'])
-def capture():
-    data = request.json
-    image_data = data['image']
-    image_data = image_data.split(",")[1]
-    image_data = np.frombuffer(base64.b64decode(image_data), np.uint8)
-    image = cv2.imdecode(image_data, cv2.IMREAD_COLOR)
-    image_path = os.path.join(save_dir, "captured_image.jpg")
-    cv2.imwrite(image_path, image)
-    return jsonify({"message": "Image saved successfully!"})
-
-@app.route('/save_pasted_image', methods=['POST'])
-def save_pasted_image():
-    data = request.json
-    image_data = data['image']
-    image_data = image_data.split(",")[1]
-    image_data = np.frombuffer(base64.b64decode(image_data), np.uint8)
-    image = cv2.imdecode(image_data, cv2.IMREAD_COLOR)
-    image_path = os.path.join(save_dir, "pasted_image.jpg")
-    cv2.imwrite(image_path, image)
-    return jsonify({"message": "Pasted image saved successfully!"})
-
-@app.route('/camocr', methods=['POST'])
-def camocr():
-    image_path = os.path.join(save_dir, "captured_image.jpg")
-    result = DoOCR(image_path)
-    return jsonify({"result": result})
-
-@app.route('/imgocr', methods=['POST'])
-def imgocr():
-    image_path = os.path.join(save_dir, "pasted_image.jpg")
-    result = DoOCR(image_path)
-    return jsonify({"result": result})
-
-
-def processImage(image_path):
-    ip.process_image(image_path)
-    #output_path = ip.save_processed_image(img)
-    #return output_path
-
-def DoOCR(image_path):
-    processImage(image_path)
-    output_path = "Projects/HandwritingOCR/processed_images/processed_image.jpg"
-    return ocr.OCRing(output_path)
-
-
-if __name__ == '__main__':
-    app.run(debug=True)
-#use: export PYTORCH_ENABLE_MPS_FALLBACK=1 in the terminal
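Note that /capture and /save_pasted_image expect a JSON body whose image field is a data URL; the handlers split on the first comma before base64-decoding, so the data:image/jpeg;base64, prefix is required. A hedged client sketch using only the standard library; sample.jpg and Flask's default port 5000 are assumptions, not taken from the repo:

# Hypothetical client for the deleted Flask app, assumed to be running
# locally on Flask's default port. sample.jpg is a placeholder file name.
import base64
import json
import urllib.request

with open("sample.jpg", "rb") as f:
    data_url = "data:image/jpeg;base64," + base64.b64encode(f.read()).decode()

req = urllib.request.Request(
    "http://127.0.0.1:5000/save_pasted_image",
    data=json.dumps({"image": data_url}).encode(),
    headers={"Content-Type": "application/json"},
)
print(urllib.request.urlopen(req).read().decode())   # {"message": "..."}

# The OCR endpoints read no request body, so an empty JSON object is enough.
ocr_req = urllib.request.Request(
    "http://127.0.0.1:5000/imgocr",
    data=b"{}",
    headers={"Content-Type": "application/json"},
)
print(urllib.request.urlopen(ocr_req).read().decode())  # {"result": "..."}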
HandwritingOCR/captured_images/captured_image.jpg
DELETED
Binary file (58.4 kB)
HandwritingOCR/captured_images/pasted_image.jpg
DELETED
Binary file (52.3 kB)
HandwritingOCR/main.py
DELETED
@@ -1,4 +0,0 @@
-#use: export PYTORCH_ENABLE_MPS_FALLBACK=1 in the terminal
-import OCRmodel as ocr
-image_path = '/Users/lequanhuy/Documents/Code/Visual Code/Projects/HandwritingOCR/captured_images/captured_image.jpg'
-print(ocr.OCRing(image_path))
HandwritingOCR/processed_images/processed_image.jpg
DELETED
Git LFS Details
HandwritingOCR/static/css/style.css
DELETED
@@ -1,94 +0,0 @@
-body {
-    font-family: 'Roboto', sans-serif;
-    background-color: #f0f0f0;
-    margin: 0;
-    padding: 0;
-    display: flex;
-    justify-content: center;
-    align-items: center;
-    height: 100%;
-}
-
-.container {
-    background-color: #fff;
-    padding: 30px;
-    border-radius: 10px;
-    box-shadow: 0 0 20px rgba(0, 0, 0, 0.1);
-    text-align: center;
-    width: 90%;
-    max-width: 900px;
-}
-
-h1 {
-    color: #333;
-    margin-bottom: 20px;
-    font-size: 2.5em;
-}
-
-p {
-    color: #666;
-    font-size: 1.2em;
-}
-
-.button-container {
-    margin-bottom: 20px;
-}
-
-button {
-    background-color: #007bff;
-    color: #fff;
-    border: none;
-    padding: 15px 30px;
-    margin: 10px;
-    border-radius: 5px;
-    cursor: pointer;
-    font-size: 1em;
-    transition: background-color 0.3s ease;
-}
-
-button:hover {
-    background-color: #0056b3;
-}
-
-.paste-container {
-    margin-top: 20px;
-}
-
-#paste-box {
-    border: 2px dashed #007bff;
-    border-radius: 5px;
-    padding: 20px;
-    min-height: 150px;
-    cursor: text;
-    background-color: #f9f9f9;
-    transition: background-color 0.3s ease;
-}
-
-#paste-box:focus {
-    background-color: #e9f7ff;
-}
-
-#pasted-image {
-    border: 2px solid #007bff;
-    border-radius: 5px;
-    max-width: 100%;
-    margin-top: 20px;
-}
-
-.ocr-result {
-    background-color: #e9ecef;
-    padding: 20px;
-    border-radius: 5px;
-    border: 1px solid #ced4da;
-    color: #495057;
-    font-size: 1em;
-    text-align: left;
-    white-space: pre-wrap;
-    margin-top: 20px;
-}
-
-.loading {
-    font-size: 1.5em;
-    color: #007bff;
-    margin-top: 20px;
-}
HandwritingOCR/static/js/other.js
DELETED
@@ -1,93 +0,0 @@
-let loadingInterval;
-
-function handlePaste(event) {
-    const items = (event.clipboardData || event.originalEvent.clipboardData).items;
-    for (const item of items) {
-        if (item.type.indexOf("image") === 0) {
-            const blob = item.getAsFile();
-            const reader = new FileReader();
-            reader.onload = function(event) {
-                const img = document.getElementById("pasted-image");
-                img.src = event.target.result;
-                img.style.display = "block";
-            };
-            reader.readAsDataURL(blob);
-        }
-    }
-}
-
-function saveImage() {
-    const img = document.getElementById("pasted-image");
-    if (img.src) {
-        fetch('/save_pasted_image', {
-            method: 'POST',
-            headers: {
-                'Content-Type': 'application/json'
-            },
-            body: JSON.stringify({ image: img.src })
-        })
-        .then(response => response.json())
-        .then(data => {
-            console.log(data.message);
-            alert("Image saved successfully!");
-        })
-        .catch(console.error);
-    } else {
-        alert("No image to save!");
-    }
-}
-
-function performOCR() {
-    clearOCRResult();
-    showLoading();
-    disableButton();
-    fetch('/imgocr', {
-        method: 'POST',
-        headers: {
-            'Content-Type': 'application/json'
-        }
-    })
-    .then(response => response.json())
-    .then(data => {
-        document.getElementById('ocr-result').innerText = data.result;
-        hideLoading();
-        enableButton();
-    })
-    .catch(error => {
-        console.error(error);
-        hideLoading();
-        enableButton();
-    });
-}
-
-function showLoading() {
-    const loadingElement = document.getElementById('loading');
-    loadingElement.style.display = 'block';
-    let dots = 0;
-    loadingInterval = setInterval(() => {
-        dots = (dots + 1) % 4;
-        loadingElement.innerText = 'Loading' + '.'.repeat(dots);
-    }, 500);
-}
-
-function hideLoading() {
-    clearInterval(loadingInterval);
-    const loadingElement = document.getElementById('loading');
-    loadingElement.style.display = 'none';
-}
-
-function clearOCRResult() {
-    document.getElementById('ocr-result').innerText = '';
-}
-
-function disableButton() {
-    const button = document.querySelector('button[onclick="performOCR()"]');
-    button.disabled = true;
-    button.style.backgroundColor = '#cccccc';
-}
-
-function enableButton() {
-    const button = document.querySelector('button[onclick="performOCR()"]');
-    button.disabled = false;
-    button.style.backgroundColor = '#007bff';
-}
HandwritingOCR/static/js/script.js
DELETED
@@ -1,88 +0,0 @@
-let video;
-
-function startWebcam() {
-    video = document.getElementById('webcam');
-    navigator.mediaDevices.getUserMedia({ video: true })
-    .then(stream => {
-        video.srcObject = stream;
-    })
-    .catch(console.error);
-}
-
-function captureImage() {
-    const canvas = document.createElement('canvas');
-    canvas.width = video.videoWidth;
-    canvas.height = video.videoHeight;
-    const context = canvas.getContext('2d');
-    context.drawImage(video, 0, 0, canvas.width, canvas.height);
-    const imageData = canvas.toDataURL('image/jpeg');
-    fetch('/capture', {
-        method: 'POST',
-        headers: {
-            'Content-Type': 'application/json'
-        },
-        body: JSON.stringify({ image: imageData })
-    })
-    .then(response => response.json())
-    .then(data => {
-        console.log(data.message);
-        document.getElementById('captured-image').src = imageData;
-        document.getElementById('captured-image').style.display = 'block';
-    })
-    .catch(console.error);
-}
-
-function performOCR() {
-    clearOCRResult();
-    showLoading();
-    disableButton();
-    fetch('/camocr', {
-        method: 'POST',
-        headers: {
-            'Content-Type': 'application/json'
-        }
-    })
-    .then(response => response.json())
-    .then(data => {
-        document.getElementById('ocr-result').innerText = data.result;
-        hideLoading();
-        enableButton();
-    })
-    .catch(error => {
-        console.error(error);
-        hideLoading();
-        enableButton();
-    });
-}
-
-function showLoading() {
-    const loadingElement = document.getElementById('loading');
-    loadingElement.style.display = 'block';
-    let dots = 0;
-    loadingInterval = setInterval(() => {
-        dots = (dots + 1) % 4;
-        loadingElement.innerText = 'Loading' + '.'.repeat(dots);
-    }, 500);
-}
-
-function hideLoading() {
-    clearInterval(loadingInterval);
-    const loadingElement = document.getElementById('loading');
-    loadingElement.style.display = 'none';
-}
-
-function clearOCRResult() {
-    document.getElementById('ocr-result').innerText = '';
-}
-
-function disableButton() {
-    const button = document.querySelector('button[onclick="performOCR()"]');
-    button.disabled = true;
-    button.style.backgroundColor = '#cccccc';
-}
-
-function enableButton() {
-    const button = document.querySelector('button[onclick="performOCR()"]');
-    button.disabled = false;
-    button.style.backgroundColor = '#007bff';
-}
HandwritingOCR/templates/home.html
DELETED
@@ -1,18 +0,0 @@
-<!DOCTYPE html>
-<html lang="en">
-<head>
-    <meta charset="UTF-8">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>Home</title>
-    <link rel="stylesheet" href="/static/css/style.css">
-</head>
-<body>
-    <div class="container">
-        <h1>Welcome to VN OCR</h1>
-        <div class="button-container">
-            <button type="button" onclick="window.location.href='/index'">Go to Webcam Capture</button>
-            <button type="button" onclick="window.location.href='/other'">Go to Image OCR</button>
-        </div>
-    </div>
-</body>
-</html>
HandwritingOCR/templates/index.html
DELETED
@@ -1,29 +0,0 @@
-<!DOCTYPE html>
-<html lang="en">
-<head>
-    <meta charset="UTF-8">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>Webcam Capture</title>
-    <link rel="stylesheet" href="/static/css/style.css">
-</head>
-<body>
-    <div class="container">
-        <h1>VN OCR</h1>
-        <div class="button-container">
-            <button type="button" onclick="startWebcam()">Start Webcam</button>
-            <button type="button" onclick="captureImage()">Capture Image</button>
-            <button type="button" onclick="performOCR()">Perform OCR</button>
-            <button type="button" onclick="window.location.href='/'">Go to Home</button>
-        </div>
-        <div id="webcam-container">
-            <video id="webcam" autoplay playsinline width="640" height="480"></video>
-        </div>
-        <div id="image-container">
-            <img id="captured-image" src="" alt="Captured Image" style="display: none;">
-        </div>
-        <div id="ocr-result" class="ocr-result"></div>
-        <div id="loading" class="loading" style="display: none;">Loading</div>
-    </div>
-    <script src="/static/js/script.js"></script>
-</body>
-</html>
HandwritingOCR/templates/other.html
DELETED
@@ -1,29 +0,0 @@
-<!DOCTYPE html>
-<html lang="en">
-<head>
-    <meta charset="UTF-8">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>Image OCR</title>
-    <link rel="stylesheet" href="/static/css/style.css">
-</head>
-<body>
-    <div class="container">
-        <h1>Image OCR</h1>
-        <div class="button-container">
-            <button type="button" onclick="window.location.href='/'">Go to Home</button>
-            <button type="button" onclick="performOCR()">Perform OCR</button>
-        </div>
-        <div class="paste-container">
-            <h2>Paste your image here</h2>
-            <div id="paste-box" contenteditable="true" onpaste="handlePaste(event)">
-                <p></p>
-            </div>
-            <img id="pasted-image" src="" alt="Pasted Image" style="display: none;">
-            <button type="button" onclick="saveImage()">Save Image</button>
-        </div>
-        <div id="ocr-result" class="ocr-result"></div>
-        <div id="loading" class="loading" style="display: none;">Loading</div>
-    </div>
-    <script src="/static/js/other.js"></script>
-</body>
-</html>