File size: 4,626 Bytes
3428d71
 
 
 
 
eee7d64
0fb7b8c
fb25f31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3428d71
0fb7b8c
fb25f31
 
 
3428d71
 
 
 
 
 
 
 
 
0fb7b8c
754c77c
3428d71
eee7d64
3428d71
 
 
 
08a4b4e
946d5fe
a7a561c
 
0b7828a
 
 
a7a561c
 
 
 
0b7828a
 
19c21be
0b7828a
a7a561c
 
0b7828a
 
 
 
a7a561c
 
0fb7b8c
a7a561c
0fb7b8c
9067afe
fb25f31
754c77c
0139118
97b2abf
ad2f3c8
7673262
 
 
0139118
 
754c77c
2b305b0
0139118
2b305b0
 
 
754c77c
 
 
0139118
754c77c
 
 
0139118
 
 
754c77c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0139118
754c77c
 
 
 
 
 
0139118
 
 
754c77c
0139118
 
 
754c77c
0139118
 
754c77c
 
 
 
 
 
2b305b0
 
754c77c
2b305b0
754c77c
 
 
9c3164f
4f033af
754c77c
45ed86a
4f033af
0139118
754c77c
 
9c3164f
8f9e3fe
754c77c
 
 
 
3428d71
2b305b0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
import html
import os

import cv2
import gradio as gr
import numpy as np
import pytesseract
from annif_client import AnnifClient

def get_annif_projects():
    """Create an Annif client and collect the projects it exposes.

    Returns:
        A ``(client, project_ids, project_names)`` tuple, where the two lists
        are parallel (same order as reported by the client). If the client
        cannot be created, reports no projects, or anything else goes wrong,
        the error is printed and ``(None, [], [])`` is returned instead.
    """
    try:
        client = AnnifClient()
        available = client.projects
        if not available:
            raise ValueError("No projects found from Annif client")
        ids = [entry["project_id"] for entry in available]
        names = [entry["name"] for entry in available]
    except Exception as err:
        # Deliberately broad: any failure falls back to "no projects" so the
        # caller always gets a well-formed tuple.
        print(f"Error initializing Annif client: {str(err)}")
        return None, [], []
    return client, ids, names

# Query Annif once at import time. On failure get_annif_projects() returns
# (None, [], []), so these three names are always defined for process() and
# for the UI definition that reads them.
annif, proj_ids, proj_names = get_annif_projects()

def _load_grayscale(image):
    """Return *image* (a file path or an RGB array) as a grayscale array."""
    if isinstance(image, str):
        bgr = cv2.imread(image)
        if bgr is None:
            raise ValueError(f"Unable to read image from path: {image}")
        return cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)
    if isinstance(image, np.ndarray):
        # Arrays are treated as RGB, so convert straight to gray.
        return cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    raise ValueError("Unsupported image type")


def _render_suggestions(results):
    """Build the HTML list of suggestions, ordered by descending score.

    Every value taken from the Annif response is HTML-escaped before it is
    placed in the markup, so labels or URIs containing ``<``, ``&`` or quotes
    cannot break out of the element or attribute they are rendered in.
    """
    ranked = sorted(results, key=lambda hit: hit['score'], reverse=True)

    parts = ["""
        <div id="suggestions-wrapper">
            <h2 id="suggestions">Suggested subjects</h2>
            <ul class="list-group" id="results">
        """]

    for hit in ranked:
        score = html.escape(str(hit['score']))
        uri = html.escape(str(hit['uri']))
        label = html.escape(str(hit['label']))
        parts.append(f"""
            <li class="list-group-item">
                <meter value="{score}" min="0" max="1"></meter>
                <a href="{uri}">{label}</a>
            </li>
            """)

    parts.append("""
            </ul>
        </div>
        """)
    return "".join(parts)


def process(image, project_num: int, lang: str = "eng"):
    """OCR an image and ask Annif for subject suggestions on the text.

    The image is converted to grayscale, binarised with Otsu thresholding and
    passed to pytesseract. The recognised text is then sent to the Annif
    project at index ``project_num`` of the module-level ``proj_ids`` list.

    Args:
        image: Path to an image file, or an RGB ``numpy.ndarray``. When a
            path is given, the file is deleted once OCR has succeeded.
        project_num: Index into ``proj_ids`` of the Annif project to use.
        lang: Language code handed to pytesseract.

    Returns:
        ``(text, html)`` on success, where ``html`` is the rendered suggestion
        list. On any failure ``(error_message, "")`` is returned instead of
        raising, so the message is shown in the UI.
    """
    try:
        # Validate the inputs first so the user gets a clear message rather
        # than a low-level type or indexing error.
        if image is None:
            raise ValueError("No image provided")
        if not proj_ids:
            raise ValueError("No Annif projects available")
        if project_num is None:
            raise ValueError("No Annif project selected")

        gray = _load_grayscale(image)
        threshold_img = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

        text = pytesseract.image_to_string(threshold_img, lang=lang)

        if isinstance(image, str):
            os.remove(image)

        results = annif.suggest(project_id=proj_ids[project_num], text=text)
        return text, _render_suggestions(results)
    except Exception as e:
        # UI boundary: report the problem in the output instead of raising.
        return str(e), ""

# Language codes offered in the "Select Language for OCR" dropdown; the chosen
# value is passed to pytesseract as ``lang``.
langs = ("eng", "fin", "swe")

# Stylesheet handed to gr.Blocks.
# Two declarations of the earlier version were dropped because they had no
# effect: a second, non-!important ``color: #343260;`` that repeated the
# !important one above it, and ``margin: 2 rem;`` on the WebKit meter bar,
# which is invalid CSS (whitespace between number and unit) and is therefore
# discarded by browsers. Rendering is unchanged.
css = """
.gradio-container, .gradio-container *, body, .mygrclass {
    color: #343260 !important;
    background-color: #f3f3f6;
    font-family: Jost, sans-serif;
    font-weight: 400;
    font-size: 1rem;
    line-height: 1;
}
h1, h1 a {
    padding: 2rem 0;
    font-weight: 500;
    font-size: 2rem;
    text-align: center;
}
h2 {
    font-weight: 500;
    font-size: 1.2rem;
    padding: 0.5rem 0;
}
#get-suggestions {
    margin: 2rem 0 0 0;
    background: #6280dc;
    color: white !important;
    border: none;
    border-radius: 0px;
}
#suggestions-wrapper {
    background-color: #f3f3f6;
    padding: 1rem;
}
#suggestions {
    border-top: 1px solid #343260;
    padding-top: 0.5rem;
    text-transform: uppercase;
    font-size: 1.1rem;
}
.list-group-item {
    display: flex;
    align-items: center;
    padding: 1px 0;
    border-bottom: 1px solid #e0e0e0;
}
meter {
    width: 24px;
    margin-right: 10px;
}
meter:-moz-meter-optimum::-moz-meter-bar {
    background: #6280dc;
}
meter::-webkit-meter-bar {
    border: none;
    border-radius: 0;
    height: 18px;
    background-color: #ccc;
    box-shadow: 0 12px 3px -5px #e6e6e6 inset;
}
meter::-webkit-meter-optimum-value {
    background: #6280dc;
}
"""

# Preselect the third listed project when it exists; otherwise fall back to
# the first one (or to no selection). Indexing proj_names[2] unconditionally
# raised IndexError at startup whenever fewer than three projects were
# available, including the empty list produced when Annif cannot be reached.
_PREFERRED_PROJECT_INDEX = 2
if len(proj_names) > _PREFERRED_PROJECT_INDEX:
    _default_project = proj_names[_PREFERRED_PROJECT_INDEX]
elif proj_names:
    _default_project = proj_names[0]
else:
    _default_project = None

with gr.Blocks(theme=gr.themes.Default(radius_size="none"), css=css) as demo:

    gr.HTML("""
    <h1><a href="https://annif.org">Annif</a> demo with image/camera input and OCR</h1>
    """)

    # Top row: image input on the left, project/language selection and the
    # submit button on the right.
    with gr.Row():
        with gr.Column(scale=3):
            image_input = gr.Image(type="numpy", label="Input Image", elem_classes="mygrclass")
        with gr.Column(scale=1):
            # type="index" makes the dropdown pass the position of the chosen
            # name, which process() uses to index proj_ids.
            project = gr.Dropdown(
                choices=proj_names,
                label="Project (vocabulary and language)",
                type="index",
                elem_classes="mygrclass",
                value=_default_project,
            )
            lang = gr.Dropdown(choices=langs, label="Select Language for OCR", type="value", value="eng", elem_classes="mygrclass")
            submit_btn = gr.Button("Get text & suggestions", elem_id="get-suggestions", elem_classes="mygrclass")

    # Bottom row: extracted text on the left, rendered suggestions on the right.
    with gr.Row():
        with gr.Column(scale=3):
            text_output = gr.Textbox(label="Extracted Text", elem_classes="mygrclass")
        with gr.Column(scale=1):
            html_output = gr.HTML()

    submit_btn.click(process, inputs=[image_input, project, lang], outputs=[text_output, html_output])

demo.launch()