Spaces:
Sleeping
Sleeping
File size: 4,626 Bytes
3428d71 eee7d64 0fb7b8c fb25f31 3428d71 0fb7b8c fb25f31 3428d71 0fb7b8c 754c77c 3428d71 eee7d64 3428d71 08a4b4e 946d5fe a7a561c 0b7828a a7a561c 0b7828a 19c21be 0b7828a a7a561c 0b7828a a7a561c 0fb7b8c a7a561c 0fb7b8c 9067afe fb25f31 754c77c 0139118 97b2abf ad2f3c8 7673262 0139118 754c77c 2b305b0 0139118 2b305b0 754c77c 0139118 754c77c 0139118 754c77c 0139118 754c77c 0139118 754c77c 0139118 754c77c 0139118 754c77c 2b305b0 754c77c 2b305b0 754c77c 9c3164f 4f033af 754c77c 45ed86a 4f033af 0139118 754c77c 9c3164f 8f9e3fe 754c77c 3428d71 2b305b0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 |
import gradio as gr
import pytesseract
import cv2
import os
import numpy as np
from annif_client import AnnifClient
def get_annif_projects():
    """Create an Annif client and collect the ids and names of its projects.

    Returns:
        A ``(client, project_ids, project_names)`` triple. The two lists are
        built from the same project sequence, so they line up index by index.
        If anything goes wrong (client creation, an empty project list, a
        project missing a key) the error is printed and ``(None, [], [])`` is
        returned instead of raising.
    """
    try:
        client = AnnifClient()
        available = client.projects
        # An empty project list is treated as a failure like any other.
        if not available:
            raise ValueError("No projects found from Annif client")
        ids = [entry["project_id"] for entry in available]
        names = [entry["name"] for entry in available]
    except Exception as e:
        print(f"Error initializing Annif client: {str(e)}")
        return None, [], []
    else:
        return client, ids, names
# Query Annif once when the module is loaded. On failure get_annif_projects()
# returns (None, [], []), so these three names are always bound for the code
# below; proj_ids and proj_names are then empty lists and annif is None.
annif, proj_ids, proj_names = get_annif_projects()
def _load_bgr_image(image):
    """Return *image* as a BGR array, reading it from disk when it is a path."""
    if isinstance(image, str):
        img = cv2.imread(image)
        # cv2.imread signals failure by returning None rather than raising.
        if img is None:
            raise ValueError(f"Unable to read image from path: {image}")
        return img
    if isinstance(image, np.ndarray):
        # Arrays are taken to be RGB and converted to OpenCV's BGR order.
        return cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    raise ValueError("Unsupported image type")


def _render_suggestions(results):
    """Render Annif suggestions as an HTML list, highest score first."""
    from html import escape

    ranked = sorted(results, key=lambda x: x['score'], reverse=True)
    parts = ["""
<div id="suggestions-wrapper">
<h2 id="suggestions">Suggested subjects</h2>
<ul class="list-group" id="results">
"""]
    for result in ranked:
        # Values come from a remote service; escape them so a label or URI
        # containing &, <, > or quotes cannot break or inject markup.
        score = escape(str(result['score']), quote=True)
        uri = escape(str(result['uri']), quote=True)
        label = escape(str(result['label']))
        parts.append(f"""
<li class="list-group-item">
<meter value="{score}" min="0" max="1"></meter>
<a href="{uri}">{label}</a>
</li>
""")
    parts.append("""
</ul>
</div>
""")
    return "".join(parts)


def process(image, project_num: int, lang: str = "eng"):
    """Run OCR on *image* and fetch Annif subject suggestions for the text.

    Args:
        image: A filesystem path (``str``) or a ``numpy.ndarray``. A path
            input is deleted from disk once OCR has run (presumably it is a
            temporary upload -- confirm before passing files you want kept).
        project_num: Index into the module-level ``proj_ids`` list.
        lang: Language code forwarded to ``pytesseract.image_to_string``.

    Returns:
        ``(text, html)`` where ``text`` is the extracted text and ``html`` is
        a list of suggestions sorted by descending score. If any ``Exception``
        occurs, ``(error message, "")`` is returned instead of raising, so the
        message shows up in the text output.
    """
    try:
        if not proj_ids:
            raise ValueError("No Annif projects available")
        # Fail early with a readable message instead of a TypeError raised
        # after OCR has already run and the input file has been removed.
        if project_num is None:
            raise ValueError("No Annif project selected")
        img = _load_bgr_image(image)
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # Binarise before OCR; with THRESH_OTSU the threshold is chosen
        # automatically, so the 0 passed here is only a placeholder.
        threshold_img = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
        text = pytesseract.image_to_string(threshold_img, lang=lang)
        if isinstance(image, str):
            os.remove(image)
        results = annif.suggest(project_id=proj_ids[project_num], text=text)
        return text, _render_suggestions(results)
    except Exception as e:
        # UI boundary: report the problem to the user rather than crashing.
        return str(e), ""
# OCR language codes offered in the "Select Language for OCR" dropdown; the
# selected value is passed to pytesseract as `lang` by process().
langs = ("eng", "fin", "swe")
# Stylesheet handed to gr.Blocks(css=...). It styles the widgets tagged with
# the `mygrclass` class, the button with id `get-suggestions`, and the
# ids/classes used in the suggestions HTML that process() generates
# (`suggestions-wrapper`, `suggestions`, `list-group-item`, <meter>).
# NOTE(review): in the first rule `color: #343260` is declared twice (once
# with !important); the second declaration looks redundant.
# NOTE(review): `margin: 2 rem;` in the meter::-webkit-meter-bar rule has a
# space between the number and the unit, which is not a valid CSS length --
# confirm whether `2rem` was intended before changing it, since applying the
# margin would alter the current rendering.
css = """
.gradio-container, .gradio-container *, body, .mygrclass {
color: #343260 !important;
background-color: #f3f3f6;
color: #343260;
font-family: Jost, sans-serif;
font-weight: 400;
font-size: 1rem;
line-height: 1;
}
h1, h1 a {
padding: 2rem 0;
font-weight: 500;
font-size: 2rem;
text-align: center;
}
h2 {
font-weight: 500;
font-size: 1.2rem;
padding: 0.5rem 0;
}
#get-suggestions {
margin: 2rem 0 0 0;
background: #6280dc;
color: white !important;
border: none;
border-radius: 0px;
}
#suggestions-wrapper {
background-color: #f3f3f6;
padding: 1rem;
}
#suggestions {
border-top: 1px solid #343260;
padding-top: 0.5rem;
text-transform: uppercase;
font-size: 1.1rem;
}
.list-group-item {
display: flex;
align-items: center;
padding: 1px 0;
border-bottom: 1px solid #e0e0e0;
}
meter {
width: 24px;
margin-right: 10px;
}
meter:-moz-meter-optimum::-moz-meter-bar {
background: #6280dc;
}
meter::-webkit-meter-bar {
border: none;
border-radius: 0;
height: 18px;
background-color: #ccc;
box-shadow: 0 12px 3px -5px #e6e6e6 inset;
margin: 2 rem;
}
meter::-webkit-meter-optimum-value {
background: #6280dc;
}
"""
# Preselect the third project when there is one (the established default).
# Otherwise fall back to the first project, or to no selection when Annif
# returned nothing, so the app still starts and process() can report the
# problem instead of the module crashing with an IndexError while loading.
if len(proj_names) > 2:
    _default_project = proj_names[2]
elif proj_names:
    _default_project = proj_names[0]
else:
    _default_project = None

# Layout: input image and controls in the first row, OCR text and the
# rendered suggestions in the second row.
with gr.Blocks(theme=gr.themes.Default(radius_size="none"), css=css) as demo:
    gr.HTML("""
<h1><a href="https://annif.org">Annif</a> demo with image/camera input and OCR</h1>
""")
    with gr.Row():
        with gr.Column(scale=3):
            image_input = gr.Image(type="numpy", label="Input Image", elem_classes="mygrclass")
        with gr.Column(scale=1):
            # type="index" hands process() the position of the chosen name,
            # which it uses to index proj_ids.
            project = gr.Dropdown(
                choices=proj_names,
                label="Project (vocabulary and language)",
                type="index",
                elem_classes="mygrclass",
                value=_default_project,
            )
            lang = gr.Dropdown(
                choices=langs,
                label="Select Language for OCR",
                type="value",
                value="eng",
                elem_classes="mygrclass",
            )
            submit_btn = gr.Button(
                "Get text & suggestions",
                elem_id="get-suggestions",
                elem_classes="mygrclass",
            )
    with gr.Row():
        with gr.Column(scale=3):
            text_output = gr.Textbox(label="Extracted Text", elem_classes="mygrclass")
        with gr.Column(scale=1):
            html_output = gr.HTML()
    # process() returns (text, html), matching the two outputs in order.
    submit_btn.click(process, inputs=[image_input, project, lang], outputs=[text_output, html_output])
demo.launch()
|