Spaces:
Running
on
T4
Running
on
T4
updates to the readme
Browse files- README.md +5 -1
- app.py +10 -30
- helper/text/text_about.py +26 -24
- helper/text/text_howto.py +59 -22
- helper/text/text_roadmap.py +42 -11
- tabs/htr_tool.py +7 -7
README.md
CHANGED
@@ -6,6 +6,10 @@ colorTo: green
|
|
6 |
sdk: docker
|
7 |
pinned: false
|
8 |
models:
|
9 |
-
|
|
|
|
|
|
|
|
|
10 |
datasets: []
|
11 |
---
|
|
|
6 |
sdk: docker
|
7 |
pinned: false
|
8 |
models:
|
9 |
+
[
|
10 |
+
"Riksarkivet/rtmdet_regions",
|
11 |
+
"Riksarkivet/rtmdet_lines",
|
12 |
+
"Riksarkivet/satrn_htr",
|
13 |
+
]
|
14 |
datasets: []
|
15 |
---
|
app.py
CHANGED
@@ -50,48 +50,29 @@ with gr.Blocks(title="HTR Riksarkivet", theme=theme, css=css) as demo:
|
|
50 |
gr.Markdown(TextHowTo.stepwise_htr_tool_tab4)
|
51 |
gr.Markdown(TextHowTo.stepwise_htr_tool_end)
|
52 |
|
53 |
-
with gr.Tab("API"):
|
54 |
with gr.Row():
|
55 |
with gr.Column():
|
56 |
gr.Markdown(TextHowTo.htr_tool_api_text)
|
57 |
gr.Code(
|
58 |
-
value=
|
59 |
-
from gradio_client import Client # pip install gradio_client
|
60 |
-
|
61 |
-
# Change url to your client (localhost: http://127.0.0.1:7860/)
|
62 |
-
client = Client("https://huggingface.co/spaces/Riksarkivet/htr_demo")
|
63 |
-
job = client.submit(
|
64 |
-
"https://your.image.url.or.pah.jpg",
|
65 |
-
api_name="/predict",
|
66 |
-
)
|
67 |
-
|
68 |
-
print(job.result())
|
69 |
-
|
70 |
-
""",
|
71 |
language="python",
|
72 |
interactive=False,
|
73 |
show_label=False,
|
74 |
)
|
75 |
-
gr.Markdown(
|
76 |
-
"""
|
77 |
-
Below you can see the results, in XML, from the API call:
|
78 |
-
"""
|
79 |
-
)
|
80 |
-
gr.Markdown(TextHowTo.figure_htr_api)
|
81 |
-
|
82 |
with gr.Column():
|
83 |
gr.Markdown(TextHowTo.duplicatin_space_htr_text)
|
84 |
gr.Markdown(TextHowTo.figure_htr_hardware)
|
85 |
-
|
86 |
-
gr.Markdown(
|
87 |
-
"Note that if you have GPU hardware available, you can also run this application on Docker or clone it locally."
|
88 |
-
)
|
89 |
|
90 |
with gr.Tab("About"):
|
91 |
with gr.Tabs():
|
92 |
with gr.Tab("Project"):
|
93 |
with gr.Row():
|
94 |
-
gr.
|
|
|
|
|
|
|
95 |
with gr.Row():
|
96 |
with gr.Tabs():
|
97 |
with gr.Tab("I. Binarization"):
|
@@ -102,11 +83,10 @@ print(job.result())
|
|
102 |
gr.Markdown(TextAbout.text_line_segmentation)
|
103 |
with gr.Tab("IV. Transcriber"):
|
104 |
gr.Markdown(TextAbout.text_htr)
|
|
|
|
|
105 |
with gr.Row():
|
106 |
-
|
107 |
-
gr.Markdown(TextAbout.text_data)
|
108 |
-
with gr.Column():
|
109 |
-
gr.Markdown(TextAbout.text_models)
|
110 |
|
111 |
with gr.Tab("Roadmap"):
|
112 |
with gr.Row():
|
|
|
50 |
gr.Markdown(TextHowTo.stepwise_htr_tool_tab4)
|
51 |
gr.Markdown(TextHowTo.stepwise_htr_tool_end)
|
52 |
|
53 |
+
with gr.Tab("API & Duplicate for Privat use"):
|
54 |
with gr.Row():
|
55 |
with gr.Column():
|
56 |
gr.Markdown(TextHowTo.htr_tool_api_text)
|
57 |
gr.Code(
|
58 |
+
value=TextHowTo.code_for_api,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
language="python",
|
60 |
interactive=False,
|
61 |
show_label=False,
|
62 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
with gr.Column():
|
64 |
gr.Markdown(TextHowTo.duplicatin_space_htr_text)
|
65 |
gr.Markdown(TextHowTo.figure_htr_hardware)
|
66 |
+
gr.Markdown(TextHowTo.duplicatin_for_privat)
|
|
|
|
|
|
|
67 |
|
68 |
with gr.Tab("About"):
|
69 |
with gr.Tabs():
|
70 |
with gr.Tab("Project"):
|
71 |
with gr.Row():
|
72 |
+
with gr.Column():
|
73 |
+
gr.Markdown(TextAbout.intro_and_pipeline_overview_text)
|
74 |
+
with gr.Column():
|
75 |
+
gr.Markdown(TextAbout.text_src_code_data_models)
|
76 |
with gr.Row():
|
77 |
with gr.Tabs():
|
78 |
with gr.Tab("I. Binarization"):
|
|
|
83 |
gr.Markdown(TextAbout.text_line_segmentation)
|
84 |
with gr.Tab("IV. Transcriber"):
|
85 |
gr.Markdown(TextAbout.text_htr)
|
86 |
+
|
87 |
+
with gr.Tab("Contribution"):
|
88 |
with gr.Row():
|
89 |
+
gr.Markdown(TextRoadmap.text_contribution)
|
|
|
|
|
|
|
90 |
|
91 |
with gr.Tab("Roadmap"):
|
92 |
with gr.Row():
|
helper/text/text_about.py
CHANGED
@@ -3,21 +3,26 @@ class TextAbout:
|
|
3 |
intro_and_pipeline_overview_text = """
|
4 |
|
5 |
## Introduction
|
6 |
-
The Swedish National Archives
|
7 |
-
|
8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
## The Pipeline in Overview
|
11 |
|
12 |
-
The steps in the pipeline
|
13 |
"""
|
14 |
|
15 |
binarization = """
|
16 |
|
17 |
### Binarization
|
18 |
-
The reason for binarizing the images before processing them is that we want the models to generalize as well as possible.
|
19 |
-
By training on only binarized images and by binarizing images before running them through the pipeline, we take the target domain closer to the training domain, and ruduce negative effects of background variation, background noise etc., on the final results.
|
20 |
-
The pipeline implements a simple adaptive thresholding algorithm for binarization.
|
21 |
<figure>
|
22 |
<img src="https://github.com/Borg93/htr_gradio_file_placeholder/blob/main/app_project_bin.png?raw=true" alt="HTR_tool" style="width:70%; display: block; margin-left: auto; margin-right:auto;" >
|
23 |
</figure>
|
@@ -25,9 +30,7 @@ class TextAbout:
|
|
25 |
"""
|
26 |
text_region_segment = """
|
27 |
### Text-region segmentation
|
28 |
-
To facilitate the text-line segmentation process, it is advantageous to segment the image into text-regions beforehand. This initial step offers several benefits, including reducing variations in line spacing, eliminating blank areas on the page, establishing a clear reading order, and distinguishing marginalia from the main text.
|
29 |
-
The segmentation model utilized in this process predicts both bounding boxes and masks. Although the model has the capability to predict both, only the masks are utilized for the segmentation tasks of lines and regions.
|
30 |
-
An essential post-processing step involves checking for regions that are contained within other regions. During this step, only the containing region is retained, while the contained region is discarded. This ensures that the final segmented text-regions are accurate and devoid of overlapping or redundant areas.
|
31 |
<figure>
|
32 |
<img src="https://github.com/Borg93/htr_gradio_file_placeholder/blob/main/app_project_region.png?raw=true" alt="HTR_tool" style="width:70%; display: block; margin-left: auto; margin-right:auto;" >
|
33 |
</figure>
|
@@ -35,8 +38,7 @@ class TextAbout:
|
|
35 |
text_line_segmentation = """
|
36 |
### Text-line segmentation
|
37 |
|
38 |
-
This is also an RTMDet model that's trained on extracting text-lines from cropped text-regions within an image.
|
39 |
-
The same post-processing on the instance segmentation masks is done here as in the text-region segmentation step.
|
40 |
<figure>
|
41 |
<img src="https://github.com/Borg93/htr_gradio_file_placeholder/blob/main/app_project_line.png?raw=true" alt="HTR_tool" style="width:70%; display: block; margin-left: auto; margin-right:auto;" >
|
42 |
</figure>
|
@@ -44,28 +46,28 @@ class TextAbout:
|
|
44 |
text_htr = """
|
45 |
### HTR
|
46 |
|
47 |
-
For the text-recognition a SATRN model was trained with mmocr on approximately one million handwritten text-line images ranging from 1600 to 1900.
|
48 |
-
It was trained on a wide variety of archival material to make it generalize as well as possible. See below for detailed evaluation results, and also some finetuning experiments.
|
49 |
<figure>
|
50 |
<img src="https://github.com/Borg93/htr_gradio_file_placeholder/blob/main/app_project_htr.png?raw=true" alt="HTR_tool" style="width:70%; display: block; margin-left: auto; margin-right:auto;" >
|
51 |
</figure>
|
52 |
"""
|
53 |
|
54 |
-
|
55 |
-
##
|
|
|
|
|
|
|
56 |
|
|
|
57 |
For a glimpse into the kind of data we're working with, you can explore our sample test data on Hugging Face:
|
58 |
-
|
59 |
-
[HuggingFace Dataset Card](https://huggingface.co/datasets/Riksarkivet/test_images_demo)
|
60 |
-
|
61 |
**Note**: This is just a sample. The complete training dataset will be released in the future.
|
62 |
-
"""
|
63 |
|
64 |
-
text_models = """
|
65 |
## The Models
|
66 |
-
For detailed information about all the models used in this project, please refer to the model cards available on Hugging Face:
|
67 |
-
|
68 |
-
[
|
|
|
69 |
"""
|
70 |
|
71 |
|
|
|
3 |
intro_and_pipeline_overview_text = """
|
4 |
|
5 |
## Introduction
|
6 |
+
The Swedish National Archives introduces a demonstrational end-to-end HTR (Handwritten Text Recognition) pipeline. This pipeline comprises two instance segmentation models: one designated for segmenting text-regions and another for isolating text-lines within these regions, coupled with an HTR model for image-to-text transcription. The objective of this project is to establish a generic pipeline capable of processing running-text documents spanning from 1600 to 1900.
|
7 |
+
|
8 |
+
## Usage
|
9 |
+
It's crucial to emphasize that this application serves primarily for demonstration purposes, aimed at showcasing the various models employed in the current workflow for processing documents with running-text. <br>
|
10 |
+
|
11 |
+
For an insight into the upcoming features we are working on:
|
12 |
+
- Navigate to the > **About** > **Roadmap**.
|
13 |
+
|
14 |
+
To understand how to utilize this application through a REST API, self-host or via Docker,
|
15 |
+
- Navigate to the > **How to Use** > **API & Duplicate for Private Use**.
|
16 |
|
17 |
## The Pipeline in Overview
|
18 |
|
19 |
+
The steps in the pipeline can be seen below as follows:
|
20 |
"""
|
21 |
|
22 |
binarization = """
|
23 |
|
24 |
### Binarization
|
25 |
+
The reason for binarizing the images before processing them is that we want the models to generalize as well as possible. By training on only binarized images and by binarizing images before running them through the pipeline, we take the target domain closer to the training domain, and reduce negative effects of background variation, background noise etc., on the final results. The pipeline implements a simple adaptive thresholding algorithm for binarization.
|
|
|
|
|
26 |
<figure>
|
27 |
<img src="https://github.com/Borg93/htr_gradio_file_placeholder/blob/main/app_project_bin.png?raw=true" alt="HTR_tool" style="width:70%; display: block; margin-left: auto; margin-right:auto;" >
|
28 |
</figure>
|
|
|
30 |
"""
|
31 |
text_region_segment = """
|
32 |
### Text-region segmentation
|
33 |
+
To facilitate the text-line segmentation process, it is advantageous to segment the image into text-regions beforehand. This initial step offers several benefits, including reducing variations in line spacing, eliminating blank areas on the page, establishing a clear reading order, and distinguishing marginalia from the main text. The segmentation model utilized in this process predicts both bounding boxes and masks. Although the model has the capability to predict both, only the masks are utilized for the segmentation tasks of lines and regions. An essential post-processing step involves checking for regions that are contained within other regions. During this step, only the containing region is retained, while the contained region is discarded. This ensures that the final segmented text-regions are accurate and devoid of overlapping or redundant areas.
|
|
|
|
|
34 |
<figure>
|
35 |
<img src="https://github.com/Borg93/htr_gradio_file_placeholder/blob/main/app_project_region.png?raw=true" alt="HTR_tool" style="width:70%; display: block; margin-left: auto; margin-right:auto;" >
|
36 |
</figure>
|
|
|
38 |
text_line_segmentation = """
|
39 |
### Text-line segmentation
|
40 |
|
41 |
+
This is also an RTMDet model that's trained on extracting text-lines from cropped text-regions within an image. The same post-processing on the instance segmentation masks is done here as in the text-region segmentation step.
|
|
|
42 |
<figure>
|
43 |
<img src="https://github.com/Borg93/htr_gradio_file_placeholder/blob/main/app_project_line.png?raw=true" alt="HTR_tool" style="width:70%; display: block; margin-left: auto; margin-right:auto;" >
|
44 |
</figure>
|
|
|
46 |
text_htr = """
|
47 |
### HTR
|
48 |
|
49 |
+
For the text-recognition a SATRN model was trained with mmocr on approximately one million handwritten text-line images ranging from 1600 to 1900. It was trained on a wide variety of archival material to make it generalize as well as possible. See below for detailed evaluation results, and also some finetuning experiments.
|
|
|
50 |
<figure>
|
51 |
<img src="https://github.com/Borg93/htr_gradio_file_placeholder/blob/main/app_project_htr.png?raw=true" alt="HTR_tool" style="width:70%; display: block; margin-left: auto; margin-right:auto;" >
|
52 |
</figure>
|
53 |
"""
|
54 |
|
55 |
+
text_src_code_data_models = """
|
56 |
+
## Source Code
|
57 |
+
Please fork and leave a star on Github if you like it! The code for this project can be found here:
|
58 |
+
- [Github](https://github.com/Borg93/htr_gradio)
|
59 |
+
**Note**: We will in the future package all of the code for mass htr (batch inference on multi-GPU setup), but the code is still work in progress.
|
60 |
|
61 |
+
## The Dataset
|
62 |
For a glimpse into the kind of data we're working with, you can explore our sample test data on Hugging Face:
|
63 |
+
- [HuggingFace Dataset Card](https://huggingface.co/datasets/Riksarkivet/test_images_demo)
|
|
|
|
|
64 |
**Note**: This is just a sample. The complete training dataset will be released in the future.
|
|
|
65 |
|
|
|
66 |
## The Models
|
67 |
+
The models within this pipeline will be subject to continual retraining and updates as more data becomes accessible. For detailed information about all the models used in this project, please refer to the model cards available on Hugging Face:
|
68 |
+
- [Riksarkivet/rtmdet_regions](https://huggingface.co/Riksarkivet/rtmdet_regions)
|
69 |
+
- [Riksarkivet/rtmdet_lines](https://huggingface.co/Riksarkivet/rtmdet_lines)
|
70 |
+
- [Riksarkivet/satrn_htr](https://huggingface.co/Riksarkivet/satrn_htr)
|
71 |
"""
|
72 |
|
73 |
|
helper/text/text_howto.py
CHANGED
@@ -12,9 +12,7 @@ class TextHowTo:
|
|
12 |
<figcaption style="text-align: center;"> <em> Figure - How to Run the HTR Tool </em></figcaption>
|
13 |
</figure>
|
14 |
The HTR Tool will transform an image of handwritten text into structured, transcribed text within approximately 1-2 minutes (depending on your hardware).
|
15 |
-
Note that the generated page XML file is strucutred in such manner that it allows for an easy integration with other software, such as Transkribus.
|
16 |
-
|
17 |
-
<br><br>
|
18 |
|
19 |
You can use our own developed Image viewer for the xml output:
|
20 |
<p align="center">
|
@@ -33,10 +31,8 @@ class TextHowTo:
|
|
33 |
stepwise_htr_tool = """
|
34 |
## Stepwise HTR Tool
|
35 |
|
36 |
-
The Stepwise HTR Tool is a powerful tool for performing Handwritten Text Recognition (HTR) tasks. The Stepwise version provides you with fine-grained control over each step of the HTR process, allowing for greater customization and troubleshooting capabilities.
|
37 |
-
<br
|
38 |
-
With the Stepwise HTR Tool, you can break down the HTR process into distinct steps: region segmentation, line segmentation, text transcription, and result exploration. This tool offers a range of configuration options to tailor the HTR process to your specific needs. You can adjust settings such as P-threshold and C-threshold to fine-tune the region and line segmentation, and choose from a selection of underlying machine learning models to drive each step of the process.
|
39 |
-
<br><br>
|
40 |
The Stepwise HTR Tool also provides a dedicated Explore Results tab, allowing you to thoroughly analyze and interact with the transcriptions. You can sort and identify both bad and good predictions, helping you gain insights and make improvements to the HTR accuracy. Each step is interconnected, and the output of one step serves as the input for the next step, ensuring a seamless and efficient workflow.
|
41 |
|
42 |
"""
|
@@ -48,14 +44,12 @@ The Stepwise HTR Tool also provides a dedicated Explore Results tab, allowing yo
|
|
48 |
htr_tool_api_text = """
|
49 |
## Usage of Client API
|
50 |
|
51 |
-
For those interested in testing out the demo, it's available to run as a Gradio Python client.
|
52 |
-
To facilitate this, there's a lightweight package called gradio_client that you can easily install via pip.
|
53 |
"""
|
54 |
|
55 |
stepwise_htr_tool_tab1 = """
|
56 |
### Tab 1: Region Segmentation
|
57 |
-
The Region Segmentation tab allows you to perform the initial step of segmenting the handwritten text into regions of interest. By adjusting the P-threshold and C-threshold settings, you can control the confidence score required for a prediction and the minimum overlap or similarity for a detected region to be considered valid. Additionally, you can select an underlying machine learning model for region segmentation.
|
58 |
-
<br><br>
|
59 |
To perform region segmentation, follow these steps:
|
60 |
1. Open the "Region Segmentation" tab.
|
61 |
2. Upload an image or choose an image from the provided Examples (under "Example images to use:" accordin).
|
@@ -69,8 +63,7 @@ To perform region segmentation, follow these steps:
|
|
69 |
|
70 |
### Tab 2: Line Segmentation
|
71 |
In the Line Segmentation tab, you can further refine the segmentation process by identifying individual lines of text.
|
72 |
-
Similar to the Region Segmentation tab, you can adjust the P-threshold and C-threshold settings for line segmentation and choose an appropriate machine learning model.
|
73 |
-
<br><br>
|
74 |
To perform line segmentation, follow these steps:
|
75 |
1. Open the "Line Segmentation" tab.
|
76 |
2. Choice a segmented region from image gallery, which populated with the results from the previous tab.
|
@@ -83,8 +76,7 @@ To perform line segmentation, follow these steps:
|
|
83 |
|
84 |
stepwise_htr_tool_tab3 = """
|
85 |
### Tab 3: Transcribe Text
|
86 |
-
The Transcribe Text tab allows you to convert the segmented text into transcriptions. Here, you can select the desired machine learning model for text transcription.
|
87 |
-
<br><br>
|
88 |
To transcribe text, follow these steps:
|
89 |
1. Open the "Transcribe Text" tab.
|
90 |
2. The image to transcribe is predefined with the results from the previous tab.
|
@@ -95,8 +87,7 @@ To transcribe text, follow these steps:
|
|
95 |
|
96 |
stepwise_htr_tool_tab4 = """
|
97 |
### Tab 4: Explore Results
|
98 |
-
Once the transcription is complete, you can explore the results in the Explore Results tab. This tab provides various features for analyzing and interacting with the transcriptions, allowing you to sort and identify both bad and good predictions.
|
99 |
-
<br><br>
|
100 |
To explore the HTR results, follow these steps:
|
101 |
1. Open the "Explore Results" tab.
|
102 |
2. Analyze the generated results. The image gallery of cropped text line segments is bi-directional coupled through interaction with the dataframe on the left.
|
@@ -128,10 +119,9 @@ Alternatively, you can watch the instructional video below, which provides a ste
|
|
128 |
</figure>
|
129 |
"""
|
130 |
duplicatin_space_htr_text = """
|
131 |
-
## Duplicating
|
132 |
-
|
133 |
-
However, there's a workaround for those who need to make frequent API calls. By duplicating a public Space, you can create your own private Space.
|
134 |
-
This private version allows you to make unlimited requests without any restrictions. So, if you're planning on heavy usage duplicate space:
|
135 |
|
136 |
<br>
|
137 |
<p align="center">
|
@@ -139,10 +129,57 @@ This private version allows you to make unlimited requests without any restricti
|
|
139 |
<img src="https://huggingface.co/datasets/huggingface/badges/raw/main/duplicate-this-space-xl-dark.svg" alt="Badge 1">
|
140 |
</a>
|
141 |
</p>
|
142 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
143 |
|
144 |
"""
|
145 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
146 |
|
147 |
if __name__ == "__main__":
|
148 |
pass
|
|
|
12 |
<figcaption style="text-align: center;"> <em> Figure - How to Run the HTR Tool </em></figcaption>
|
13 |
</figure>
|
14 |
The HTR Tool will transform an image of handwritten text into structured, transcribed text within approximately 1-2 minutes (depending on your hardware).
|
15 |
+
Note that the generated page XML file is structured in such a manner that it allows for easy integration with other software, such as Transkribus. <br>
|
|
|
|
|
16 |
|
17 |
You can use our own developed Image viewer for the xml output:
|
18 |
<p align="center">
|
|
|
31 |
stepwise_htr_tool = """
|
32 |
## Stepwise HTR Tool
|
33 |
|
34 |
+
The Stepwise HTR Tool is a powerful tool for performing Handwritten Text Recognition (HTR) tasks. The Stepwise version provides you with fine-grained control over each step of the HTR process, allowing for greater customization and troubleshooting capabilities. <br>
|
35 |
+
With the Stepwise HTR Tool, you can break down the HTR process into distinct steps: region segmentation, line segmentation, text transcription, and result exploration. This tool offers a range of configuration options to tailor the HTR process to your specific needs. You can adjust settings such as P-threshold and C-threshold to fine-tune the region and line segmentation, and choose from a selection of underlying machine learning models to drive each step of the process. <br>
|
|
|
|
|
36 |
The Stepwise HTR Tool also provides a dedicated Explore Results tab, allowing you to thoroughly analyze and interact with the transcriptions. You can sort and identify both bad and good predictions, helping you gain insights and make improvements to the HTR accuracy. Each step is interconnected, and the output of one step serves as the input for the next step, ensuring a seamless and efficient workflow.
|
37 |
|
38 |
"""
|
|
|
44 |
htr_tool_api_text = """
|
45 |
## Usage of Client API
|
46 |
|
47 |
+
For those interested in testing out the demo, it's available to run as a Gradio Python client. To facilitate this, there's a lightweight package called `gradio_client` that you can easily install via pip.
|
|
|
48 |
"""
|
49 |
|
50 |
stepwise_htr_tool_tab1 = """
|
51 |
### Tab 1: Region Segmentation
|
52 |
+
The Region Segmentation tab allows you to perform the initial step of segmenting the handwritten text into regions of interest. By adjusting the P-threshold and C-threshold settings, you can control the confidence score required for a prediction and the minimum overlap or similarity for a detected region to be considered valid. Additionally, you can select an underlying machine learning model for region segmentation. <br>
|
|
|
53 |
To perform region segmentation, follow these steps:
|
54 |
1. Open the "Region Segmentation" tab.
|
55 |
2. Upload an image or choose an image from the provided Examples (under "Example images to use:" accordion).
|
|
|
63 |
|
64 |
### Tab 2: Line Segmentation
|
65 |
In the Line Segmentation tab, you can further refine the segmentation process by identifying individual lines of text.
|
66 |
+
Similar to the Region Segmentation tab, you can adjust the P-threshold and C-threshold settings for line segmentation and choose an appropriate machine learning model. <br>
|
|
|
67 |
To perform line segmentation, follow these steps:
|
68 |
1. Open the "Line Segmentation" tab.
|
69 |
2. Choose a segmented region from the image gallery, which is populated with the results from the previous tab.
|
|
|
76 |
|
77 |
stepwise_htr_tool_tab3 = """
|
78 |
### Tab 3: Transcribe Text
|
79 |
+
The Transcribe Text tab allows you to convert the segmented text into transcriptions. Here, you can select the desired machine learning model for text transcription. <br>
|
|
|
80 |
To transcribe text, follow these steps:
|
81 |
1. Open the "Transcribe Text" tab.
|
82 |
2. The image to transcribe is predefined with the results from the previous tab.
|
|
|
87 |
|
88 |
stepwise_htr_tool_tab4 = """
|
89 |
### Tab 4: Explore Results
|
90 |
+
Once the transcription is complete, you can explore the results in the Explore Results tab. This tab provides various features for analyzing and interacting with the transcriptions, allowing you to sort and identify both bad and good predictions. <br>
|
|
|
91 |
To explore the HTR results, follow these steps:
|
92 |
1. Open the "Explore Results" tab.
|
93 |
2. Analyze the generated results. The image gallery of cropped text line segments is bi-directionally coupled through interaction with the dataframe on the left.
|
|
|
119 |
</figure>
|
120 |
"""
|
121 |
duplicatin_space_htr_text = """
|
122 |
+
## Duplicating for Private Use
|
123 |
+
|
124 |
+
It's worth noting that while using any public Space as an API is possible, there's a catch. Hugging Face might rate limit you if you send an excessive number of requests in a short period. However, there's a workaround for those who need to make frequent API calls. By duplicating a public Space, you can create your own private Space. This private version allows you to make unlimited requests without any restrictions. So, if you're planning on heavy usage, duplicate the Space:
|
|
|
125 |
|
126 |
<br>
|
127 |
<p align="center">
|
|
|
129 |
<img src="https://huggingface.co/datasets/huggingface/badges/raw/main/duplicate-this-space-xl-dark.svg" alt="Badge 1">
|
130 |
</a>
|
131 |
</p>
|
132 |
+
"""
|
133 |
+
|
134 |
+
duplicatin_for_privat = """
|
135 |
+
For individuals with access to dedicated hardware, additional options are available. You have the flexibility to run this application on your own machine utilizing Docker, or by cloning the repository directly. Doing so allows you to leverage your hardware's capabilities to their fullest extent.
|
136 |
+
- [Clone with Docker](https://huggingface.co/spaces/Riksarkivet/htr_demo?docker=true)
|
137 |
+
- [Clone Repo](https://huggingface.co/spaces/Riksarkivet/htr_demo/settings?clone=true)
|
138 |
+
|
139 |
+
**Note**: To take advantage of CUDA for accelerated inferences, an Nvidia graphics card is required. This setup significantly enhances the performance, ensuring a smoother and faster operation.
|
140 |
|
141 |
"""
|
142 |
|
143 |
+
code_for_api = """
|
144 |
+
from gradio_client import Client # pip install gradio_client
|
145 |
+
|
146 |
+
# Change url to your client (localhost: http://127.0.0.1:7860/)
|
147 |
+
client = Client("https://huggingface.co/spaces/Riksarkivet/htr_demo")
|
148 |
+
job = client.submit(
|
149 |
+
"https://your.image.url.or.pah.jpg",
|
150 |
+
api_name="/predict",
|
151 |
+
)
|
152 |
+
|
153 |
+
print(job.result())
|
154 |
+
|
155 |
+
# Loaded as API: http://127.0.0.1:7860/ ✔
|
156 |
+
# <?xml version="1.0" encoding="UTF-8"?>
|
157 |
+
# <PcGts xmlns="http://schema.primaresearch.org/PAGE/gts/pagecontent/2013-07-15" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://schema.primaresearch.org/PAGE/gts/pagecontent/2013-07-15 http://schema.primaresearch.org/PAGE/gts/pagecontent/2013-07-15/pagecontent.xsd">
|
158 |
+
# <Metadata>
|
159 |
+
# <Creator>Swedish National Archives</Creator>
|
160 |
+
# <Created>2023-08-21, 13:28:06</Created>
|
161 |
+
# </Metadata>
|
162 |
+
# <Page imageFilename="page_xml.xml" imageWidth="4885" imageHeight="4066">
|
163 |
+
# <TextRegion id="region_0" custom="readingOrder {index:0;}">
|
164 |
+
# <Coords points="1477,265 1467,217 1440,201 1370,211 1248,203 1127,224 1067,224 1003,212 844,247 766,243 747,261 742,280 751,332 766,346 1258,341 1357,332 1439,341 1468,327"/>
|
165 |
+
# <TextLine id="line_region_0_0" custom="readingOrder {index:0;}">
|
166 |
+
# <Coords points="1458,248 1443,222 1449,200 1412,215 1366,200 1325,207 1302,200 1241,200 1235,206 1205,200 1187,210 1085,222 957,206 795,239 769,273 771,333 783,340 1445,333 1450,324"/>
|
167 |
+
# <TextEquiv>
|
168 |
+
# <Unicode>År 1865.</Unicode>
|
169 |
+
# </TextEquiv>
|
170 |
+
# <PredScore pred_score="0.9482"/>
|
171 |
+
# </TextLine>
|
172 |
+
# </TextRegion>
|
173 |
+
# <TextRegion id="region_1" custom="readingOrder {index:1;}">
|
174 |
+
# ......................................
|
175 |
+
# </TextRegion>
|
176 |
+
# </Page>
|
177 |
+
# </PcGts>
|
178 |
+
|
179 |
+
# Output is truncated. View as a scrollable element or open in a text editor. Adjust cell output settings...
|
180 |
+
|
181 |
+
"""
|
182 |
+
|
183 |
|
184 |
if __name__ == "__main__":
|
185 |
pass
|
helper/text/text_roadmap.py
CHANGED
@@ -1,28 +1,27 @@
|
|
1 |
class TextRoadmap:
|
2 |
roadmap = """
|
3 |
## Roadmap
|
4 |
-
|
5 |
-
|
6 |
- Continually retrain and update both segmentation and text-recognition models as more training data becomes available.
|
7 |
-
|
8 |
-
|
|
|
|
|
9 |
- Train a TrOCR model specialized on Swedish historical handwritten text.
|
10 |
- Initialize with a historical BERT-model trained at the Swedish National Archives.
|
11 |
|
12 |
-
|
13 |
- Develop an easy-to-implement pipeline like the demo.
|
14 |
- Ensure high modularity:
|
15 |
- Different segmentation strategies.
|
16 |
- Integration of models from various frameworks.
|
17 |
- Effective evaluation methods for entire pipelines and their comparisons.
|
18 |
- Broad use-cases: Not just running text, but all types of handwritten archives.
|
19 |
-
|
20 |
-
- **Large Scale HTR**
|
21 |
-
- Conduct large scale HTR on handwritten historical archives of interest to humanities researchers.
|
22 |
|
23 |
-
|
24 |
-
- Serve model through inference APIs
|
25 |
-
|
26 |
"""
|
27 |
|
28 |
discussion = """
|
@@ -41,3 +40,35 @@ class TextRoadmap:
|
|
41 |
<figure>
|
42 |
<img src="https://raw.githubusercontent.com/Borg93/htr_gradio_file_placeholder/main/roadmap_image_2.png" alt="HTR_tool" style="width:70%; display: block; margin-left: auto; margin-right:auto;" >
|
43 |
</figure> """
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
class TextRoadmap:
|
2 |
roadmap = """
|
3 |
## Roadmap
|
4 |
+
|
5 |
+
#### ☑ Release Model on HuggingFace
|
6 |
- Continually retrain and update both segmentation and text-recognition models as more training data becomes available.
|
7 |
+
|
8 |
+
#### ☐ Release Training and Eval data on HuggingFace
|
9 |
+
|
10 |
+
#### ☐ Specialized TrOCR Model
|
11 |
- Train a TrOCR model specialized on Swedish historical handwritten text.
|
12 |
- Initialize with a historical BERT-model trained at the Swedish National Archives.
|
13 |
|
14 |
+
#### ☐ Open-source and package HTR-pipeline for mass HTR
|
15 |
- Develop an easy-to-implement pipeline like the demo.
|
16 |
- Ensure high modularity:
|
17 |
- Different segmentation strategies.
|
18 |
- Integration of models from various frameworks.
|
19 |
- Effective evaluation methods for entire pipelines and their comparisons.
|
20 |
- Broad use-cases: Not just running text, but all types of handwritten archives.
|
|
|
|
|
|
|
21 |
|
22 |
+
#### ☐ Inference Endpoints
|
23 |
+
- Serve model through inference APIs / Rest APIs with dedicated hardware.
|
24 |
+
|
25 |
"""
|
26 |
|
27 |
discussion = """
|
|
|
40 |
<figure>
|
41 |
<img src="https://raw.githubusercontent.com/Borg93/htr_gradio_file_placeholder/main/roadmap_image_2.png" alt="HTR_tool" style="width:70%; display: block; margin-left: auto; margin-right:auto;" >
|
42 |
</figure> """
|
43 |
+
|
44 |
+
text_contribution = """
|
45 |
+
## Project Contributions
|
46 |
+
|
47 |
+
We extend our deepest gratitude to the individuals and organizations who have made this project possible through their invaluable contributions, especially in providing datasets for training the models. Their generosity and collaboration have significantly propelled the project forward.
|
48 |
+
|
49 |
+
### Datasets Contributors
|
50 |
+
|
51 |
+
- [Name/Organization]: Provided [Name of Dataset] which was instrumental in training [Specify Model].
|
52 |
+
- [Name/Organization]: Contributed [Name of Dataset] that greatly enhanced the performance of [Specify Model].
|
53 |
+
- [Name/Organization]: Generously shared [Name of Dataset] enabling us to refine [Specify Model].
|
54 |
+
- ... (continue listing contributors as necessary)
|
55 |
+
|
56 |
+
### Other Contributions
|
57 |
+
|
58 |
+
- [Name/Organization]: For [description of contribution, e.g., code, testing, design].
|
59 |
+
- ... (continue listing contributors as necessary)
|
60 |
+
|
61 |
+
### Special Mentions
|
62 |
+
|
63 |
+
- ... (mention any other individuals/organizations that played a significant role)
|
64 |
+
|
65 |
+
We are immensely thankful for the collective effort and dedication that has significantly contributed to the progress of this project. The open collaboration and sharing of resources underscore the community’s commitment to advancing the field.
|
66 |
+
|
67 |
+
For further details on contributions or if you are interested in contributing, please refer to our Contribution Guidelines or contact [Your Contact Information].
|
68 |
+
|
69 |
+
Thank you!
|
70 |
+
|
71 |
+
// Riksarkivet
|
72 |
+
|
73 |
+
|
74 |
+
"""
|
tabs/htr_tool.py
CHANGED
@@ -68,7 +68,7 @@ with gr.Blocks() as htr_tool_tab:
|
|
68 |
with gr.Row():
|
69 |
radio_file_input = gr.CheckboxGroup(
|
70 |
choices=["Txt", "XML"],
|
71 |
-
value=["XML"],
|
72 |
label="Output file extension",
|
73 |
# info="Only txt and page xml is supported for now!",
|
74 |
scale=1,
|
@@ -103,23 +103,23 @@ with gr.Blocks() as htr_tool_tab:
|
|
103 |
)
|
104 |
|
105 |
htr_tool_region_segment_model_dropdown = gr.Dropdown(
|
106 |
-
choices=["Riksarkivet/
|
107 |
-
value="Riksarkivet/
|
108 |
label="Region Segment models",
|
109 |
info="Will add more models later!",
|
110 |
)
|
111 |
|
112 |
# with gr.Accordion("Transcribe settings:", open=False):
|
113 |
htr_tool_line_segment_model_dropdown = gr.Dropdown(
|
114 |
-
choices=["Riksarkivet/
|
115 |
-
value="Riksarkivet/
|
116 |
label="Line Segment models",
|
117 |
info="Will add more models later!",
|
118 |
)
|
119 |
|
120 |
htr_tool_transcriber_model_dropdown = gr.Dropdown(
|
121 |
-
choices=["Riksarkivet/
|
122 |
-
value="Riksarkivet/
|
123 |
label="Transcriber models",
|
124 |
info="Models will be continuously updated with future additions for specific cases.",
|
125 |
)
|
|
|
68 |
with gr.Row():
|
69 |
radio_file_input = gr.CheckboxGroup(
|
70 |
choices=["Txt", "XML"],
|
71 |
+
value=["Txt", "XML"],
|
72 |
label="Output file extension",
|
73 |
# info="Only txt and page xml is supported for now!",
|
74 |
scale=1,
|
|
|
103 |
)
|
104 |
|
105 |
htr_tool_region_segment_model_dropdown = gr.Dropdown(
|
106 |
+
choices=["Riksarkivet/rtmdet_region"],
|
107 |
+
value="Riksarkivet/rtmdet_region",
|
108 |
label="Region Segment models",
|
109 |
info="Will add more models later!",
|
110 |
)
|
111 |
|
112 |
# with gr.Accordion("Transcribe settings:", open=False):
|
113 |
htr_tool_line_segment_model_dropdown = gr.Dropdown(
|
114 |
+
choices=["Riksarkivet/rtmdet_lines"],
|
115 |
+
value="Riksarkivet/rtmdet_lines",
|
116 |
label="Line Segment models",
|
117 |
info="Will add more models later!",
|
118 |
)
|
119 |
|
120 |
htr_tool_transcriber_model_dropdown = gr.Dropdown(
|
121 |
+
choices=["Riksarkivet/satrn_htr", "microsoft/trocr-base-handwritten"],
|
122 |
+
value="Riksarkivet/satrn_htr",
|
123 |
label="Transcriber models",
|
124 |
info="Models will be continuously updated with future additions for specific cases.",
|
125 |
)
|