Gabriel committed on
Commit
381bbf4
·
1 Parent(s): 21c87da

Start adding language support

Browse files
Files changed (45) hide show
  1. app/{texts_langs/overview → content/ENG}/changelog_roadmap/changelog.md +0 -0
  2. app/{texts_langs/overview → content/ENG}/changelog_roadmap/old_changelog.md +0 -0
  3. app/{texts_langs/overview → content/ENG}/changelog_roadmap/roadmap.md +0 -0
  4. app/{texts_langs/overview → content/ENG}/contributions/contributions.md +0 -0
  5. app/{texts_langs/overview → content/ENG}/contributions/huminfra_image.md +0 -0
  6. app/{texts_langs/overview → content/ENG}/contributions/riksarkivet_image.md +0 -0
  7. app/{texts_langs/overview → content/ENG}/duplicate_api/api1.md +0 -0
  8. app/{texts_langs/overview → content/ENG}/duplicate_api/api2.md +0 -0
  9. app/{texts_langs/overview → content/ENG}/duplicate_api/api_code1.md +0 -0
  10. app/{texts_langs/overview → content/ENG}/duplicate_api/api_code2.md +0 -0
  11. app/{texts_langs/overview → content/ENG}/duplicate_api/duplicate.md +0 -0
  12. app/{texts_langs/overview → content/ENG}/faq_discussion/discussion.md +0 -0
  13. app/{texts_langs/overview → content/ENG}/faq_discussion/faq.md +0 -0
  14. app/{texts_langs/overview → content/ENG}/htrflow/htrflow_col1.md +0 -0
  15. app/{texts_langs/overview → content/ENG}/htrflow/htrflow_col2.md +0 -0
  16. app/{texts_langs/overview → content/ENG}/htrflow/htrflow_row1.md +0 -0
  17. app/{texts_langs/overview → content/ENG}/htrflow/htrflow_tab1.md +0 -0
  18. app/{texts_langs/overview → content/ENG}/htrflow/htrflow_tab2.md +0 -0
  19. app/{texts_langs/overview → content/ENG}/htrflow/htrflow_tab3.md +0 -0
  20. app/{texts_langs/overview → content/ENG}/htrflow/htrflow_tab4.md +0 -0
  21. app/content/SWE/htrflow/htrflow_col1.md +18 -0
  22. app/content/SWE/htrflow/htrflow_col2.md +23 -0
  23. app/content/SWE/htrflow/htrflow_row1.md +3 -0
  24. app/content/SWE/htrflow/htrflow_tab1.md +7 -0
  25. app/content/SWE/htrflow/htrflow_tab2.md +7 -0
  26. app/content/SWE/htrflow/htrflow_tab3.md +7 -0
  27. app/content/SWE/htrflow/htrflow_tab4.md +7 -0
  28. app/content/main_sub_title.md +3 -0
  29. app/content/main_title.md +1 -0
  30. app/gradio_config.py +0 -14
  31. app/main.py +46 -26
  32. app/tabs/adv_htrflow_tab.py +3 -18
  33. app/tabs/htrflow_tab.py +63 -57
  34. app/tabs/overview_tab.py +71 -91
  35. app/templates/steps_template.yaml.j2 +16 -0
  36. app/texts_langs/text_app.py +0 -9
  37. app/texts_langs/text_overview.py +0 -37
  38. app/translation.yaml +13 -0
  39. app/{texts_langs → utils}/__init__.py +0 -0
  40. app/utils/lang_helper.py +7 -0
  41. app/utils/md_helper.py +14 -0
  42. pyproject.toml +2 -1
  43. todo.txt +14 -0
  44. translation.yaml +10 -0
  45. uv.lock +2 -0
app/{texts_langs/overview → content/ENG}/changelog_roadmap/changelog.md RENAMED
File without changes
app/{texts_langs/overview → content/ENG}/changelog_roadmap/old_changelog.md RENAMED
File without changes
app/{texts_langs/overview → content/ENG}/changelog_roadmap/roadmap.md RENAMED
File without changes
app/{texts_langs/overview → content/ENG}/contributions/contributions.md RENAMED
File without changes
app/{texts_langs/overview → content/ENG}/contributions/huminfra_image.md RENAMED
File without changes
app/{texts_langs/overview → content/ENG}/contributions/riksarkivet_image.md RENAMED
File without changes
app/{texts_langs/overview → content/ENG}/duplicate_api/api1.md RENAMED
File without changes
app/{texts_langs/overview → content/ENG}/duplicate_api/api2.md RENAMED
File without changes
app/{texts_langs/overview → content/ENG}/duplicate_api/api_code1.md RENAMED
File without changes
app/{texts_langs/overview → content/ENG}/duplicate_api/api_code2.md RENAMED
File without changes
app/{texts_langs/overview → content/ENG}/duplicate_api/duplicate.md RENAMED
File without changes
app/{texts_langs/overview → content/ENG}/faq_discussion/discussion.md RENAMED
File without changes
app/{texts_langs/overview → content/ENG}/faq_discussion/faq.md RENAMED
File without changes
app/{texts_langs/overview → content/ENG}/htrflow/htrflow_col1.md RENAMED
File without changes
app/{texts_langs/overview → content/ENG}/htrflow/htrflow_col2.md RENAMED
File without changes
app/{texts_langs/overview → content/ENG}/htrflow/htrflow_row1.md RENAMED
File without changes
app/{texts_langs/overview → content/ENG}/htrflow/htrflow_tab1.md RENAMED
File without changes
app/{texts_langs/overview → content/ENG}/htrflow/htrflow_tab2.md RENAMED
File without changes
app/{texts_langs/overview → content/ENG}/htrflow/htrflow_tab3.md RENAMED
File without changes
app/{texts_langs/overview → content/ENG}/htrflow/htrflow_tab4.md RENAMED
File without changes
app/content/SWE/htrflow/htrflow_col1.md ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### Introduktion
2
+
3
+ Riksarkivet presenterar en demonstrationspipeline för HTR (Handwritten Text Recognition). Pipelinen består av två instanssegmenteringsmodeller: en tränad för att segmentera textregioner i bilder av löpande-textdokument och en annan tränad för att segmentera textrader inom dessa regioner. Textraderna transkriberas därefter av en textigenkänningsmodell som är tränad på ett stort dataset med svensk handskrift från 1600- till 1800-talet.
4
+
5
+ ### Användning
6
+
7
+ Det är viktigt att betona att denna applikation främst är avsedd för demonstrationsändamål. Målet är att visa upp vår pipeline för att transkribera historiska dokument med löpande text, inte att använda pipelinen i storskalig produktion.
8
+ **Obs**: I framtiden kommer vi att optimera koden för att passa ett produktionsscenario med multi-GPU och batch-inferens, men detta arbete pågår fortfarande. <br>
9
+
10
+ För en inblick i de kommande funktionerna vi arbetar med:
11
+
12
+ - Navigera till > **Översikt** > **Ändringslogg och roadmap**.
13
+
14
+ ### Begränsningar
15
+
16
+ Demon, som är värd på Huggingface och tilldelad en T4 GPU, kan bara hantera två användarinlämningar åt gången. Om du upplever långa väntetider eller att applikationen inte svarar, är detta anledningen. I framtiden planerar vi att själva vara värdar för denna lösning, med en bättre server för en förbättrad användarupplevelse, optimerad kod och flera modellalternativ. Spännande utveckling är på gång!
17
+
18
+ Det är också viktigt att notera att modellerna fungerar på löpande text och inte text i tabellformat.
app/content/SWE/htrflow/htrflow_col2.md ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Source Code
2
+
3
+ Please fork and leave a star on Github if you like it! The code for this project can be found here:
4
+
5
+ - [Github](https://github.com/Riksarkivet/HTRFLOW)
6
+
7
+ **Note**: We will in the future package all of the code for mass HTR (batch inference on multi-GPU setup), but the code is still a work in progress.
8
+
9
+ ## Models
10
+
11
+ The models used in this demo are very much a work in progress, and as more data and new architectures become available, they will be retrained and reevaluated. For more information about the models, please refer to their model-cards on Huggingface.
12
+
13
+ - [Riksarkivet/rtmdet_regions](https://huggingface.co/Riksarkivet/rtmdet_regions)
14
+ - [Riksarkivet/rtmdet_lines](https://huggingface.co/Riksarkivet/rtmdet_lines)
15
+ - [Riksarkivet/satrn_htr](https://huggingface.co/Riksarkivet/satrn_htr)
16
+
17
+ ## Datasets
18
+
19
+ Training and test sets created by the Swedish National Archives will be released here:
20
+
21
+ - [Riksarkivet/placeholder_region_segmentation](https://huggingface.co/datasets/Riksarkivet/placeholder_region_segmentation)
22
+ - [Riksarkivet/placeholder_line_segmentation](https://huggingface.co/datasets/Riksarkivet/placeholder_line_segmentation)
23
+ - [Riksarkivet/placeholder_htr](https://huggingface.co/datasets/Riksarkivet/placeholder_htr)
app/content/SWE/htrflow/htrflow_row1.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ ## The Pipeline in Overview
2
+
3
+ The steps in the pipeline are shown below:
app/content/SWE/htrflow/htrflow_tab1.md ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ ### Binarization
2
+
3
+ The reason for binarizing the images before processing them is that we want the models to generalize as well as possible. By training on only binarized images and by binarizing images before running them through the pipeline, we take the target domain closer to the training domain, and reduce negative effects of background variation, background noise etc., on the final results. The pipeline implements a simple adaptive thresholding algorithm for binarization.
4
+
5
+ <figure>
6
+ <img src="https://github.com/Borg93/htr_gradio_file_placeholder/blob/main/app_project_bin.png?raw=true" alt="HTR_tool" style="width:70%; display: block; margin-left: auto; margin-right:auto;" >
7
+ </figure>
app/content/SWE/htrflow/htrflow_tab2.md ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ ### Text-region segmentation
2
+
3
+ To facilitate the text-line segmentation process, it is advantageous to segment the image into text-regions beforehand. This initial step offers several benefits, including reducing variations in line spacing, eliminating blank areas on the page, establishing a clear reading order, and distinguishing marginalia from the main text. The segmentation model utilized in this process predicts both bounding boxes and masks. Although the model has the capability to predict both, only the masks are utilized for the segmentation tasks of lines and regions. An essential post-processing step involves checking for regions that are contained within other regions. During this step, only the containing region is retained, while the contained region is discarded. This ensures that the final segmented text-regions are accurate and devoid of overlapping or redundant areas. It also ensures that no duplicate text-regions are sent to the text-recognition model.
4
+
5
+ <figure>
6
+ <img src="https://github.com/Borg93/htr_gradio_file_placeholder/blob/main/app_project_region.png?raw=true" alt="HTR_tool" style="width:70%; display: block; margin-left: auto; margin-right:auto;" >
7
+ </figure>
app/content/SWE/htrflow/htrflow_tab3.md ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ ### Text-line segmentation
2
+
3
+ This is also an instance segmentation model, trained on extracting text-lines from the cropped text-regions. The same post-processing as in the text-region segmentation step is done in the text-line segmentation step.
4
+
5
+ <figure>
6
+ <img src="https://github.com/Borg93/htr_gradio_file_placeholder/blob/main/app_project_line.png?raw=true" alt="HTR_tool" style="width:70%; display: block; margin-left: auto; margin-right:auto;" >
7
+ </figure>
app/content/SWE/htrflow/htrflow_tab4.md ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ ### Text Recognition
2
+
3
+ The text-recognition model was trained on approximately one million handwritten text-line images ranging from the 17th to the 19th century. See the model card for detailed evaluation results, and results from some fine-tuning experiments.
4
+
5
+ <figure>
6
+ <img src="https://github.com/Borg93/htr_gradio_file_placeholder/blob/main/app_project_htr.png?raw=true" alt="HTR_tool" style="width:70%; display: block; margin-left: auto; margin-right:auto;" >
7
+ </figure>
app/content/main_sub_title.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ <a href="https://riksarkivet.se">
2
+ <img src="https://raw.githubusercontent.com/Borg93/Riksarkivet_docs/main/docs/assets/fav-removebg-preview.png" width="17%" align="right" margin-right="100" />
3
+ </a>
app/content/main_title.md ADDED
@@ -0,0 +1 @@
 
 
1
+ <h1><center> HTRflow 🔍 App </center></h1>
app/gradio_config.py CHANGED
@@ -19,20 +19,6 @@ body > gradio-app > div > div > div.wrap.svelte-1rjryqp > footer > a {
19
  body > gradio-app > div > div > div.wrap.svelte-1rjryqp > footer > div {
20
  display: none !important;
21
  }
22
-
23
- # .top-navbar .tab-container {justify-content: center;}
24
- # .top-navbar .tab-container button {font-size:large !important;}
25
  #langdropdown {width: 100px;}
26
 
27
- #column-form .wrap {flex-direction: column; height:100vh;}
28
-
29
- @media screen and (max-width: 1024px) {
30
- #column-form .wrap {
31
- flex-direction: column;
32
- height: auto;
33
- }
34
- }
35
-
36
- #htrflowouttab-button {opacity: 0; cursor:auto;}
37
-
38
  """
 
19
  body > gradio-app > div > div > div.wrap.svelte-1rjryqp > footer > div {
20
  display: none !important;
21
  }
 
 
 
22
  #langdropdown {width: 100px;}
23
 
 
 
 
 
 
 
 
 
 
 
 
24
  """
app/main.py CHANGED
@@ -3,50 +3,70 @@ import gradio as gr
3
  from app.gradio_config import css, theme
4
  from app.tabs.adv_htrflow_tab import adv_htrflow_pipeline
5
  from app.tabs.htrflow_tab import htrflow_pipeline
6
- from app.tabs.overview_tab import overview
7
- from app.texts_langs.text_app import TextApp
 
 
 
 
 
 
8
 
9
  with gr.Blocks(title="HTRflow", theme=theme, css=css) as demo:
10
  with gr.Row():
 
 
 
11
  with gr.Column(scale=1):
12
- radio = gr.Dropdown(
13
  choices=["ENG", "SWE"], value="ENG", container=False, min_width=50, scale=0, elem_id="langdropdown"
14
  )
15
 
16
  with gr.Column(scale=2):
17
- gr.Markdown(TextApp.title_markdown)
18
  with gr.Column(scale=1):
19
- gr.Markdown(TextApp.title_markdown_img)
20
 
21
  with gr.Tabs(elem_classes="top-navbar") as navbar:
22
- with gr.Tab("Home"):
23
  overview.render()
24
 
25
- with gr.Tab("Simple HTR"):
26
  htrflow_pipeline.render()
27
 
28
- with gr.Tab("Custom HTR"):
29
  adv_htrflow_pipeline.render()
30
 
31
- # radio.change(
32
- # None,
33
- # inputs=radio,
34
- # js="""
35
- # (data) => {
36
- # window.localStorage.setItem('data', JSON.stringify(data))
37
- # }
38
- # """,
39
- # )
40
-
41
- demo.load(
42
- None,
43
- inputs=radio,
44
- js="""
45
- (data) => {
46
- window.localStorage.setItem('data', JSON.stringify(data))
47
- }
48
- """,
49
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
  demo.queue()
52
 
 
3
  from app.gradio_config import css, theme
4
  from app.tabs.adv_htrflow_tab import adv_htrflow_pipeline
5
  from app.tabs.htrflow_tab import htrflow_pipeline
6
+ from app.tabs.overview_tab import overview, overview_language
7
+ from app.utils.lang_helper import get_tab_updates
8
+ from app.utils.md_helper import load_markdown
9
+
10
+ TAB_LABELS = {
11
+ "ENG": ["Home", "Simple HTR", "Custom HTR"],
12
+ "SWE": ["Hem", "Enkel HTR", "Anpassad HTR"],
13
+ }
14
 
15
  with gr.Blocks(title="HTRflow", theme=theme, css=css) as demo:
16
  with gr.Row():
17
+ local_language = gr.BrowserState(default_value="ENG", storage_key="selected_language")
18
+ main_language = gr.State(value="ENG")
19
+
20
  with gr.Column(scale=1):
21
+ language_selector = gr.Dropdown(
22
  choices=["ENG", "SWE"], value="ENG", container=False, min_width=50, scale=0, elem_id="langdropdown"
23
  )
24
 
25
  with gr.Column(scale=2):
26
+ gr.Markdown(load_markdown(None, "main_title"))
27
  with gr.Column(scale=1):
28
+ gr.Markdown(load_markdown(None, "main_sub_title"))
29
 
30
  with gr.Tabs(elem_classes="top-navbar") as navbar:
31
+ with gr.Tab(label="Home") as tab_home:
32
  overview.render()
33
 
34
+ with gr.Tab(label="Simple HTR") as tab_simple_htr:
35
  htrflow_pipeline.render()
36
 
37
+ with gr.Tab(label="Custom HTR") as tab_custom_htr:
38
  adv_htrflow_pipeline.render()
39
 
40
+ @demo.load(inputs=[local_language], outputs=[language_selector, main_language, overview_language])
41
+ def load_language(saved_values):
42
+ return (saved_values,) * 3
43
+
44
+ @language_selector.change(
45
+ inputs=[language_selector],
46
+ outputs=[
47
+ local_language,
48
+ main_language,
49
+ overview_language,
50
+ ],
 
 
 
 
 
 
 
51
  )
52
+ def save_language_to_browser(selected_language):
53
+ return (selected_language,) * 3
54
+
55
+ @main_language.change(
56
+ inputs=[main_language],
57
+ outputs=[
58
+ tab_home,
59
+ tab_simple_htr,
60
+ tab_custom_htr,
61
+ ],
62
+ )
63
+ def update_main_tabs(selected_language):
64
+ return (*get_tab_updates(selected_language, TAB_LABELS),)
65
+
66
+ @main_language.change(inputs=[main_language])
67
+ def on_language_change(selected_language):
68
+ print(f"Language changed to: {selected_language}")
69
+
70
 
71
  demo.queue()
72
 
app/tabs/adv_htrflow_tab.py CHANGED
@@ -11,24 +11,7 @@ with gr.Blocks() as adv_htrflow_pipeline:
11
  with gr.Group():
12
  with gr.Row(visible=True) as yaml_pipeline:
13
  custom_template_yaml = gr.Code(
14
- value="""
15
- steps:
16
- - step: Segmentation
17
- settings:
18
- model: yolo
19
- model_settings:
20
- model: Riksarkivet/yolov9-lines-within-regions-1
21
- - step: TextRecognition
22
- settings:
23
- model: TrOCR
24
- model_settings:
25
- model: Riksarkivet/trocr-base-handwritten-hist-swe-2
26
- - step: OrderLines
27
- - step: Export
28
- settings:
29
- format: txt
30
- dest: outputs
31
- """,
32
  language="yaml",
33
  label="yaml",
34
  interactive=True,
@@ -47,6 +30,8 @@ with gr.Blocks() as adv_htrflow_pipeline:
47
  )
48
 
49
  gr.Image()
 
 
50
  with gr.Tab("Table"):
51
  pass
52
  with gr.Tab("Analysis"):
 
11
  with gr.Group():
12
  with gr.Row(visible=True) as yaml_pipeline:
13
  custom_template_yaml = gr.Code(
14
+ value="Paste your custom pipeline here",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  language="yaml",
16
  label="yaml",
17
  interactive=True,
 
30
  )
31
 
32
  gr.Image()
33
+ with gr.Tab("Graph Excution"):
34
+ pass
35
  with gr.Tab("Table"):
36
  pass
37
  with gr.Tab("Analysis"):
app/tabs/htrflow_tab.py CHANGED
@@ -1,5 +1,6 @@
1
  import gradio as gr
2
  import pandas as pd
 
3
 
4
  from app.assets.examples import DemoImages
5
 
@@ -58,53 +59,61 @@ def get_yaml_button_fn(
58
  nested_segment_model_2_type=None,
59
  nested_htr_model_type=None,
60
  ):
61
- if method == "Simple layout":
62
- yaml_value = f"""steps:
63
- - step: Segmentation
64
- settings:
65
- model: {simple_htr_model_type}
66
- model_settings:
67
- model: {simple_segment_model}
68
- - step: TextRecognition
69
- settings:
70
- model: {simple_segment_model_type}
71
- model_settings:
72
- model: {simple_htr_model}
73
- - step: OrderLines
74
- """
75
- elif method == "Nested segmentation":
76
- yaml_value = f"""steps:
77
- - step: Segmentation
78
- settings:
79
- model: {nested_segment_model_1_type}
80
- model_settings:
81
- model: {nested_segment_model_1}
82
- - step: Segmentation
83
- settings:
84
- model: {nested_segment_model_2_type}
85
- model_settings:
86
- model: {nested_segment_model_2}
87
- - step: TextRecognition
88
- settings:
89
- model: {nested_htr_model_type}
90
- model_settings:
91
- model: {nested_htr_model}
92
- - step: OrderLines
93
- """
94
- else:
95
- return gr.Error("Invalid method or not yet supported.")
96
-
97
- export_steps = ""
98
- for output_format in output_formats:
99
- export_steps += f""" - step: Export
100
- settings:
101
- format: {output_format}
102
- dest: {output_format}-outputs
103
- """
104
-
105
- yaml_value += export_steps
106
-
107
- return yaml_value
 
 
 
 
 
 
 
 
108
 
109
 
110
  output_image_placehholder = gr.Image(label="Output image", height=500, show_share_button=True)
@@ -214,14 +223,14 @@ with gr.Blocks() as htrflow_pipeline:
214
  with gr.Column():
215
  # gr.Markdown("<h2>Output Panel</h2>")
216
  with gr.Tabs():
217
- with gr.Tab("Viewer"): #interactive=False, elem_id="htrflowouttab"
218
  with gr.Group():
219
  with gr.Row():
220
  output_image_placehholder.render()
221
  with gr.Row():
222
  markdown_selected_option.render()
223
  with gr.Row():
224
- output_dataframe_pipeline = gr.Textbox(label="Click text",info="click on image bla bla..")
225
  with gr.Tab("Table") as htrflow_output_table_tab:
226
  with gr.Group():
227
  with gr.Row():
@@ -280,11 +289,8 @@ with gr.Blocks() as htrflow_pipeline:
280
  outputs=[output_yaml_code],
281
  ).then(dummy_revealer, inputs=output_yaml_code, outputs=output_yaml_code)
282
 
283
- # TODO : hide the tab when selected for yaml code
284
- # htrflow_output_table_tab.select(dummy_revealer, inputs=output_yaml_code, outputs=output_yaml_code)
285
-
286
- template_method_radio.select(
287
- lambda choice: toggle_visibility_default_templates(choice),
288
- inputs=template_method_radio,
289
- outputs=[simple_pipeline, nested_pipeline, table_pipeline, selected_option],
290
- )
 
1
  import gradio as gr
2
  import pandas as pd
3
+ from jinja2 import Environment, FileSystemLoader
4
 
5
  from app.assets.examples import DemoImages
6
 
 
59
  nested_segment_model_2_type=None,
60
  nested_htr_model_type=None,
61
  ):
62
+ env = Environment(loader=FileSystemLoader("app/templates"))
63
+
64
+ template_name = "steps_template.yaml.j2"
65
+ try:
66
+ if method == "Simple layout":
67
+ steps = [
68
+ {
69
+ "step": "Segmentation",
70
+ "model": simple_htr_model_type,
71
+ "model_settings": {"model": simple_segment_model},
72
+ },
73
+ {
74
+ "step": "TextRecognition",
75
+ "model": simple_segment_model_type,
76
+ "model_settings": {"model": simple_htr_model},
77
+ },
78
+ {"step": "OrderLines"},
79
+ ]
80
+ elif method == "Nested segmentation":
81
+ steps = [
82
+ {
83
+ "step": "Segmentation",
84
+ "model": nested_segment_model_1_type,
85
+ "model_settings": {"model": nested_segment_model_1},
86
+ },
87
+ {
88
+ "step": "Segmentation",
89
+ "model": nested_segment_model_2_type,
90
+ "model_settings": {"model": nested_segment_model_2},
91
+ },
92
+ {
93
+ "step": "TextRecognition",
94
+ "model": nested_htr_model_type,
95
+ "model_settings": {"model": nested_htr_model},
96
+ },
97
+ {"step": "OrderLines"},
98
+ ]
99
+ else:
100
+ return "Invalid method or not yet supported."
101
+
102
+ steps.extend(
103
+ {
104
+ "step": "Export",
105
+ "settings": {"format": format, "dest": f"{format}-outputs"},
106
+ }
107
+ for format in output_formats
108
+ )
109
+
110
+ template = env.get_template(template_name)
111
+
112
+ yaml_value = template.render(steps=steps)
113
+ return yaml_value
114
+
115
+ except Exception as e:
116
+ return f"Error generating YAML: {str(e)}"
117
 
118
 
119
  output_image_placehholder = gr.Image(label="Output image", height=500, show_share_button=True)
 
223
  with gr.Column():
224
  # gr.Markdown("<h2>Output Panel</h2>")
225
  with gr.Tabs():
226
+ with gr.Tab("Viewer"): # interactive=False, elem_id="htrflowouttab"
227
  with gr.Group():
228
  with gr.Row():
229
  output_image_placehholder.render()
230
  with gr.Row():
231
  markdown_selected_option.render()
232
  with gr.Row():
233
+ output_dataframe_pipeline = gr.Textbox(label="Click text", info="click on image bla bla..")
234
  with gr.Tab("Table") as htrflow_output_table_tab:
235
  with gr.Group():
236
  with gr.Row():
 
289
  outputs=[output_yaml_code],
290
  ).then(dummy_revealer, inputs=output_yaml_code, outputs=output_yaml_code)
291
 
292
+ template_method_radio.select(
293
+ lambda choice: toggle_visibility_default_templates(choice),
294
+ inputs=template_method_radio,
295
+ outputs=[simple_pipeline, nested_pipeline, table_pipeline, selected_option],
296
+ )
 
 
 
app/tabs/overview_tab.py CHANGED
@@ -1,60 +1,27 @@
1
  import gradio as gr
2
 
3
- from app.texts_langs.text_overview import TextOverview
4
-
5
- default_value_radio_overview = "Home"
6
- overview_choices_eng = [
7
- "Home",
8
- "About App",
9
- "Guide",
10
- "Model & Data",
11
- "Contributions",
12
- "Duplicate App",
13
- "FAQ & Contact",
14
- ]
15
-
16
-
17
- def toggle_visibility(selected_option):
18
- return [
19
- gr.update(visible=(selected_option == "Home")),
20
- gr.update(visible=(selected_option == "About App")),
21
- gr.update(visible=(selected_option == "Guide")),
22
- gr.update(visible=(selected_option == "Model & Data")),
23
- gr.update(visible=(selected_option == "Contributions")),
24
- gr.update(visible=(selected_option == "FAQ & Contact")),
25
- gr.update(visible=(selected_option == "Duplicate App")),
26
- ]
27
 
 
 
 
 
28
 
29
  with gr.Blocks() as overview:
30
- with gr.Row():
31
-
32
- with gr.Column(visible=True, min_width=170, scale=0, variant="panel") as sidebar:
33
- options_overview = gr.Radio(
34
- overview_choices_eng,
35
- label="Side Navigation",
36
- container=False,
37
- value=default_value_radio_overview,
38
- elem_id="column-form",
39
- min_width=100,
40
- scale=0,
41
- )
42
-
43
- with gr.Column(variant="panel") as overview_main:
44
- with gr.Row(visible=True) as overview_home:
45
- with gr.Column():
46
 
47
- gr.Markdown("## landing page to explain version")
48
- gr.Markdown("## htrflow app 1.0.0")
49
- gr.Markdown("## links to different stuff")
50
- gr.Markdown("## Whats new..")
 
51
 
52
- with gr.Row(visible=False) as overview_about:
53
  with gr.Column():
54
- gr.Markdown(TextOverview.htrflow_col1)
55
- gr.Markdown(TextOverview.htrflow_col2)
56
 
57
- with gr.Row(visible=False) as overview_guide:
58
  with gr.Column():
59
  with gr.Row():
60
  with gr.Column():
@@ -71,64 +38,77 @@ with gr.Blocks() as overview:
71
  format="mp4",
72
  )
73
 
74
- with gr.Row(visible=False) as overview_model_data:
75
  with gr.Column():
76
- gr.Markdown(TextOverview.htrflow_row1)
77
  with gr.Tabs():
78
  with gr.Tab("Binarization"):
79
- gr.Markdown(TextOverview.htrflow_tab1)
80
  with gr.Tab("Region segmentation"):
81
- gr.Markdown(TextOverview.htrflow_tab2)
82
  with gr.Tab("Line segmentation"):
83
- gr.Markdown(TextOverview.htrflow_tab3)
84
  with gr.Tab("Text recognition"):
85
- gr.Markdown(TextOverview.htrflow_tab4)
86
 
87
- with gr.Row(visible=False) as overview_contribute:
88
  with gr.Column():
89
- gr.Markdown(TextOverview.contributions)
90
- gr.Markdown(TextOverview.huminfra_image)
91
 
92
- with gr.Row(visible=False) as overview_duplicate:
93
  with gr.Column():
94
- gr.Markdown(TextOverview.duplicate)
95
 
96
  with gr.Column():
97
- gr.Markdown(TextOverview.api1)
98
- gr.Code(
99
- value=TextOverview.api_code1,
100
- language="python",
101
- interactive=False,
102
- show_label=False,
103
- )
104
-
105
- gr.Markdown(TextOverview.api2)
106
-
107
- gr.Code(
108
- value=TextOverview.api_code2,
109
- language=None,
110
- interactive=False,
111
- show_label=False,
112
- )
113
-
114
- with gr.Row(visible=False) as overview_faq:
115
  with gr.Column():
116
- gr.Markdown(TextOverview.text_faq)
117
  with gr.Column():
118
- gr.Markdown(TextOverview.text_discussion)
119
- with gr.Column(visible=True, min_width=0, scale=0) as empty:
120
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
121
 
122
- options_overview.change(
123
- lambda choice: toggle_visibility(choice),
124
- inputs=options_overview,
125
  outputs=[
126
- overview_home,
127
- overview_about,
128
- overview_guide,
129
- overview_model_data,
130
- overview_contribute,
131
- overview_duplicate,
132
- overview_faq,
133
  ],
134
  )
 
 
 
1
  import gradio as gr
2
 
3
+ from app.utils.lang_helper import get_tab_updates
4
+ from app.utils.md_helper import load_markdown
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
+ TAB_LABELS = {
7
+ "ENG": ["Overview", "About App", "Guide", "Model & Data", "Contributions", "Duplicate App", "FAQ & Contact"],
8
+ "SWE": ["Översikt", "Om appen", "Guide", "Modell & Data", "Bidrag", "Duplicera App", "FAQ & Kontakt"],
9
+ }
10
 
11
  with gr.Blocks() as overview:
12
+ overview_language = gr.State(value="ENG")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
+ with gr.Column(variant="panel"):
15
+ with gr.Tabs(elem_classes="top-navbar") as navbar:
16
+ with gr.Tab("Overview") as tab_overview:
17
+ with gr.Column(variant="panel"):
18
+ md1 = gr.Markdown("some text")
19
 
20
+ with gr.Tab("About App") as tab_about:
21
  with gr.Column():
22
+ about_md = gr.Markdown(load_markdown(overview_language.value, "htrflow/htrflow_col1"))
 
23
 
24
+ with gr.Tab("Guide") as tab_guide:
25
  with gr.Column():
26
  with gr.Row():
27
  with gr.Column():
 
38
  format="mp4",
39
  )
40
 
41
+ with gr.Tab("Model & Data") as tab_model_data:
42
  with gr.Column():
43
+ # gr.Markdown(TextOverview.htrflow_row1)
44
  with gr.Tabs():
45
  with gr.Tab("Binarization"):
46
+ gr.Markdown("") # gr.Markdown(TextOverview.htrflow_tab1)
47
  with gr.Tab("Region segmentation"):
48
+ gr.Markdown("") # gr.Markdown(TextOverview.htrflow_tab2)
49
  with gr.Tab("Line segmentation"):
50
+ gr.Markdown("") # gr.Markdown(TextOverview.htrflow_tab3)
51
  with gr.Tab("Text recognition"):
52
+ gr.Markdown("") # gr.Markdown(TextOverview.htrflow_tab4)
53
 
54
+ with gr.Tab("Contributions") as tab_contributions:
55
  with gr.Column():
56
+ gr.Markdown("") # gr.Markdown(TextOverview.contributions)
57
+ gr.Markdown("") # gr.Markdown(TextOverview.huminfra_image)
58
 
59
+ with gr.Tab("Duplicate App") as tab_duplicate_app:
60
  with gr.Column():
61
+ gr.Markdown("") # gr.Markdown(TextOverview.duplicate)
62
 
63
  with gr.Column():
64
+ gr.Markdown("") # gr.Markdown(TextOverview.api1)
65
+ # gr.Code(
66
+ # value=TextOverview.api_code1,
67
+ # language="python",
68
+ # interactive=False,
69
+ # show_label=False,)
70
+
71
+ gr.Markdown("") # gr.Markdown(TextOverview.api2)
72
+
73
+ # gr.Code(
74
+ # value=TextOverview.api_code2,
75
+ # language=None,
76
+ # interactive=False,
77
+ # show_label=False,
78
+ # )
79
+
80
+ with gr.Tab("FAQ & Contact") as tab_faq_contact:
 
81
  with gr.Column():
82
+ gr.Markdown("") # gr.Markdown(TextOverview.text_faq)
83
  with gr.Column():
84
+ gr.Markdown("") # gr.Markdown(TextOverview.text_discussion)
85
+
86
+ overview.load(
87
+ inputs=[overview_language],
88
+ outputs=[about_md],
89
+ )
90
+
91
+ def load_md_text(selected_language):
92
+ return load_markdown(selected_language, "htrflow/htrflow_col1")
93
+
94
+ @overview_language.change(
95
+ inputs=[overview_language],
96
+ outputs=[about_md],
97
+ )
98
+ def change_md_text(selected_language):
99
+ return load_markdown(selected_language, "htrflow/htrflow_col1")
100
 
101
+ @overview_language.change(
102
+ inputs=[overview_language],
 
103
  outputs=[
104
+ tab_overview,
105
+ tab_about,
106
+ tab_guide,
107
+ tab_model_data,
108
+ tab_contributions,
109
+ tab_duplicate_app,
110
+ tab_faq_contact,
111
  ],
112
  )
113
+ def save_language_to_browser(selected_language):
114
+ return (*get_tab_updates(selected_language, TAB_LABELS),)
app/templates/steps_template.yaml.j2 ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ steps:
2
+ {% for step in steps -%}
3
+ - step: {{ step.step }}
4
+ {% if step.model -%}
5
+ settings:
6
+ model: {{ step.model }}
7
+ model_settings:
8
+ model: {{ step.model_settings.model }}
9
+ {% endif -%}
10
+ {% if step.settings -%}
11
+ settings:
12
+ {% for key, value in step.settings.items() -%}
13
+ {{ key }}: {{ value }}
14
+ {% endfor -%}
15
+ {% endif -%}
16
+ {% endfor %}
app/texts_langs/text_app.py DELETED
@@ -1,9 +0,0 @@
1
- class TextApp:
2
- title_markdown = """
3
- <h1><center> HTRflow 🔍 App </center></h1>""" #
4
-
5
- title_markdown_img = """
6
- <a href="https://riksarkivet.se">
7
- <img src="https://raw.githubusercontent.com/Borg93/Riksarkivet_docs/main/docs/assets/fav-removebg-preview.png" width="17%" align="right" margin-right="100" />
8
- </a>
9
- """
 
 
 
 
 
 
 
 
 
 
app/texts_langs/text_overview.py DELETED
@@ -1,37 +0,0 @@
1
- def read_markdown(file_path: str) -> str:
2
- with open(file_path, "r") as file:
3
- content = file.read()
4
-
5
- return f"""{content}"""
6
-
7
-
8
- class TextOverview:
9
- # HTRFLOW
10
- htrflow_col1 = read_markdown("app/texts_langs/overview/htrflow/htrflow_col1.md")
11
- htrflow_col2 = read_markdown("app/texts_langs/overview/htrflow/htrflow_col2.md")
12
- htrflow_row1 = read_markdown("app/texts_langs/overview/htrflow/htrflow_row1.md")
13
- htrflow_tab1 = read_markdown("app/texts_langs/overview/htrflow/htrflow_tab1.md")
14
- htrflow_tab2 = read_markdown("app/texts_langs/overview/htrflow/htrflow_tab2.md")
15
- htrflow_tab3 = read_markdown("app/texts_langs/overview/htrflow/htrflow_tab3.md")
16
- htrflow_tab4 = read_markdown("app/texts_langs/overview/htrflow/htrflow_tab4.md")
17
-
18
- # faq & discussion
19
- text_faq = read_markdown("app/texts_langs/overview/faq_discussion/faq.md")
20
- text_discussion = read_markdown("app/texts_langs/overview/faq_discussion/discussion.md")
21
-
22
- # Contributions
23
- contributions = read_markdown("app/texts_langs/overview/contributions/contributions.md")
24
- huminfra_image = read_markdown("app/texts_langs/overview/contributions/huminfra_image.md")
25
-
26
- # Changelog & Roadmap
27
- changelog = read_markdown("app/texts_langs/overview/changelog_roadmap/changelog.md")
28
- old_changelog = read_markdown("app/texts_langs/overview/changelog_roadmap/old_changelog.md")
29
-
30
- roadmap = read_markdown("app/texts_langs/overview/changelog_roadmap/roadmap.md")
31
-
32
- # duplicate & api
33
- duplicate = read_markdown("app/texts_langs/overview/duplicate_api/duplicate.md")
34
- api1 = read_markdown("app/texts_langs/overview/duplicate_api/api1.md")
35
- api_code1 = read_markdown("app/texts_langs/overview/duplicate_api/api_code1.md")
36
- api2 = read_markdown("app/texts_langs/overview/duplicate_api/api2.md")
37
- api_code2 = read_markdown("app/texts_langs/overview/duplicate_api/api_code2.md")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/translation.yaml ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ENG:
2
+ Language: Language
3
+ Home: Home
4
+ Simple HTR: Simple HTR
5
+ Custom HTR: Custom HTR
6
+ # Other translations...
7
+
8
+ SWE:
9
+ Language: Språk
10
+ Home: Hem
11
+ Simple HTR: Enkel HTR
12
+ Custom HTR: Anpassad HTR
13
+ # Other translations...
app/{texts_langs → utils}/__init__.py RENAMED
File without changes
app/utils/lang_helper.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
+
4
def get_tab_updates(selected_language, TAB_LABELS):
    """Build the tab-label updates for ``selected_language``.

    Looks up the label sequence stored under ``selected_language`` in
    ``TAB_LABELS`` and returns a list holding one ``gr.update(label=...)``
    per label, in the same order as the labels.

    Raises whatever the ``TAB_LABELS[selected_language]`` lookup raises when
    the language is not present (``KeyError`` for a plain dict).
    """
    tab_updates = []
    for tab_label in TAB_LABELS[selected_language]:
        tab_updates.append(gr.update(label=tab_label))
    return tab_updates
app/utils/md_helper.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+
4
def load_markdown(language, section, content_dir="app/content"):
    """Load the markdown text for ``section``, optionally scoped to a language.

    Args:
        language: Name of the language sub-directory under ``content_dir``
            (for example ``"ENG"``). When ``None`` the file is looked up
            directly in ``content_dir``.
        section: Path of the markdown file relative to the language
            directory, without the ``.md`` extension
            (for example ``"htrflow/htrflow_col1"``).
        content_dir: Root directory that holds the markdown content.

    Returns:
        The file content decoded as UTF-8, or a markdown placeholder heading
        naming the path and language when no readable file exists there.
    """
    if language is None:
        file_path = os.path.join(content_dir, f"{section}.md")
    else:
        file_path = os.path.join(content_dir, language, f"{section}.md")

    # Try the read directly instead of checking os.path.exists() first: the
    # file can disappear between the check and the open, and a directory at
    # this path passes the existence check but cannot be opened. Both cases
    # now yield the placeholder rather than an unhandled exception.
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            return f.read()
    except (FileNotFoundError, IsADirectoryError, NotADirectoryError):
        return f"## Content missing for {file_path} in {language}"
pyproject.toml CHANGED
@@ -22,6 +22,7 @@ dependencies = [
22
  "gradio>=5.9.1",
23
  "datasets>=3.2.0",
24
  "pandas>=2.2.3",
 
25
  ]
26
 
27
  [project.urls]
@@ -67,4 +68,4 @@ target-version = "py310"
67
 
68
  [tool.ruff.lint]
69
  ignore = ["C901", "E741", "W605"]
70
- select = ["C", "E", "F", "I", "W"]
 
22
  "gradio>=5.9.1",
23
  "datasets>=3.2.0",
24
  "pandas>=2.2.3",
25
+ "jinja2>=3.1.4",
26
  ]
27
 
28
  [project.urls]
 
68
 
69
  [tool.ruff.lint]
70
  ignore = ["C901", "E741", "W605"]
71
+ select = ["C", "E", "F", "I", "W"]
todo.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ TODO: language: it is perhaps best to do this with a change event and update the value on each component. Also, we should find a nice way to keep the language state if we update, e.g. use browser state.
3
+ TODO: graph viz of pipeline
4
+ TODO: Separate analysis tab, https://www.gradio.app/docs/gradio/highlightedtext, https://huggingface.co/spaces/pngwn/gradio_imageslider
5
+ TODO: Separate "fiftyone tab", use https://www.gradio.app/docs/gradio/gallery
6
+ TODO: add support for batch inference, with images loaded through a file path or S3.
7
+ TODO: support fetching from iiif-lb to run inference on a custom run.
8
+ TODO: accordion on side tab
9
+ TODO: a lot of documentation / tutorials
10
+ TODO: toggle dark and light mode: https://github.com/gradio-app/gradio/issues/7384
11
+ TODO: ssr mode: https://www.gradio.app/docs/gradio/blocks
12
+ TODO: enable monitoring and test api mode: https://www.gradio.app/docs/gradio/blocks
13
+ TODO: test usage of modal: https://www.gradio.app/custom-components/gallery
14
+ TODO: new docker container.. https://huggingface.co/spaces/pngwn/gradio-docker/blob/main/Dockerfile, need cuda and uv stuff..
translation.yaml ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ ENG:
2
+ Language: Language
3
+ Home: Home
4
+ Simple HTR: Simple HTR
5
+ Custom HTR: Custom HTR
6
+ SWE:
7
+ Language: Language
8
+ Home: Home
9
+ Simple HTR: Simple HTR
10
+ Custom HTR: Custom HTR
uv.lock CHANGED
@@ -754,6 +754,7 @@ dependencies = [
754
  { name = "datasets", marker = "platform_machine == 'aarch64' or platform_system != 'Linux' or sys_platform != 'win32'" },
755
  { name = "gradio", marker = "platform_machine == 'aarch64' or platform_system != 'Linux' or sys_platform != 'win32'" },
756
  { name = "htrflow", marker = "platform_machine == 'aarch64' or platform_system != 'Linux' or sys_platform != 'win32'" },
 
757
  { name = "pandas", marker = "platform_machine == 'aarch64' or platform_system != 'Linux' or sys_platform != 'win32'" },
758
  { name = "torch", marker = "platform_machine == 'aarch64' or platform_system != 'Linux' or sys_platform != 'win32'" },
759
  ]
@@ -780,6 +781,7 @@ requires-dist = [
780
  { name = "datasets", specifier = ">=3.2.0" },
781
  { name = "gradio", specifier = ">=5.9.1" },
782
  { name = "htrflow", specifier = "==0.1.3" },
 
783
  { name = "mmcv", marker = "extra == 'openmmlab'", specifier = "==2.0.1" },
784
  { name = "mmdet", marker = "extra == 'openmmlab'", specifier = "==3.0.0" },
785
  { name = "mmengine", marker = "extra == 'openmmlab'", specifier = "==0.7.4" },
 
754
  { name = "datasets", marker = "platform_machine == 'aarch64' or platform_system != 'Linux' or sys_platform != 'win32'" },
755
  { name = "gradio", marker = "platform_machine == 'aarch64' or platform_system != 'Linux' or sys_platform != 'win32'" },
756
  { name = "htrflow", marker = "platform_machine == 'aarch64' or platform_system != 'Linux' or sys_platform != 'win32'" },
757
+ { name = "jinja2", marker = "platform_machine == 'aarch64' or platform_system != 'Linux' or sys_platform != 'win32'" },
758
  { name = "pandas", marker = "platform_machine == 'aarch64' or platform_system != 'Linux' or sys_platform != 'win32'" },
759
  { name = "torch", marker = "platform_machine == 'aarch64' or platform_system != 'Linux' or sys_platform != 'win32'" },
760
  ]
 
781
  { name = "datasets", specifier = ">=3.2.0" },
782
  { name = "gradio", specifier = ">=5.9.1" },
783
  { name = "htrflow", specifier = "==0.1.3" },
784
+ { name = "jinja2", specifier = ">=3.1.4" },
785
  { name = "mmcv", marker = "extra == 'openmmlab'", specifier = "==2.0.1" },
786
  { name = "mmdet", marker = "extra == 'openmmlab'", specifier = "==3.0.0" },
787
  { name = "mmengine", marker = "extra == 'openmmlab'", specifier = "==0.7.4" },