amlpai04 committed on
Commit
44c7f77
·
1 Parent(s): b670e83

finish tabs job and templating

Browse files
Files changed (5) hide show
  1. .gitignore +2 -0
  2. app/main.py +31 -3
  3. app/tabs/adv_htrflow_tab.py +0 -73
  4. app/tabs/submit.py +70 -145
  5. app/tabs/templating.py +126 -68
.gitignore CHANGED
@@ -1,5 +1,7 @@
1
  venv/
2
  .vscode/
 
 
3
 
4
  # Byte-compiled / optimized / DLL files
5
  */__pycache__
 
1
  venv/
2
  .vscode/
3
+ .cache/
4
+ outputs/
5
 
6
  # Byte-compiled / optimized / DLL files
7
  */__pycache__
app/main.py CHANGED
@@ -1,13 +1,13 @@
1
  import gradio as gr
2
 
3
  from app.gradio_config import css, theme
4
- from app.tabs.adv_htrflow_tab import adv_htrflow_pipeline
5
- from app.tabs.submit import submit
6
  from app.tabs.examples_tab import examples
7
  from app.tabs.templating import (
8
  templating_block,
9
  TEMPLATE_IMAGE_FOLDER,
10
  TEMPLATE_YAML_FOLDER,
 
11
  )
12
  from app.utils.md_helper import load_markdown
13
 
@@ -34,8 +34,36 @@ with gr.Blocks(title="HTRflow", theme=theme, css=css) as demo:
34
  with gr.Tab(label="Output & Visualize") as tab_examples:
35
  examples.render()
36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
  demo.queue()
39
 
40
  if __name__ == "__main__":
41
- demo.launch(server_name="0.0.0.0", server_port=7862, enable_monitoring=False)
 
 
 
 
 
 
1
  import gradio as gr
2
 
3
  from app.gradio_config import css, theme
4
+ from app.tabs.submit import submit, custom_template_yaml
 
5
  from app.tabs.examples_tab import examples
6
  from app.tabs.templating import (
7
  templating_block,
8
  TEMPLATE_IMAGE_FOLDER,
9
  TEMPLATE_YAML_FOLDER,
10
+ template_output_yaml_code,
11
  )
12
  from app.utils.md_helper import load_markdown
13
 
 
34
  with gr.Tab(label="Output & Visualize") as tab_examples:
35
  examples.render()
36
 
37
+ @demo.load(
38
+ inputs=[template_output_yaml_code],
39
+ outputs=[template_output_yaml_code],
40
+ )
41
+ def inital_yaml_code(template_output_yaml_code):
42
+ return template_output_yaml_code
43
+
44
def sync_yaml_state(input_value, state_value):
    """Copy the source YAML into the target editor only when they differ.

    Args:
        input_value: YAML text from the editor acting as the source.
        state_value: YAML text currently held by the editor being updated.

    Returns:
        ``input_value`` when the two differ, otherwise ``gr.skip()`` so the
        target component is left untouched.
    """
    if input_value == state_value:
        return gr.skip()
    return input_value
47
+
48
+ tab_submit.select(
49
+ inputs=[template_output_yaml_code, custom_template_yaml],
50
+ outputs=[custom_template_yaml],
51
+ fn=sync_yaml_state,
52
+ )
53
+
54
+ tab_templating.select(
55
+ inputs=[custom_template_yaml, template_output_yaml_code],
56
+ outputs=[template_output_yaml_code],
57
+ fn=sync_yaml_state,
58
+ )
59
+
60
 
61
  demo.queue()
62
 
63
  if __name__ == "__main__":
64
+ demo.launch(
65
+ server_name="0.0.0.0",
66
+ server_port=7862,
67
+ enable_monitoring=False,
68
+ show_error=True,
69
+ )
app/tabs/adv_htrflow_tab.py DELETED
@@ -1,73 +0,0 @@
1
- import gradio as gr
2
-
3
-
4
- with gr.Blocks() as adv_htrflow_pipeline:
5
- with gr.Row(variant="panel"):
6
- with gr.Column(scale=3):
7
-
8
- image_batch_input = gr.Gallery(
9
- file_types=["image"],
10
- label="Upload images",
11
- interactive=True,
12
- object_fit="cover",
13
- preview=True,
14
- columns=5,
15
- )
16
-
17
- with gr.Row(visible=True) as yaml_pipeline:
18
- with gr.Accordion(label="Run Template", open=False):
19
-
20
- custom_template_yaml = gr.Code(
21
- value="Paste your custom pipeline here",
22
- language="yaml",
23
- label="yaml",
24
- # show_label=False,
25
- interactive=True,
26
- lines=5,
27
- )
28
-
29
- with gr.Row():
30
- run_button = gr.Button("Submit", variant="primary", scale=0)
31
- cancel_button = gr.Button(
32
- "stop", variant="stop", scale=0, visible=False
33
- )
34
- d = gr.DownloadButton(
35
- "Download the file", visible=False, scale=0
36
- ) # TODO: This should be hidden until the run button is clicked
37
-
38
- textbox_ = gr.Textbox(scale=0, visible=False)
39
-
40
- with gr.Column(scale=3):
41
- with gr.Tabs():
42
- with gr.Tab("HTR ouput"):
43
- gr.CheckboxGroup(
44
- ["Reading Order", "Line", "Region", "Word"],
45
- info="Checkboxgroup should be basedon output structure from htrflow",
46
- )
47
-
48
- gr.Image(
49
- interactive=False,
50
- show_fullscreen_button=True,
51
- show_share_button=True,
52
- )
53
-
54
- with gr.Tab("Table"):
55
- pass
56
- with gr.Tab("Analysis"):
57
- # TODO add https://www.gradio.app/docs/gradio/highlightedtext and graph of run graph
58
- pass
59
-
60
- def foo():
61
- gr.Info("hello morgan")
62
- return gr.update(visible=True), "test"
63
-
64
- click_event = run_button.click(
65
- fn=foo, inputs=None, outputs=[cancel_button, textbox_]
66
- ).then(fn=lambda: gr.update(visible=False), inputs=None, outputs=cancel_button)
67
-
68
- cancel_button.click(
69
- fn=lambda: gr.update(visible=False),
70
- inputs=None,
71
- outputs=cancel_button,
72
- cancels=[click_event],
73
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/tabs/submit.py CHANGED
@@ -1,154 +1,79 @@
1
  import time
2
  import gradio as gr
 
 
 
3
 
4
- default_template_yaml = """
5
- steps:
6
 
7
- # Region segmentation
8
- - step: Segmentation
9
- settings:
10
- model: yolo
11
- model_settings:
12
- model: Riksarkivet/yolov9-regions-1
13
- revision: 7c44178d85926b4a096c55c89bf224855a201fbf
14
- generation_settings:
15
- batch_size: 32
16
- half: true
17
- conf: 0.5 # confidence threshold - keep all boxes with conf > 0.5
18
 
19
- # Line segmentation
20
- - step: Segmentation
21
- settings:
22
- model: yolo
23
- model_settings:
24
- model: Riksarkivet/yolov9-lines-within-regions-1
25
- revision: ea2f8987cba316abc62762f3030266ec8875338d
26
- generation_settings:
27
- batch_size: 16
28
- half: true
29
-
30
- - step: FilterRegionsByShape
31
- settings:
32
- min_ratio: 0.5 # keep lines that have at least a 1:2 (=0.5) width-to-height ratio
33
-
34
- - step: TextRecognition
35
- settings:
36
- model: ORTWordLevelTrocr
37
- model_settings:
38
- model: ./model
39
- generation_settings:
40
- batch_size: 64
41
- num_beams: 1
42
- max_new_tokens: 64
43
-
44
- - step: ReadingOrderMarginalia
45
- settings:
46
- two_page: true
47
-
48
- # Remove garbage lines: anything below 0.7 in confidence
49
- - step: RemoveLowTextConfidenceLines
50
- settings:
51
- threshold: 0.7
52
-
53
- # Remove garbage regions: any region with mean confidence < 0.75 AFTER the below-0.7 lines have been removed
54
- - step: RemoveLowTextConfidenceRegions
55
- settings:
56
- threshold: 0.75
57
-
58
- - step: Export
59
- settings:
60
- dest: output/job
61
- format: alto
62
- template_dir: /app/config
63
- template_name: alto-with-RA-pageID
64
-
65
- - step: Export
66
- settings:
67
- dest: output/job
68
- format: json
69
- indent: null
70
-
71
- # Sets label format to regionX_lineY_wordZ
72
- labels:
73
- level_labels:
74
- - region
75
- - line
76
- - word
77
- sep: _
78
- template: "{label}{number}"
79
- """
80
 
81
  with gr.Blocks() as submit:
82
-
83
- # Row 2: Image Upload and Editor
84
- with gr.Row(variant="panel"):
85
- image_editor = gr.ImageEditor(
86
- label="Upload the image you want to transcribe",
87
- sources="upload",
88
- interactive=True,
89
- layers=False,
90
- eraser=False,
91
- brush=False,
92
- height=400,
93
- transforms="crop",
94
- crop_size="16,5",
95
- visible=False,
96
- )
97
- image_mask = gr.Gallery(
98
- file_types=["image"],
99
- label="Upload the image you want to transcribe",
100
- interactive=True,
101
- height=400,
102
- object_fit="cover",
103
- columns=5,
104
- )
105
-
106
- # Row 3: Run Template Accordion
107
- with gr.Row():
108
- with gr.Accordion(label="Pipeline yaml (expand to customize settings)", open=False):
109
- # Add the documentation link with an emoji first
110
- docs_link = gr.HTML(
111
- value='<p>Bellow is the pipeline yaml that will be used. <a href="https://ai-riksarkivet.github.io/htrflow/latest/getting_started/pipeline.html#example-pipelines" target="_blank">📚 Click here 📚</a> for a detailed description on how to customize the configuration</p>'
112
- )
113
- # Then, the code block with the template YAML
114
- custom_template_yaml = gr.Code(
115
- value=default_template_yaml, # Set the default template YAML here
116
- language="yaml",
117
- label="yaml",
118
- interactive=True,
119
- lines=5,
120
- )
121
-
122
- # Row 4: Submit and Cancel Buttons
123
- with gr.Row():
124
- run_button = gr.Button("Submit", variant="primary", scale=0)
125
- cancel_button = gr.Button("Stop", variant="stop", scale=0, visible=False)
126
- d = gr.DownloadButton("Download the file", visible=False, scale=0)
127
- textbox_ = gr.Textbox(scale=0, visible=False)
128
-
129
- # Cancel button functionality
130
- cancel_button.click(
131
- fn=lambda: gr.update(visible=False),
132
- inputs=None,
133
- outputs=cancel_button,
134
  )
135
-
136
- # Image Editor Upload handling
137
- image_editor.upload(
138
- fn=None,
139
- inputs=None,
140
- outputs=None,
141
- js="""
142
- () => {
143
- const button = document.querySelector('button[aria-label="Transform button"][title="Transform button"]');
144
- if (button) {
145
- button.click();
146
- console.log('Transform button clicked.');
147
- } else {
148
- console.error('Transform button not found.');
149
- }
150
- }
151
- """,
152
- ).then(
153
- fn=lambda: gr.update(crop=None), inputs=None, outputs=image_editor
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
  )
 
1
  import time
2
  import gradio as gr
3
+ from htrflow.pipeline.pipeline import Pipeline
4
+ from htrflow.pipeline.steps import auto_import
5
+ import yaml
6
 
 
 
7
 
8
+ MAX_IMAGES = 5 # Maximum allowed images
 
 
 
 
 
 
 
 
 
 
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
  with gr.Blocks() as submit:
12
+ with gr.Column(variant="panel"):
13
+ with gr.Group():
14
+ with gr.Row():
15
+ with gr.Column(scale=1):
16
+ batch_image_gallery = gr.Gallery(
17
+ file_types=["image"],
18
+ label="Upload the images you want to transcribe",
19
+ interactive=True,
20
+ height=400,
21
+ object_fit="cover",
22
+ columns=5,
23
+ )
24
+
25
+ with gr.Column(scale=1):
26
+ custom_template_yaml = gr.Code(
27
+ value="",
28
+ language="yaml",
29
+ label="Pipeline",
30
+ interactive=True,
31
+ )
32
+ with gr.Row():
33
+ run_button = gr.Button("Submit", variant="primary", scale=0, min_width=160)
34
+
35
@batch_image_gallery.upload(
    inputs=batch_image_gallery,
    outputs=[batch_image_gallery],
)
def validate_images(images):
    """Reject an upload that exceeds the allowed number of images.

    Args:
        images: Current value of the gallery after an upload.

    Returns:
        ``images`` unchanged when the count is within ``MAX_IMAGES``;
        otherwise an update that clears the gallery, after showing a warning.
    """
    # Compare against MAX_IMAGES (not a literal) so the limit and the
    # warning text can never drift apart; tolerate an empty/None gallery.
    if images and len(images) > MAX_IMAGES:
        gr.Warning(f"Maximum images you can upload is set to: {MAX_IMAGES}")
        return gr.update(value=None)
    return images
44
+
45
def my_function(custom_template_yaml, batch_image_gallery, progress=gr.Progress()):
    """Run the HTRflow pipeline described by the YAML on the uploaded image.

    Args:
        custom_template_yaml: Pipeline configuration as YAML text.
        batch_image_gallery: Gallery value; each entry is unpacked as an
            ``(image, caption)`` pair.
        progress: Gradio progress tracker (currently unused).

    Returns:
        ``gr.skip()`` so the gallery output is left unchanged.

    Raises:
        gr.Error: If no image was uploaded, or the YAML is invalid or does
            not describe a mapping.
    """
    if not batch_image_gallery:
        raise gr.Error("Upload at least one image before submitting")

    try:
        config = yaml.safe_load(custom_template_yaml)
    except yaml.YAMLError as err:
        raise gr.Error(f"The pipeline is not valid YAML: {err}") from err
    # An empty editor parses to None; the lookups below need a mapping.
    if not isinstance(config, dict):
        raise gr.Error("The pipeline is empty or is not a YAML mapping")

    # NOTE(review): only the first uploaded image is processed, as in the
    # original implementation -- confirm whether the full batch is intended.
    image, _ = batch_image_gallery[0]

    pipe = Pipeline.from_config(config)
    collections = auto_import(image)

    label = "HTRflow demo"

    for collection in collections:
        if "labels" in config:
            collection.set_label_format(**config["labels"])
        collection.label = label
        # Print inside the loop so nothing is referenced when no collection
        # was imported (the old trailing print raised NameError in that case).
        print(pipe.run(collection))

    return gr.skip()
74
+
75
+ run_button.click(
76
+ fn=my_function,
77
+ inputs=[custom_template_yaml, batch_image_gallery],
78
+ outputs=batch_image_gallery,
79
  )
app/tabs/templating.py CHANGED
@@ -2,49 +2,65 @@ import gradio as gr
2
  import os
3
  import re
4
 
5
- template_image_folder = "app/assets/images"
6
- template_yaml_folder = "app/assets/templates"
7
-
8
- image_files = sorted(
9
- [
10
- os.path.join(template_image_folder, img)
11
- for img in os.listdir(template_image_folder)
12
- if img.lower().endswith((".png", ".jpg", ".jpeg", ".webp"))
13
- ],
14
- key=lambda x: (
15
- int(re.search(r"\d+", os.path.basename(x)).group())
16
- if re.search(r"\d+", os.path.basename(x))
17
- else float("inf")
18
- ),
19
- )
20
 
21
- yaml_files = [
22
- os.path.join(template_yaml_folder, yml)
23
- for yml in os.listdir(template_yaml_folder)
24
- if yml.lower().endswith(".yaml")
25
- ]
26
-
27
- yaml_files_numbered = sorted(
28
- [yml for yml in yaml_files if re.match(r"^\d", os.path.basename(yml))],
29
- key=lambda x: (
30
- int(re.search(r"\d+", os.path.basename(x)).group())
31
- if re.search(r"\d+", os.path.basename(x))
32
- else float("inf")
33
- ),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  )
 
35
 
36
- yaml_files_c_letter = [
37
- yml for yml in yaml_files if re.match(r"^[cC]", os.path.basename(yml))
38
- ]
39
 
40
- name_yaml_files_c_letter_cleaned = [
41
- (
42
- os.path.basename(yml)[2:]
43
- if os.path.basename(yml).startswith("c_")
44
- else os.path.basename(yml)
45
- )
46
- for yml in yaml_files_c_letter
47
- ]
48
 
49
 
50
  def get_yaml_content(yaml_path):
@@ -57,39 +73,52 @@ def get_yaml_content(yaml_path):
57
  with gr.Blocks() as templating_block:
58
  with gr.Row(variant="panel"):
59
  with gr.Column(scale=2):
60
- dropdown_selection_template = gr.Dropdown(
61
- label="Choice template",
62
- info="template info",
63
- value="Simple",
64
- choices=["Simple", "Nested", "Custom"],
65
- max_choices=1,
66
- interactive=True,
67
- )
68
- template_image = gr.Image(
69
- label="Example Templates", value=image_files[0], height=400
70
- )
71
 
72
- custom_dropdown = gr.Dropdown(
73
- label="Custom template",
74
- info="Choice a different custom templates...",
75
- value=name_yaml_files_c_letter_cleaned[0],
76
- choices=name_yaml_files_c_letter_cleaned,
77
- max_choices=1,
78
- interactive=True,
79
- visible=False,
80
- )
81
 
82
- output_yaml_code = gr.Code(
83
- language="yaml",
84
- label="yaml",
85
- interactive=True,
86
- visible=True,
87
- lines=15,
88
- )
 
 
 
 
 
 
 
 
 
 
 
89
 
90
  @dropdown_selection_template.select(
91
  inputs=dropdown_selection_template,
92
- outputs=[template_image, output_yaml_code, custom_dropdown],
 
 
 
 
93
  )
94
  def on_template_select(dropdown_selection_template):
95
 
@@ -107,8 +136,37 @@ with gr.Blocks() as templating_block:
107
  f"{dropdown_selection_template} - is not a valid Template selection"
108
  )
109
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  templating_block.load(
111
  fn=on_template_select,
112
  inputs=dropdown_selection_template,
113
- outputs=[template_image, output_yaml_code, custom_dropdown],
 
 
 
 
114
  )
 
2
  import os
3
  import re
4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
def get_sorted_files(folder, extensions):
    """List files in *folder* with a matching extension, ordered numerically.

    Args:
        folder: Directory to scan (non-recursive).
        extensions: Suffix or tuple of suffixes, compared case-insensitively.

    Returns:
        Paths joined with *folder*, sorted by the first run of digits in the
        base name; names without digits sort last.
    """

    def numeric_key(path):
        found = re.search(r"\d+", os.path.basename(path))
        if found is None:
            return float("inf")
        return int(found.group())

    matching = []
    for entry in os.listdir(folder):
        if entry.lower().endswith(extensions):
            matching.append(os.path.join(folder, entry))
    matching.sort(key=numeric_key)
    return matching
20
+
21
+
22
def filter_files_by_prefix(files, prefix_pattern):
    """Keep the paths whose base name matches *prefix_pattern* at its start.

    Args:
        files: Iterable of file paths.
        prefix_pattern: Regular expression applied with ``re.match`` to each
            path's base name.

    Returns:
        The matching paths, in their original order.
    """
    kept = []
    for path in files:
        if re.match(prefix_pattern, os.path.basename(path)):
            kept.append(path)
    return kept
25
+
26
+
27
def clean_file_names(files, prefix_to_remove):
    """Return the base name of each path with a leading prefix stripped.

    Args:
        files: Iterable of file paths.
        prefix_to_remove: Prefix removed from a base name when it starts
            with it; other names are returned unchanged.

    Returns:
        A list of base names, one per input path, in the same order.
    """
    skip = len(prefix_to_remove)
    cleaned = []
    for path in files:
        name = os.path.basename(path)
        if name.startswith(prefix_to_remove):
            name = name[skip:]
        cleaned.append(name)
    return cleaned
37
+
38
+
39
def get_yaml_content(yaml_path):
    """Read a YAML template from disk and return its text.

    Args:
        yaml_path: Path to the template file.

    Returns:
        The file's content, or the placeholder string
        ``"YAML content not available"`` when the path is not a file.
    """
    if not os.path.isfile(yaml_path):
        return "YAML content not available"
    # Decode explicitly as UTF-8 so templates with non-ASCII characters read
    # the same regardless of the platform's default locale encoding.
    with open(yaml_path, "r", encoding="utf-8") as file:
        return file.read()
45
+
46
+
47
+ # Folder Paths
48
+ TEMPLATE_IMAGE_FOLDER = "app/assets/images"
49
+ TEMPLATE_YAML_FOLDER = "app/assets/templates"
50
+
51
+ # File Retrieval
52
+ image_files = get_sorted_files(
53
+ TEMPLATE_IMAGE_FOLDER, (".png", ".jpg", ".jpeg", ".webp")
54
  )
55
+ yaml_files = get_sorted_files(TEMPLATE_YAML_FOLDER, (".yaml",))
56
 
57
+ # Categorize YAML Files
58
+ yaml_files_numbered = filter_files_by_prefix(yaml_files, r"^\d")
59
+ yaml_files_c_letter = filter_files_by_prefix(yaml_files, r"^[cC]")
60
 
61
+ # Create Mappings
62
+ name_yaml_files_c_letter_cleaned = clean_file_names(yaml_files_c_letter, "c_")
63
+ name_to_yaml_map = dict(zip(name_yaml_files_c_letter_cleaned, yaml_files_c_letter))
 
 
 
 
 
64
 
65
 
66
  def get_yaml_content(yaml_path):
 
73
  with gr.Blocks() as templating_block:
74
  with gr.Row(variant="panel"):
75
  with gr.Column(scale=2):
76
+ with gr.Row():
77
+ dropdown_selection_template = gr.Dropdown(
78
+ label="Choose template",
79
+ info="template info",
80
+ value="Simple",
81
+ choices=["Simple", "Nested", "Custom"],
82
+ multiselect=False,
83
+ interactive=True,
84
+ )
 
 
85
 
86
+ custom_dropdown_selection_template = gr.Dropdown(
87
+ label="Custom template",
88
+ info="Choice a different custom templates...",
89
+ value=name_yaml_files_c_letter_cleaned[0],
90
+ choices=name_yaml_files_c_letter_cleaned,
91
+ multiselect=False,
92
+ interactive=True,
93
+ visible=False,
94
+ )
95
 
96
+ with gr.Group():
97
+ with gr.Row():
98
+ with gr.Column(scale=1):
99
+ template_image = gr.Image(
100
+ label="Example Templates", value=image_files[0], height=400
101
+ )
102
+ with gr.Column(scale=1):
103
+
104
+ template_output_yaml_code = gr.Code(
105
+ language="yaml",
106
+ label="Pipeline",
107
+ interactive=True,
108
+ visible=True,
109
+ )
110
+ docs_link = gr.HTML(
111
+ value='<p><a href="https://ai-riksarkivet.github.io/htrflow/latest/getting_started/pipeline.html#example-pipelines" target="_blank">📚 Click here 📚</a> for a detailed description on how to customize the configuration</p>',
112
+ visible=True,
113
+ )
114
 
115
  @dropdown_selection_template.select(
116
  inputs=dropdown_selection_template,
117
+ outputs=[
118
+ template_image,
119
+ template_output_yaml_code,
120
+ custom_dropdown_selection_template,
121
+ ],
122
  )
123
  def on_template_select(dropdown_selection_template):
124
 
 
136
  f"{dropdown_selection_template} - is not a valid Template selection"
137
  )
138
 
139
@custom_dropdown_selection_template.select(
    inputs=custom_dropdown_selection_template,
    outputs=[template_output_yaml_code],
)
def on_custom_template_select(custom_template_selection):
    """Load the YAML of the chosen custom template into the code editor.

    Args:
        custom_template_selection: Display name picked in the custom
            template dropdown.

    Returns:
        The template's YAML text.

    Raises:
        gr.Error: If the name has no entry in ``name_to_yaml_map``.
    """
    yaml_path = name_to_yaml_map.get(custom_template_selection)

    if not yaml_path:
        # gr.Error must be raised, not returned: a returned exception object
        # would be sent to the Code component as its value.
        raise gr.Error(
            f"{custom_template_selection} - is not a valid Custom Template selection"
        )
    return get_yaml_content(yaml_path)
153
+
154
@dropdown_selection_template.select(
    inputs=dropdown_selection_template,
    outputs=[template_output_yaml_code],
)
def check_for_custom_template(dropdown_selection_template):
    """Make the YAML editor visible when the "Custom" template is chosen.

    Args:
        dropdown_selection_template: Value selected in the template dropdown.

    Returns:
        A visibility update for the YAML editor when the value is
        ``"Custom"``; otherwise ``gr.skip()`` to leave it unchanged.
    """
    is_custom = dropdown_selection_template == "Custom"
    return gr.update(visible=True) if is_custom else gr.skip()
163
+
164
  templating_block.load(
165
  fn=on_template_select,
166
  inputs=dropdown_selection_template,
167
+ outputs=[
168
+ template_image,
169
+ template_output_yaml_code,
170
+ custom_dropdown_selection_template,
171
+ ],
172
  )