carpelan committed on
Commit
8d18f89
·
1 Parent(s): 5efd7b1

Finished export tab

Browse files
app/gradio_config.py CHANGED
@@ -102,4 +102,17 @@ hr.region-divider {
102
  gap: 1rem;
103
  text-align: center;
104
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  """
 
102
  gap: 1rem;
103
  text-align: center;
104
  }
105
+
106
+ .modal-block {
107
+ width: 60%;
108
+ padding: 1rem;
109
+ }
110
+
111
+ @media (max-width: 1024px) { /* mobile and standing iPads */
112
+ .modal-block {
113
+ width: 100%;
114
+ }
115
+ }
116
+
117
+
118
  """
app/main.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import os
2
 
3
  import gradio as gr
@@ -10,6 +11,10 @@ from app.tabs.submit import collection_submit_state, submit
10
  from app.tabs.visualizer import collection as collection_viz_state
11
  from app.tabs.visualizer import visualizer
12
 
 
 
 
 
13
  TEMPLATE_YAML_FOLDER = "app/assets/templates"
14
  gr.set_static_paths(paths=[TEMPLATE_YAML_FOLDER])
15
 
@@ -28,7 +33,7 @@ def load_markdown(language, section, content_dir="app/content"):
28
 
29
 
30
  def activate_tab(collection):
31
- return gr.update(interactive = collection is not None)
32
 
33
 
34
  matomo = """
 
1
+ import logging
2
  import os
3
 
4
  import gradio as gr
 
11
  from app.tabs.visualizer import collection as collection_viz_state
12
  from app.tabs.visualizer import visualizer
13
 
14
+ # Suppress transformers logging
15
+ logging.getLogger("transformers").setLevel(logging.ERROR)
16
+
17
+
18
  TEMPLATE_YAML_FOLDER = "app/assets/templates"
19
  gr.set_static_paths(paths=[TEMPLATE_YAML_FOLDER])
20
 
 
33
 
34
 
35
  def activate_tab(collection):
36
+ return gr.update(interactive=collection is not None)
37
 
38
 
39
  matomo = """
app/tabs/export.py CHANGED
@@ -1,67 +1,97 @@
 
 
 
 
1
  import gradio as gr
2
- import yaml
3
- from htrflow.pipeline.pipeline import Pipeline
4
  from htrflow.volume.volume import Collection
5
 
 
 
 
 
6
 
7
- def run_htrflow(custom_template_yaml, collection, progress=gr.Progress()):
 
8
  """
9
- Executes the HTRflow pipeline based on the provided YAML configuration and batch images.
10
- Args:
11
- custom_template_yaml (str): YAML string specifying the HTRflow pipeline configuration.
12
- batch_image_gallery (list): List of uploaded images to process in the pipeline.
13
- Returns:
14
- tuple: A collection of processed items, list of exported file paths, and a Gradio update object.
15
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
- if custom_template_yaml is None or len(custom_template_yaml) < 1:
18
- gr.Warning("HTRflow: Please insert a HTRflow-yaml template")
19
- try:
20
- config = yaml.safe_load(custom_template_yaml)
21
- except Exception as e:
22
- gr.Warning(f"HTRflow: Error loading YAML configuration: {e}")
23
  return gr.skip()
24
 
25
- pipe = Pipeline.from_config(config)
 
26
 
27
- collection: Collection = pipe.run(collection, progress=progress)
 
 
 
 
 
28
 
29
- gr.Info("HTRflow: Export complete!")
30
 
31
- yield collection, gr.skip()
32
 
33
 
34
  with gr.Blocks() as export:
35
  collection = gr.State()
 
36
 
37
  gr.Markdown("## Export")
38
- with gr.Group():
39
- with gr.Row(equal_height=True):
40
- with gr.Column(scale=1):
41
- selected_output = gr.Dropdown(
42
- label="Export file format",
43
- info="Select (multiple) what export format you want",
44
- choices=["txt", "alto", "page", "json"],
 
 
45
  multiselect=True,
46
  interactive=True,
47
  )
48
- name_of_files = gr.Textbox(
49
- label="File name",
50
- info="All files will be given the same name with a suffix of the file extension",
51
- placeholder="my_htr_file",
52
- )
53
 
54
- with gr.Column(scale=1):
55
- download_files = gr.Files(interactive=False)
56
  with gr.Row():
57
  export_button = gr.Button("Export", scale=0, min_width=200, variant="primary")
58
 
59
- @export_button.click(inputs=[], outputs=[])
60
- def blable():
61
- pass
62
-
63
-
64
- # TODO: test pylaia works...
65
- # TODO: add other pipeliens for other language like english and hebrew model?
66
- # TODO: add other pipeliens for other language like english and hebrew model?
67
- # TODO kolla över toast. toast vid export?
 
1
+ import os
2
+ import shutil
3
+ from pathlib import Path
4
+
5
  import gradio as gr
 
 
6
  from htrflow.volume.volume import Collection
7
 
8
+ DEFAULT_C = "txt"
9
+ CHOICES = ["txt", "alto", "page", "json"]
10
+
11
+ current_dir = Path(__file__).parent
12
 
13
+
14
+ def rename_files_in_directory(directory, fmt):
15
  """
16
+ If fmt is "alto" or "page", rename each file in the directory so that its
17
+ base name ends with _{fmt} (if it doesn't already). For other formats, leave
18
+ the file names unchanged.
19
+ Returns a list of the (new or original) file paths.
 
 
20
  """
21
+ renamed = []
22
+ for root, _, files in os.walk(directory):
23
+ for file in files:
24
+ old_path = os.path.join(root, file)
25
+
26
+ if fmt in ["alto", "page"]:
27
+ name, ext = os.path.splitext(file)
28
+
29
+ if not name.endswith(f"_{fmt}"):
30
+ new_name = f"{name}_{fmt}{ext}"
31
+ new_path = os.path.join(root, new_name)
32
+ os.rename(old_path, new_path)
33
+ renamed.append(new_path)
34
+ else:
35
+ renamed.append(old_path)
36
+ else:
37
+ renamed.append(old_path)
38
+ return renamed
39
+
40
+
41
+ def export_files(file_formats, collection: Collection, req: gr.Request):
42
+ if len(file_formats) < 1:
43
+ gr.Warning("No export file format was selected. Please select a File format")
44
+ return gr.skip()
45
 
46
+ if collection is None:
47
+ gr.Warning("No image has been transcribed yet. Please go to the Upload tab")
 
 
 
 
48
  return gr.skip()
49
 
50
+ temp_user_dir = current_dir / str(req.session_hash)
51
+ temp_user_dir.mkdir(exist_ok=True)
52
 
53
+ all_renamed_files = []
54
+ for fmt in file_formats:
55
+ temp_user_file_dir = os.path.join(temp_user_dir, fmt)
56
+ collection.save(directory=temp_user_file_dir, serializer=fmt)
57
+ renamed = rename_files_in_directory(temp_user_file_dir, fmt)
58
+ all_renamed_files.extend(renamed)
59
 
60
+ unique_files = list(dict.fromkeys(all_renamed_files))
61
 
62
+ return unique_files, temp_user_dir
63
 
64
 
65
  with gr.Blocks() as export:
66
  collection = gr.State()
67
+ temp_state = gr.State()
68
 
69
  gr.Markdown("## Export")
70
+ gr.Markdown("Choose file format for export.")
71
+ with gr.Row():
72
+ with gr.Column(scale=1):
73
+ with gr.Group():
74
+ export_file_format = gr.Dropdown(
75
+ value=DEFAULT_C,
76
+ label="File format",
77
+ info="Select export format(s)",
78
+ choices=CHOICES,
79
  multiselect=True,
80
  interactive=True,
81
  )
82
+ download_files = gr.Files(label="Download files", interactive=False)
83
+ with gr.Column(scale=1):
84
+ pass
 
 
85
 
 
 
86
  with gr.Row():
87
  export_button = gr.Button("Export", scale=0, min_width=200, variant="primary")
88
 
89
+ export_button.click(
90
+ fn=export_files,
91
+ inputs=[export_file_format, collection],
92
+ outputs=[download_files, temp_state],
93
+ ).then(
94
+ fn=lambda folder: shutil.rmtree(folder) if folder else None,
95
+ inputs=temp_state,
96
+ outputs=None,
97
+ )
app/tabs/submit.py CHANGED
@@ -6,7 +6,7 @@ import gradio as gr
6
  import yaml
7
  from gradio_modal import Modal
8
  from htrflow.pipeline.pipeline import Pipeline
9
- from htrflow.pipeline.steps import auto_import, init_step
10
  from htrflow.volume.volume import Collection
11
 
12
  logger = logging.getLogger(__name__)
@@ -106,22 +106,20 @@ def run_htrflow(custom_template_yaml, batch_image_gallery, progress=gr.Progress(
106
 
107
  images = [temp_img[0] for temp_img in batch_image_gallery]
108
 
 
 
109
  pipe = PipelineWithProgress.from_config(config)
110
- collections = auto_import(images)
111
 
112
  gr.Info(f"HTRflow: processing {len(images)} {'image' if len(images) == 1 else 'images'}.")
113
  progress(0.1, desc="HTRflow: Processing")
114
 
115
- for collection in collections:
116
- if "labels" in config:
117
- collection.set_label_format(**config["labels"])
118
 
119
- collection.label = "HTRflow_demo_output"
120
- collection: Collection = pipe.run(collection, progress=progress)
121
 
122
- progress(1, desc="HTRflow: Finish")
123
- time.sleep(1)
124
- gr.Info("HTRflow: Finish")
125
 
126
  yield collection, gr.skip()
127
 
@@ -160,7 +158,6 @@ def get_selected_example_image(event: gr.SelectData) -> str:
160
  """
161
  Get path to the selected example image.
162
  """
163
- print([event.value["image"]["path"]])
164
  return [event.value["image"]["path"]]
165
 
166
 
@@ -205,22 +202,21 @@ with gr.Blocks() as submit:
205
  object_fit="scale-down",
206
  min_width=250,
207
  )
208
-
209
- image_id = gr.Textbox(
210
  label="Upload by image ID",
211
  info=(
212
  "Use any image from our digitized archives by pasting its image ID found in the "
213
  "<a href='https://sok.riksarkivet.se/bildvisning/R0002231_00005' target='_blank'>image viewer</a>. "
214
  "Press enter to submit."
215
  ),
216
- placeholder="R0002231_00005",
217
  )
218
 
219
- gr.Markdown("## Settings")
220
- gr.Markdown("Select a pipeline that suits your image. You can edit the pipeline if you need to customize it further.")
221
-
222
  with gr.Column(variant="panel", elem_classes="pipeline-panel"):
223
- gr.HTML("Pipeline", elem_classes="pipeline-header", padding=False)
 
 
 
224
 
225
  with gr.Row():
226
  with gr.Column(scale=0):
@@ -232,8 +228,12 @@ with gr.Blocks() as submit:
232
  elem_classes="pipeline-dropdown",
233
  )
234
 
235
- with gr.Column():
236
  edit_pipeline_button = gr.Button("Edit", scale=0)
 
 
 
 
237
 
238
  pipeline_description = gr.HTML(
239
  value=get_pipeline_description,
@@ -242,23 +242,34 @@ with gr.Blocks() as submit:
242
  padding=False,
243
  )
244
 
245
- with Modal(visible=False) as edit_pipeline_modal:
246
- custom_template_yaml = gr.Code(
247
- value=get_yaml,
248
- inputs=pipeline_dropdown,
249
- language="yaml",
250
- container=False,
251
- )
252
- url = "https://ai-riksarkivet.github.io/htrflow/latest/getting_started/pipeline.html#example-pipelines"
253
- gr.HTML(
254
- f'See the <a href="{url}">documentation</a> for a detailed description on how to customize HTRflow pipelines.',
255
- padding=False,
256
- elem_classes="pipeline-help",
257
- )
 
 
 
 
 
 
 
 
 
 
 
 
258
 
259
  with gr.Row():
260
  run_button = gr.Button("Transcribe", variant="primary", scale=0, min_width=200)
261
- progess_bar = gr.Textbox(visible=False, show_label=False)
262
 
263
  @batch_image_gallery.upload(
264
  inputs=batch_image_gallery,
@@ -270,18 +281,20 @@ with gr.Blocks() as submit:
270
  return gr.update(value=None)
271
  return images
272
 
273
- image_id.submit(fn=get_image_from_image_id, inputs=image_id, outputs=batch_image_gallery)
 
 
 
 
 
 
 
 
274
 
275
  run_button.click(
276
- lambda: gr.update(visible=True),
277
- outputs=[progess_bar],
278
- ).then(
279
  fn=run_htrflow,
280
  inputs=[custom_template_yaml, batch_image_gallery],
281
- outputs=[collection_submit_state, progess_bar],
282
- ).then(
283
- lambda: gr.update(visible=False),
284
- outputs=[progess_bar],
285
  )
286
 
287
  examples.select(get_selected_example_image, None, batch_image_gallery)
 
6
  import yaml
7
  from gradio_modal import Modal
8
  from htrflow.pipeline.pipeline import Pipeline
9
+ from htrflow.pipeline.steps import init_step
10
  from htrflow.volume.volume import Collection
11
 
12
  logger = logging.getLogger(__name__)
 
106
 
107
  images = [temp_img[0] for temp_img in batch_image_gallery]
108
 
109
+ collection = Collection(images)
110
+
111
  pipe = PipelineWithProgress.from_config(config)
 
112
 
113
  gr.Info(f"HTRflow: processing {len(images)} {'image' if len(images) == 1 else 'images'}.")
114
  progress(0.1, desc="HTRflow: Processing")
115
 
116
+ collection.label = "demo_output"
 
 
117
 
118
+ collection = pipe.run(collection, progress=progress)
 
119
 
120
+ progress(1, desc="HTRflow: Finish, redirecting to 'Results tab'")
121
+ time.sleep(2)
122
+ gr.Info("Image were succesfully transcribed ✨")
123
 
124
  yield collection, gr.skip()
125
 
 
158
  """
159
  Get path to the selected example image.
160
  """
 
161
  return [event.value["image"]["path"]]
162
 
163
 
 
202
  object_fit="scale-down",
203
  min_width=250,
204
  )
205
+ image_iiif_url = gr.Textbox(
 
206
  label="Upload by image ID",
207
  info=(
208
  "Use any image from our digitized archives by pasting its image ID found in the "
209
  "<a href='https://sok.riksarkivet.se/bildvisning/R0002231_00005' target='_blank'>image viewer</a>. "
210
  "Press enter to submit."
211
  ),
212
+ placeholder="R0002231_00005, R0002231_00006",
213
  )
214
 
 
 
 
215
  with gr.Column(variant="panel", elem_classes="pipeline-panel"):
216
+ gr.Markdown("## Settings")
217
+ gr.Markdown(
218
+ "Select a pipeline that suits your image. You can edit the pipeline if you need to customize it further."
219
+ )
220
 
221
  with gr.Row():
222
  with gr.Column(scale=0):
 
228
  elem_classes="pipeline-dropdown",
229
  )
230
 
231
+ with gr.Column(scale=0, min_width=100):
232
  edit_pipeline_button = gr.Button("Edit", scale=0)
233
+ with gr.Column(scale=3):
234
+ progess_bar = gr.Textbox(visible=False, show_label=False)
235
+ with gr.Column(scale=0, min_width=20):
236
+ pass
237
 
238
  pipeline_description = gr.HTML(
239
  value=get_pipeline_description,
 
242
  padding=False,
243
  )
244
 
245
+ with Modal(
246
+ visible=False,
247
+ ) as edit_pipeline_modal:
248
+ with gr.Column():
249
+ gr.Markdown(
250
+ """
251
+ ## Edit Pipeline
252
+ The code snippet below is a YAML file that the HTRflow app uses to process the image. If you have chosen an
253
+ image from the "Examples" section, the YAML is already a pre-made template tailored to fit the example image.
254
+
255
+ Edit pipeline if needed:
256
+ """
257
+ )
258
+ custom_template_yaml = gr.Code(
259
+ value=get_yaml,
260
+ inputs=pipeline_dropdown,
261
+ language="yaml",
262
+ container=False,
263
+ )
264
+ url = "https://ai-riksarkivet.github.io/htrflow/latest/getting_started/pipeline.html#example-pipelines"
265
+ gr.HTML(
266
+ f'See the <a href="{url}">documentation</a> for a detailed description on how to customize HTRflow pipelines.',
267
+ padding=False,
268
+ elem_classes="pipeline-help",
269
+ )
270
 
271
  with gr.Row():
272
  run_button = gr.Button("Transcribe", variant="primary", scale=0, min_width=200)
 
273
 
274
  @batch_image_gallery.upload(
275
  inputs=batch_image_gallery,
 
281
  return gr.update(value=None)
282
  return images
283
 
284
+ def return_iiif_url(image_ids):
285
+ if isinstance(image_ids, str):
286
+ image_ids = image_ids.split(",")
287
+
288
+ return [
289
+ f"https://lbiiif.riksarkivet.se/arkis!{image_id.strip()}/full/max/0/default.jpg" for image_id in image_ids
290
+ ]
291
+
292
+ image_iiif_url.submit(fn=return_iiif_url, inputs=image_iiif_url, outputs=batch_image_gallery)
293
 
294
  run_button.click(
 
 
 
295
  fn=run_htrflow,
296
  inputs=[custom_template_yaml, batch_image_gallery],
297
+ outputs=[collection_submit_state, batch_image_gallery],
 
 
 
298
  )
299
 
300
  examples.select(get_selected_example_image, None, batch_image_gallery)
app/tabs/visualizer.py CHANGED
@@ -1,4 +1,5 @@
1
  import gradio as gr
 
2
  from jinja2 import Environment, FileSystemLoader
3
 
4
  _ENV = Environment(loader=FileSystemLoader("app/assets/jinja-templates"))
@@ -6,21 +7,21 @@ _IMAGE_TEMPLATE = _ENV.get_template("image")
6
  _TRANSCRIPTION_TEMPLATE = _ENV.get_template("transcription")
7
 
8
 
9
- def render_image(collection, current_page_index):
10
  return _IMAGE_TEMPLATE.render(
11
  page=collection[current_page_index],
12
  lines=collection[current_page_index].traverse(lambda node: node.is_line()),
13
  )
14
 
15
 
16
- def render_transcription(collection, current_page_index):
17
  regions = collection[current_page_index].traverse(
18
  lambda node: node.children and all(child.is_line() for child in node)
19
  )
20
  return _TRANSCRIPTION_TEMPLATE.render(regions=regions)
21
 
22
 
23
- def toggle_navigation_button(collection):
24
  visible = len(collection.pages) > 1
25
  return gr.update(visible=visible)
26
 
@@ -30,12 +31,12 @@ def activate_left_button(current_page_index):
30
  return gr.update(interactive=interactive)
31
 
32
 
33
- def activate_right_button(collection, current_page_index):
34
  interactive = current_page_index + 1 < len(collection.pages)
35
  return gr.update(interactive=interactive)
36
 
37
 
38
- def right_button_click(collection, current_page_index):
39
  max_index = len(collection.pages) - 1
40
  return min(max_index, current_page_index + 1)
41
 
@@ -44,17 +45,18 @@ def left_button_click(current_page_index):
44
  return max(0, current_page_index - 1)
45
 
46
 
47
- def update_image_caption(collection, current_page_index):
48
  n_pages = len(collection.pages)
49
  return f"Image {current_page_index + 1} of {n_pages}: `{collection[current_page_index].label}`"
50
 
51
 
52
  with gr.Blocks() as visualizer:
53
  gr.Markdown("# Result")
54
- gr.Markdown("The image to the left shows where HTRflow found text in the image. The transcription can be seen to the right.")
 
 
55
 
56
  with gr.Row():
57
-
58
  # Annotated image panel
59
  with gr.Column(scale=2):
60
  gr.Markdown("## Annotated image")
@@ -70,7 +72,6 @@ with gr.Blocks() as visualizer:
70
  gr.Markdown("## Transcription")
71
  transcription = gr.HTML(elem_classes="transcription", container=True, max_height="60vh")
72
 
73
-
74
  collection = gr.State()
75
  current_page_index = gr.State(0)
76
 
 
1
  import gradio as gr
2
+ from htrflow.volume.volume import Collection
3
  from jinja2 import Environment, FileSystemLoader
4
 
5
  _ENV = Environment(loader=FileSystemLoader("app/assets/jinja-templates"))
 
7
  _TRANSCRIPTION_TEMPLATE = _ENV.get_template("transcription")
8
 
9
 
10
+ def render_image(collection: Collection, current_page_index):
11
  return _IMAGE_TEMPLATE.render(
12
  page=collection[current_page_index],
13
  lines=collection[current_page_index].traverse(lambda node: node.is_line()),
14
  )
15
 
16
 
17
+ def render_transcription(collection: Collection, current_page_index):
18
  regions = collection[current_page_index].traverse(
19
  lambda node: node.children and all(child.is_line() for child in node)
20
  )
21
  return _TRANSCRIPTION_TEMPLATE.render(regions=regions)
22
 
23
 
24
+ def toggle_navigation_button(collection: Collection):
25
  visible = len(collection.pages) > 1
26
  return gr.update(visible=visible)
27
 
 
31
  return gr.update(interactive=interactive)
32
 
33
 
34
+ def activate_right_button(collection: Collection, current_page_index):
35
  interactive = current_page_index + 1 < len(collection.pages)
36
  return gr.update(interactive=interactive)
37
 
38
 
39
+ def right_button_click(collection: Collection, current_page_index):
40
  max_index = len(collection.pages) - 1
41
  return min(max_index, current_page_index + 1)
42
 
 
45
  return max(0, current_page_index - 1)
46
 
47
 
48
+ def update_image_caption(collection: Collection, current_page_index):
49
  n_pages = len(collection.pages)
50
  return f"Image {current_page_index + 1} of {n_pages}: `{collection[current_page_index].label}`"
51
 
52
 
53
  with gr.Blocks() as visualizer:
54
  gr.Markdown("# Result")
55
+ gr.Markdown(
56
+ "The image to the left shows where HTRflow found text in the image. The transcription can be seen to the right."
57
+ )
58
 
59
  with gr.Row():
 
60
  # Annotated image panel
61
  with gr.Column(scale=2):
62
  gr.Markdown("## Annotated image")
 
72
  gr.Markdown("## Transcription")
73
  transcription = gr.HTML(elem_classes="transcription", container=True, max_height="60vh")
74
 
 
75
  collection = gr.State()
76
  current_page_index = gr.State(0)
77