phyloforfun committed on
Commit
b55e03e
·
1 Parent(s): d48e79a

Major update. Support for 15 LLMs, World Flora Online taxonomy validation, geolocation, 2 OCR methods, significant UI changes, stability improvements, consistent JSON parsing

Browse files
Files changed (4) hide show
  1. app.py +117 -65
  2. pages/faqs.py +9 -6
  3. pages/prompt_builder.py +8 -5
  4. pages/report_bugs.py +8 -3
app.py CHANGED
@@ -18,7 +18,8 @@ from vouchervision.utils_hf import setup_streamlit_config, save_uploaded_file, s
18
  from vouchervision.data_project import convert_pdf_to_jpg
19
  from vouchervision.utils_LLM import check_system_gpus
20
 
21
-
 
22
  #################################################################################################################################################
23
  # Initializations ###############################################################################################################################
24
  #################################################################################################################################################
@@ -27,12 +28,13 @@ st.set_page_config(layout="wide", page_icon='img/icon.ico', page_title='VoucherV
27
  # Parse the 'is_hf' argument and set it in session state
28
  if 'is_hf' not in st.session_state:
29
  is_hf_os = os.getenv('IS_HF', '').lower() # Get the environment variable and convert to lowercase for uniformity
 
30
  if is_hf_os in ['1', 'true']: # Check against string representations of truthy values
31
  st.session_state['is_hf'] = True
32
  else:
33
  st.session_state['is_hf'] = False
34
 
35
- print(f"is_hf {st.session_state['is_hf']}")
36
 
37
 
38
  # Default YAML file path
@@ -223,6 +225,28 @@ if 'dir_uploaded_images_small' not in st.session_state:
223
  ########################################################################################################
224
  ### CONTENT [] ####
225
  ########################################################################################################
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
226
  def content_input_images(col_left, col_right):
227
  st.write('---')
228
  # col1, col2 = st.columns([2,8])
@@ -259,17 +283,8 @@ def content_input_images(col_left, col_right):
259
  n_pages = convert_pdf_to_jpg(file_path, st.session_state['dir_uploaded_images'], dpi=st.session_state.config['leafmachine']['project']['dir_images_local'])
260
  # Update the input list for each page image
261
  converted_files = os.listdir(st.session_state['dir_uploaded_images'])
262
-
263
- for file_name in converted_files:
264
- if file_name.lower().endswith('.jpg'):
265
- jpg_file_path = os.path.join(st.session_state['dir_uploaded_images'], file_name)
266
- st.session_state['input_list'].append(jpg_file_path)
267
-
268
- # Optionally, create a thumbnail for the gallery
269
- img = Image.open(jpg_file_path)
270
- img.thumbnail((GALLERY_IMAGE_SIZE, GALLERY_IMAGE_SIZE), Image.Resampling.LANCZOS)
271
- file_path_small = save_uploaded_file(st.session_state['dir_uploaded_images_small'], uploaded_file, img)
272
- st.session_state['input_list_small'].append(file_path_small)
273
  else:
274
  # Handle JPG/JPEG files (existing process)
275
  file_path = save_uploaded_file(st.session_state['dir_uploaded_images'], uploaded_file)
@@ -289,7 +304,7 @@ def content_input_images(col_left, col_right):
289
  else:
290
  # If there are less than 100 images, take them all
291
  images_to_display = st.session_state['input_list_small']
292
- st.image(images_to_display)
293
 
294
  else:
295
  st.session_state['view_local_gallery'] = st.toggle("View Image Gallery",)
@@ -301,7 +316,7 @@ def content_input_images(col_left, col_right):
301
  info_txt = f"Showing {st.session_state['processing_add_on']} out of {st.session_state['processing_add_on']} images"
302
  st.info(info_txt)
303
  try:
304
- st.image(st.session_state['input_list_small'], width=GALLERY_IMAGE_SIZE)
305
  except:
306
  pass
307
 
@@ -1117,7 +1132,7 @@ def save_changes_to_API_keys(cfg_private,openai_api_key,azure_openai_api_version
1117
 
1118
 
1119
 
1120
-
1121
  def show_header_welcome():
1122
  st.session_state.logo_path = os.path.join(st.session_state.dir_home, 'img','logo.png')
1123
  st.session_state.logo = Image.open(st.session_state.logo_path)
@@ -1400,14 +1415,13 @@ def content_header():
1400
  ct_left, ct_right = st.columns([1,1])
1401
  with ct_left:
1402
  st.button("Refresh", on_click=refresh, use_container_width=True)
1403
- with ct_right:
1404
- # st.page_link(os.path.join(os.path.dirname(__file__),"pages","faqs.py"), label="FAQs", icon="❔")
1405
- st.page_link(os.path.join("pages","faqs.py"), label="FAQs", icon="❔")
 
 
1406
 
1407
- # if st.button('FAQs', use_container_width=True):
1408
- # st.session_state.proceed_to_faqs = True
1409
- # st.session_state.proceed_to_main = False
1410
- # st.rerun()
1411
 
1412
  # with col_run_2:
1413
  # if st.button("Test GPT"):
@@ -1495,7 +1509,7 @@ def content_project_settings(col):
1495
  st.session_state.config['leafmachine']['project']['dir_output'] = st.text_input("Output directory", st.session_state.config['leafmachine']['project'].get('dir_output', ''))
1496
 
1497
 
1498
-
1499
  def content_llm_cost():
1500
  st.write("---")
1501
  st.header('LLM Cost Calculator')
@@ -1530,13 +1544,29 @@ def content_llm_cost():
1530
  n_img = st.number_input("Number of Images", min_value=0, value=1000, step=100)
1531
 
1532
  # Function to find the model's Input and Output values
 
1533
  def find_model_values(model, all_dfs):
1534
  for df in all_dfs:
1535
  if model in df.keys():
1536
  return df[model]['in'], df[model]['out']
1537
  return None, None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1538
 
1539
- # Calculate and display cost when button is pressed
1540
  input_value, output_value = find_model_values(selected_model,
1541
  [st.session_state['cost_openai'], st.session_state['cost_azure'], st.session_state['cost_google'], st.session_state['cost_mistral'], st.session_state['cost_local']])
1542
  if input_value is not None and output_value is not None:
@@ -1544,17 +1574,18 @@ def content_llm_cost():
1544
  with calculator_5:
1545
  st.text_input("Total Cost", f"${round(cost,2)}") # selected_model
1546
 
 
1547
  with col_cost_1:
1548
- rounding = 4
1549
- st.dataframe(st.session_state.styled_cost_openai.format(precision=rounding), hide_index=True,)
1550
  with col_cost_2:
1551
- st.dataframe(st.session_state.styled_cost_azure.format(precision=rounding), hide_index=True,)
1552
  with col_cost_3:
1553
- st.dataframe(st.session_state.styled_cost_google.format(precision=rounding), hide_index=True,)
1554
  with col_cost_4:
1555
- st.dataframe(st.session_state.styled_cost_mistral.format(precision=rounding), hide_index=True,)
1556
  with col_cost_5:
1557
- st.dataframe(st.session_state.styled_cost_local.format(precision=rounding), hide_index=True,)
 
1558
 
1559
 
1560
 
@@ -1572,10 +1603,12 @@ def content_prompt_and_llm_version():
1572
  selected_version = default_version
1573
  st.session_state.config['leafmachine']['project']['prompt_version'] = st.selectbox("Prompt Version", available_prompts, index=available_prompts.index(selected_version),label_visibility='collapsed')
1574
 
1575
- with col_prompt_2:
1576
- # if st.button("Build Custom LLM Prompt"):
1577
- # st.page_link(os.path.join(os.path.dirname(__file__),"pages","prompt_builder.py"), label="Prompt Builder", icon="🚧")
1578
- st.page_link(os.path.join("pages","prompt_builder.py"), label="Prompt Builder", icon="🚧")
 
 
1579
 
1580
 
1581
  st.header('LLM Version')
@@ -1785,10 +1818,29 @@ def content_ocr_method():
1785
  # elif (OCR_option == 'hand') and do_use_trOCR:
1786
  # st.text_area(label='Handwritten/Printed + trOCR',placeholder=demo_text_trh,disabled=True, label_visibility='visible', height=150)
1787
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1788
 
 
 
 
1789
 
1790
  def content_collage_overlay():
1791
- st.write("---")
1792
  col_collage, col_overlay = st.columns([4,4])
1793
 
1794
 
@@ -1797,7 +1849,7 @@ def content_collage_overlay():
1797
  st.header('LeafMachine2 Label Collage')
1798
  st.info("NOTE: We strongly recommend enabling LeafMachine2 cropping if your images are full sized herbarium sheet. Often, the OCR algorithm struggles with full sheets, but works well with the collage images. We have disabled the collage by default for this Hugging Face Space because the Space lacks a GPU and the collage creation takes a bit longer.")
1799
  default_crops = st.session_state.config['leafmachine']['cropped_components']['save_cropped_annotations']
1800
- st.write("Prior to transcription, use LeafMachine2 to crop all labels from input images to create label collages for each specimen image. Showing just the text labels to the OCR algorithms significantly improves performance. This runs slowly on the free Hugging Face Space, but runs quickly with a fast CPU or any GPU.")
1801
  st.session_state.config['leafmachine']['use_RGB_label_images'] = st.checkbox(":rainbow[Use LeafMachine2 label collage for transcriptions]", st.session_state.config['leafmachine'].get('use_RGB_label_images', False))
1802
 
1803
 
@@ -1805,33 +1857,17 @@ def content_collage_overlay():
1805
  options=['ruler', 'barcode','label', 'colorcard','map','envelope','photo','attached_item','weights',
1806
  'leaf_whole', 'leaf_partial', 'leaflet', 'seed_fruit_one', 'seed_fruit_many', 'flower_one', 'flower_many', 'bud','specimen','roots','wood'],default=default_crops)
1807
  st.session_state.config['leafmachine']['cropped_components']['save_cropped_annotations'] = option_selected_crops
1808
- # Load the image only if it's not already in the session state
1809
- if "demo_collage" not in st.session_state:
1810
- # ba = os.path.join(st.session_state.dir_home, 'demo', 'ba', 'ba2.png')
1811
- ba = os.path.join(st.session_state.dir_home, 'demo', 'ba', 'ba2.png')
1812
- st.session_state["demo_collage"] = Image.open(ba)
1813
-
1814
- # Display the image
1815
- with st.expander(":frame_with_picture: View an example of the LeafMachine2 collage image"):
1816
- st.image(st.session_state["demo_collage"], caption='LeafMachine2 Collage', output_format="PNG")
1817
- # st.image(st.session_state["demo_collage"], caption='LeafMachine2 Collage', output_format="JPEG")
1818
 
1819
  with col_overlay:
1820
  st.header('OCR Overlay Image')
1821
 
1822
- st.write('This will plot bounding boxes around all text that Google Vision was able to detect. If there are no boxes around text, then the OCR failed, so that missing text will not be seen by the LLM when it is creating the JSON object. The created image will be viewable in the VoucherVisionEditor.')
1823
 
1824
  do_create_OCR_helper_image = st.checkbox("Create image showing an overlay of the OCR detections",value=st.session_state.config['leafmachine']['do_create_OCR_helper_image'],disabled=True)
1825
  st.session_state.config['leafmachine']['do_create_OCR_helper_image'] = do_create_OCR_helper_image
1826
-
1827
- if "demo_overlay" not in st.session_state:
1828
- # ocr = os.path.join(st.session_state.dir_home,'demo', 'ba','ocr.png')
1829
- ocr = os.path.join(st.session_state.dir_home,'demo', 'ba','ocr2.png')
1830
- st.session_state["demo_overlay"] = Image.open(ocr)
1831
 
1832
- with st.expander(":frame_with_picture: View an example of the OCR overlay image"):
1833
- st.image(st.session_state["demo_overlay"], caption='OCR Overlay Images', output_format = "PNG")
1834
- # st.image(st.session_state["demo_overlay"], caption='OCR Overlay Images', output_format = "JPEG")
1835
 
1836
 
1837
 
@@ -1909,10 +1945,10 @@ def content_processing_options():
1909
  with col_v2:
1910
 
1911
 
1912
- print(f"Number of GPUs: {st.session_state.num_gpus}")
1913
- print(f"GPU Details: {st.session_state.gpu_dict}")
1914
- print(f"Total VRAM: {st.session_state.total_vram_gb} GB")
1915
- print(f"Capability Score: {st.session_state.capability_score}")
1916
 
1917
  st.header('System GPU Information')
1918
  st.markdown(f"**Torch CUDA:** {torch.cuda.is_available()}")
@@ -1989,6 +2025,7 @@ def content_space_saver():
1989
  #################################################################################################################################################
1990
  # render_expense_report_summary #################################################################################################################
1991
  #################################################################################################################################################
 
1992
  def render_expense_report_summary():
1993
  expense_summary = st.session_state.expense_summary
1994
  expense_report = st.session_state.expense_report
@@ -2099,12 +2136,8 @@ def content_less_used():
2099
  #################################################################################################################################################
2100
  # Sidebar #######################################################################################################################################
2101
  #################################################################################################################################################
 
2102
  def sidebar_content():
2103
- # st.page_link(os.path.join(os.path.dirname(__file__),'app.py'), label="Home", icon="🏠")
2104
- # st.page_link(os.path.join(os.path.dirname(__file__),"pages","prompt_builder.py"), label="Prompt Builder", icon="🚧")
2105
- # st.page_link("pages/page_2.py", label="Page 2", icon="2️⃣", disabled=True)
2106
- # st.page_link("http://www.google.com", label="Google", icon="🌎")
2107
-
2108
  if not os.path.exists(os.path.join(st.session_state.dir_home,'expense_report')):
2109
  validate_dir(os.path.join(st.session_state.dir_home,'expense_report'))
2110
  expense_report_path = os.path.join(st.session_state.dir_home, 'expense_report', 'expense_report.csv')
@@ -2156,11 +2189,21 @@ def main():
2156
  #################################################################################################################################################
2157
  # Main ##########################################################################################################################################
2158
  #################################################################################################################################################
 
2159
  if st.session_state['is_hf']:
2160
  # if st.session_state.proceed_to_build_llm_prompt:
2161
  # build_LLM_prompt_config()
2162
  if st.session_state.proceed_to_main:
 
 
 
 
2163
  main()
 
 
 
 
 
2164
 
2165
  else:
2166
  if not st.session_state.private_file:
@@ -2170,8 +2213,17 @@ else:
2170
  elif st.session_state.proceed_to_private and not st.session_state['is_hf']:
2171
  create_private_file()
2172
  elif st.session_state.proceed_to_main:
 
 
 
 
2173
  main()
2174
 
 
 
 
 
 
2175
 
2176
 
2177
 
 
18
  from vouchervision.data_project import convert_pdf_to_jpg
19
  from vouchervision.utils_LLM import check_system_gpus
20
 
21
+ import cProfile
22
+ import pstats
23
  #################################################################################################################################################
24
  # Initializations ###############################################################################################################################
25
  #################################################################################################################################################
 
28
  # Parse the 'is_hf' argument and set it in session state
29
  if 'is_hf' not in st.session_state:
30
  is_hf_os = os.getenv('IS_HF', '').lower() # Get the environment variable and convert to lowercase for uniformity
31
+ print(f"=== os.getenv('IS_HF', '').lower() ===> {is_hf_os} ===")
32
  if is_hf_os in ['1', 'true']: # Check against string representations of truthy values
33
  st.session_state['is_hf'] = True
34
  else:
35
  st.session_state['is_hf'] = False
36
 
37
+ print(f"=== is_hf {st.session_state['is_hf']} ===")
38
 
39
 
40
  # Default YAML file path
 
225
  ########################################################################################################
226
  ### CONTENT [] ####
227
  ########################################################################################################
228
+ @st.cache_data
229
+ def show_gallery_small():
230
+ st.image(st.session_state['input_list_small'], width=GALLERY_IMAGE_SIZE)
231
+
232
+ @st.cache_data
233
+ def show_gallery_small_hf(images_to_display):
234
+ st.image(images_to_display)
235
+
236
+
237
+ @st.cache_data
238
+ def load_gallery(converted_files, uploaded_file):
239
+ for file_name in converted_files:
240
+ if file_name.lower().endswith('.jpg'):
241
+ jpg_file_path = os.path.join(st.session_state['dir_uploaded_images'], file_name)
242
+ st.session_state['input_list'].append(jpg_file_path)
243
+
244
+ # Optionally, create a thumbnail for the gallery
245
+ img = Image.open(jpg_file_path)
246
+ img.thumbnail((GALLERY_IMAGE_SIZE, GALLERY_IMAGE_SIZE), Image.Resampling.LANCZOS)
247
+ file_path_small = save_uploaded_file(st.session_state['dir_uploaded_images_small'], uploaded_file, img)
248
+ st.session_state['input_list_small'].append(file_path_small)
249
+
250
  def content_input_images(col_left, col_right):
251
  st.write('---')
252
  # col1, col2 = st.columns([2,8])
 
283
  n_pages = convert_pdf_to_jpg(file_path, st.session_state['dir_uploaded_images'], dpi=st.session_state.config['leafmachine']['project']['dir_images_local'])
284
  # Update the input list for each page image
285
  converted_files = os.listdir(st.session_state['dir_uploaded_images'])
286
+ load_gallery(converted_files, uploaded_file)
287
+
 
 
 
 
 
 
 
 
 
288
  else:
289
  # Handle JPG/JPEG files (existing process)
290
  file_path = save_uploaded_file(st.session_state['dir_uploaded_images'], uploaded_file)
 
304
  else:
305
  # If there are less than 100 images, take them all
306
  images_to_display = st.session_state['input_list_small']
307
+ show_gallery_small_hf(images_to_display)
308
 
309
  else:
310
  st.session_state['view_local_gallery'] = st.toggle("View Image Gallery",)
 
316
  info_txt = f"Showing {st.session_state['processing_add_on']} out of {st.session_state['processing_add_on']} images"
317
  st.info(info_txt)
318
  try:
319
+ show_gallery_small()
320
  except:
321
  pass
322
 
 
1132
 
1133
 
1134
 
1135
+ @st.cache_data
1136
  def show_header_welcome():
1137
  st.session_state.logo_path = os.path.join(st.session_state.dir_home, 'img','logo.png')
1138
  st.session_state.logo = Image.open(st.session_state.logo_path)
 
1415
  ct_left, ct_right = st.columns([1,1])
1416
  with ct_left:
1417
  st.button("Refresh", on_click=refresh, use_container_width=True)
1418
+ # with ct_right:
1419
+ # try:
1420
+ # st.page_link(os.path.join("pages","faqs.py"), label="FAQs", icon="❔")
1421
+ # except:
1422
+ # st.page_link(os.path.join(os.path.dirname(__file__),"pages","faqs.py"), label="FAQs", icon="❔")
1423
 
1424
+
 
 
 
1425
 
1426
  # with col_run_2:
1427
  # if st.button("Test GPT"):
 
1509
  st.session_state.config['leafmachine']['project']['dir_output'] = st.text_input("Output directory", st.session_state.config['leafmachine']['project'].get('dir_output', ''))
1510
 
1511
 
1512
+ # @st.cache_data
1513
  def content_llm_cost():
1514
  st.write("---")
1515
  st.header('LLM Cost Calculator')
 
1544
  n_img = st.number_input("Number of Images", min_value=0, value=1000, step=100)
1545
 
1546
  # Function to find the model's Input and Output values
1547
+ @st.cache_data
1548
  def find_model_values(model, all_dfs):
1549
  for df in all_dfs:
1550
  if model in df.keys():
1551
  return df[model]['in'], df[model]['out']
1552
  return None, None
1553
+
1554
+ @st.cache_data
1555
+ def show_cost_matrix_1(rounding):
1556
+ st.dataframe(st.session_state.styled_cost_openai.format(precision=rounding), hide_index=True,)
1557
+ @st.cache_data
1558
+ def show_cost_matrix_2(rounding):
1559
+ st.dataframe(st.session_state.styled_cost_azure.format(precision=rounding), hide_index=True,)
1560
+ @st.cache_data
1561
+ def show_cost_matrix_3(rounding):
1562
+ st.dataframe(st.session_state.styled_cost_google.format(precision=rounding), hide_index=True,)
1563
+ @st.cache_data
1564
+ def show_cost_matrix_4(rounding):
1565
+ st.dataframe(st.session_state.styled_cost_mistral.format(precision=rounding), hide_index=True,)
1566
+ @st.cache_data
1567
+ def show_cost_matrix_5(rounding):
1568
+ st.dataframe(st.session_state.styled_cost_local.format(precision=rounding), hide_index=True,)
1569
 
 
1570
  input_value, output_value = find_model_values(selected_model,
1571
  [st.session_state['cost_openai'], st.session_state['cost_azure'], st.session_state['cost_google'], st.session_state['cost_mistral'], st.session_state['cost_local']])
1572
  if input_value is not None and output_value is not None:
 
1574
  with calculator_5:
1575
  st.text_input("Total Cost", f"${round(cost,2)}") # selected_model
1576
 
1577
+ rounding = 4
1578
  with col_cost_1:
1579
+ show_cost_matrix_1(rounding)
 
1580
  with col_cost_2:
1581
+ show_cost_matrix_2(rounding)
1582
  with col_cost_3:
1583
+ show_cost_matrix_3(rounding)
1584
  with col_cost_4:
1585
+ show_cost_matrix_4(rounding)
1586
  with col_cost_5:
1587
+ show_cost_matrix_5(rounding)
1588
+
1589
 
1590
 
1591
 
 
1603
  selected_version = default_version
1604
  st.session_state.config['leafmachine']['project']['prompt_version'] = st.selectbox("Prompt Version", available_prompts, index=available_prompts.index(selected_version),label_visibility='collapsed')
1605
 
1606
+ # with col_prompt_2:
1607
+ # # if st.button("Build Custom LLM Prompt"):
1608
+ # try:
1609
+ # st.page_link(os.path.join("pages","prompt_builder.py"), label="Prompt Builder", icon="🚧")
1610
+ # except:
1611
+ # st.page_link(os.path.join(os.path.dirname(__file__),"pages","prompt_builder.py"), label="Prompt Builder", icon="🚧")
1612
 
1613
 
1614
  st.header('LLM Version')
 
1818
  # elif (OCR_option == 'hand') and do_use_trOCR:
1819
  # st.text_area(label='Handwritten/Printed + trOCR',placeholder=demo_text_trh,disabled=True, label_visibility='visible', height=150)
1820
 
1821
+ @st.cache_data
1822
+ def show_collage():
1823
+ # Load the image only if it's not already in the session state
1824
+ if "demo_collage" not in st.session_state:
1825
+ # ba = os.path.join(st.session_state.dir_home, 'demo', 'ba', 'ba2.png')
1826
+ ba = os.path.join(st.session_state.dir_home, 'demo', 'ba', 'ba2.png')
1827
+ st.session_state["demo_collage"] = Image.open(ba)
1828
+ with st.expander(":frame_with_picture: View an example of the LeafMachine2 collage image"):
1829
+ st.image(st.session_state["demo_collage"], caption='LeafMachine2 Collage', output_format="PNG")
1830
+
1831
+ @st.cache_data
1832
+ def show_ocr():
1833
+ if "demo_overlay" not in st.session_state:
1834
+ # ocr = os.path.join(st.session_state.dir_home,'demo', 'ba','ocr.png')
1835
+ ocr = os.path.join(st.session_state.dir_home,'demo', 'ba','ocr2.png')
1836
+ st.session_state["demo_overlay"] = Image.open(ocr)
1837
 
1838
+ with st.expander(":frame_with_picture: View an example of the OCR overlay image"):
1839
+ st.image(st.session_state["demo_overlay"], caption='OCR Overlay Images', output_format = "PNG")
1840
+ # st.image(st.session_state["demo_overlay"], caption='OCR Overlay Images', output_format = "JPEG")
1841
 
1842
  def content_collage_overlay():
1843
+ st.markdown("---")
1844
  col_collage, col_overlay = st.columns([4,4])
1845
 
1846
 
 
1849
  st.header('LeafMachine2 Label Collage')
1850
  st.info("NOTE: We strongly recommend enabling LeafMachine2 cropping if your images are full sized herbarium sheet. Often, the OCR algorithm struggles with full sheets, but works well with the collage images. We have disabled the collage by default for this Hugging Face Space because the Space lacks a GPU and the collage creation takes a bit longer.")
1851
  default_crops = st.session_state.config['leafmachine']['cropped_components']['save_cropped_annotations']
1852
+ st.markdown("Prior to transcription, use LeafMachine2 to crop all labels from input images to create label collages for each specimen image. Showing just the text labels to the OCR algorithms significantly improves performance. This runs slowly on the free Hugging Face Space, but runs quickly with a fast CPU or any GPU.")
1853
  st.session_state.config['leafmachine']['use_RGB_label_images'] = st.checkbox(":rainbow[Use LeafMachine2 label collage for transcriptions]", st.session_state.config['leafmachine'].get('use_RGB_label_images', False))
1854
 
1855
 
 
1857
  options=['ruler', 'barcode','label', 'colorcard','map','envelope','photo','attached_item','weights',
1858
  'leaf_whole', 'leaf_partial', 'leaflet', 'seed_fruit_one', 'seed_fruit_many', 'flower_one', 'flower_many', 'bud','specimen','roots','wood'],default=default_crops)
1859
  st.session_state.config['leafmachine']['cropped_components']['save_cropped_annotations'] = option_selected_crops
1860
+ show_collage()
 
 
 
 
 
 
 
 
 
1861
 
1862
  with col_overlay:
1863
  st.header('OCR Overlay Image')
1864
 
1865
+ st.markdown('This will plot bounding boxes around all text that Google Vision was able to detect. If there are no boxes around text, then the OCR failed, so that missing text will not be seen by the LLM when it is creating the JSON object. The created image will be viewable in the VoucherVisionEditor.')
1866
 
1867
  do_create_OCR_helper_image = st.checkbox("Create image showing an overlay of the OCR detections",value=st.session_state.config['leafmachine']['do_create_OCR_helper_image'],disabled=True)
1868
  st.session_state.config['leafmachine']['do_create_OCR_helper_image'] = do_create_OCR_helper_image
1869
+ show_ocr()
 
 
 
 
1870
 
 
 
 
1871
 
1872
 
1873
 
 
1945
  with col_v2:
1946
 
1947
 
1948
+ # print(f"Number of GPUs: {st.session_state.num_gpus}")
1949
+ # print(f"GPU Details: {st.session_state.gpu_dict}")
1950
+ # print(f"Total VRAM: {st.session_state.total_vram_gb} GB")
1951
+ # print(f"Capability Score: {st.session_state.capability_score}")
1952
 
1953
  st.header('System GPU Information')
1954
  st.markdown(f"**Torch CUDA:** {torch.cuda.is_available()}")
 
2025
  #################################################################################################################################################
2026
  # render_expense_report_summary #################################################################################################################
2027
  #################################################################################################################################################
2028
+ @st.cache_data
2029
  def render_expense_report_summary():
2030
  expense_summary = st.session_state.expense_summary
2031
  expense_report = st.session_state.expense_report
 
2136
  #################################################################################################################################################
2137
  # Sidebar #######################################################################################################################################
2138
  #################################################################################################################################################
2139
+ @st.cache_data
2140
  def sidebar_content():
 
 
 
 
 
2141
  if not os.path.exists(os.path.join(st.session_state.dir_home,'expense_report')):
2142
  validate_dir(os.path.join(st.session_state.dir_home,'expense_report'))
2143
  expense_report_path = os.path.join(st.session_state.dir_home, 'expense_report', 'expense_report.csv')
 
2189
  #################################################################################################################################################
2190
  # Main ##########################################################################################################################################
2191
  #################################################################################################################################################
2192
+ do_print_profiler = False
2193
  if st.session_state['is_hf']:
2194
  # if st.session_state.proceed_to_build_llm_prompt:
2195
  # build_LLM_prompt_config()
2196
  if st.session_state.proceed_to_main:
2197
+ if do_print_profiler:
2198
+ profiler = cProfile.Profile()
2199
+ profiler.enable()
2200
+
2201
  main()
2202
+
2203
+ if do_print_profiler:
2204
+ profiler.disable()
2205
+ stats = pstats.Stats(profiler).sort_stats('cumulative')
2206
+ stats.print_stats(30)
2207
 
2208
  else:
2209
  if not st.session_state.private_file:
 
2213
  elif st.session_state.proceed_to_private and not st.session_state['is_hf']:
2214
  create_private_file()
2215
  elif st.session_state.proceed_to_main:
2216
+ if do_print_profiler:
2217
+ profiler = cProfile.Profile()
2218
+ profiler.enable()
2219
+
2220
  main()
2221
 
2222
+ if do_print_profiler:
2223
+ profiler.disable()
2224
+ stats = pstats.Stats(profiler).sort_stats('cumulative')
2225
+ stats.print_stats(30)
2226
+
2227
 
2228
 
2229
 
pages/faqs.py CHANGED
@@ -7,12 +7,15 @@ st.set_page_config(layout="wide", page_icon='img/icon.ico', page_title='VV FAQs'
7
  def display_faqs():
8
  c1, c2, c3 = st.columns([4,6,1])
9
  with c3:
10
- # st.page_link(os.path.join(os.path.dirname(os.path.dirname(__file__)),'app.py'), label="Home", icon="🏠")
11
- # st.page_link(os.path.join(os.path.dirname(os.path.dirname(__file__)),"pages","faqs.py"), label="FAQs", icon="❔")
12
- # st.page_link(os.path.join(os.path.dirname(os.path.dirname(__file__)),"pages","report_bugs.py"), label="Report a Bug", icon="⚠️")
13
- st.page_link('app.py', label="Home", icon="🏠")
14
- st.page_link(os.path.join("pages","faqs.py"), label="FAQs", icon="❔")
15
- st.page_link(os.path.join("pages","report_bugs.py"), label="Report a Bug", icon="⚠️")
 
 
 
16
  with c2:
17
  st.write('If you would like to get more involved, have questions, would like to see additional features, then please fill out this [Google Form](https://docs.google.com/forms/d/e/1FAIpQLSe2E9zU1bPJ1BW4PMakEQFsRmLbQ0WTBI2UXHIMEFm4WbnAVw/viewform?usp=sf_link)')
18
  components.iframe(f"https://docs.google.com/forms/d/e/1FAIpQLSe2E9zU1bPJ1BW4PMakEQFsRmLbQ0WTBI2UXHIMEFm4WbnAVw/viewform?embedded=true", height=900,scrolling=True,width=640)
 
7
  def display_faqs():
8
  c1, c2, c3 = st.columns([4,6,1])
9
  with c3:
10
+ try:
11
+ st.page_link('app.py', label="Home", icon="🏠")
12
+ st.page_link(os.path.join("pages","faqs.py"), label="FAQs", icon="❔")
13
+ st.page_link(os.path.join("pages","report_bugs.py"), label="Report a Bug", icon="⚠️")
14
+ except:
15
+ st.page_link(os.path.join(os.path.dirname(os.path.dirname(__file__)),'app.py'), label="Home", icon="🏠")
16
+ st.page_link(os.path.join(os.path.dirname(os.path.dirname(__file__)),"pages","faqs.py"), label="FAQs", icon="❔")
17
+ st.page_link(os.path.join(os.path.dirname(os.path.dirname(__file__)),"pages","report_bugs.py"), label="Report a Bug", icon="⚠️")
18
+
19
  with c2:
20
  st.write('If you would like to get more involved, have questions, would like to see additional features, then please fill out this [Google Form](https://docs.google.com/forms/d/e/1FAIpQLSe2E9zU1bPJ1BW4PMakEQFsRmLbQ0WTBI2UXHIMEFm4WbnAVw/viewform?usp=sf_link)')
21
  components.iframe(f"https://docs.google.com/forms/d/e/1FAIpQLSe2E9zU1bPJ1BW4PMakEQFsRmLbQ0WTBI2UXHIMEFm4WbnAVw/viewform?embedded=true", height=900,scrolling=True,width=640)
pages/prompt_builder.py CHANGED
@@ -138,11 +138,14 @@ def build_LLM_prompt_config():
138
  st.session_state.logo = Image.open(st.session_state.logo_path)
139
  st.image(st.session_state.logo, width=250)
140
  with col_main2:
141
- st.page_link('app.py', label="Home", icon="🏠")
142
- st.page_link(os.path.join("pages","faqs.py"), label="FAQs", icon="❔")
143
- st.page_link(os.path.join("pages","report_bugs.py"), label="Report a Bug", icon="⚠️")
144
- # st.page_link("pages/page_2.py", label="Page 2", icon="2️⃣", disabled=True)
145
- # st.page_link("http://www.google.com", label="Google", icon="🌎")
 
 
 
146
 
147
  st.session_state['assigned_columns'] = []
148
  st.session_state['default_prompt_author'] = 'unknown'
 
138
  st.session_state.logo = Image.open(st.session_state.logo_path)
139
  st.image(st.session_state.logo, width=250)
140
  with col_main2:
141
+ try:
142
+ st.page_link('app.py', label="Home", icon="🏠")
143
+ st.page_link(os.path.join("pages","faqs.py"), label="FAQs", icon="❔")
144
+ st.page_link(os.path.join("pages","report_bugs.py"), label="Report a Bug", icon="⚠️")
145
+ except:
146
+ st.page_link(os.path.join(os.path.dirname(os.path.dirname(__file__)),'app.py'), label="Home", icon="🏠")
147
+ st.page_link(os.path.join(os.path.dirname(os.path.dirname(__file__)),"pages","faqs.py"), label="FAQs", icon="❔")
148
+ st.page_link(os.path.join(os.path.dirname(os.path.dirname(__file__)),"pages","report_bugs.py"), label="Report a Bug", icon="⚠️")
149
 
150
  st.session_state['assigned_columns'] = []
151
  st.session_state['default_prompt_author'] = 'unknown'
pages/report_bugs.py CHANGED
@@ -7,9 +7,14 @@ st.set_page_config(layout="wide", page_icon='img/icon.ico', page_title='VV Repor
7
  def display_report():
8
  c1, c2, c3 = st.columns([4,6,1])
9
  with c3:
10
- st.page_link('app.py', label="Home", icon="🏠")
11
- st.page_link(os.path.join("pages","faqs.py"), label="FAQs", icon="❔")
12
- st.page_link(os.path.join("pages","report_bugs.py"), label="Report a Bug", icon="⚠️")
 
 
 
 
 
13
 
14
  with c2:
15
  st.write('To report a bug or request a new feature please fill out this [Google Form](https://docs.google.com/forms/d/e/1FAIpQLSdtW1z9Q1pGZTo5W9UeCa6PlQanP-b88iNKE6zsusRI78Itsw/viewform?usp=sf_link)')
 
7
  def display_report():
8
  c1, c2, c3 = st.columns([4,6,1])
9
  with c3:
10
+ try:
11
+ st.page_link('app.py', label="Home", icon="🏠")
12
+ st.page_link(os.path.join("pages","faqs.py"), label="FAQs", icon="❔")
13
+ st.page_link(os.path.join("pages","report_bugs.py"), label="Report a Bug", icon="⚠️")
14
+ except:
15
+ st.page_link(os.path.join(os.path.dirname(os.path.dirname(__file__)),'app.py'), label="Home", icon="🏠")
16
+ st.page_link(os.path.join(os.path.dirname(os.path.dirname(__file__)),"pages","faqs.py"), label="FAQs", icon="❔")
17
+ st.page_link(os.path.join(os.path.dirname(os.path.dirname(__file__)),"pages","report_bugs.py"), label="Report a Bug", icon="⚠️")
18
 
19
  with c2:
20
  st.write('To report a bug or request a new feature please fill out this [Google Form](https://docs.google.com/forms/d/e/1FAIpQLSdtW1z9Q1pGZTo5W9UeCa6PlQanP-b88iNKE6zsusRI78Itsw/viewform?usp=sf_link)')