bmv2021 committed on
Commit
09fe857
·
1 Parent(s): 29b3738

updated correct file

Browse files
Files changed (1) hide show
  1. streamlit_app.py +33 -1
streamlit_app.py CHANGED
@@ -9,6 +9,8 @@ from langchain_core.prompts import PromptTemplate
9
  from dotenv import load_dotenv
10
  from RAG import RAG
11
  import logging
 
 
12
 
13
  # Configure logging
14
  logging.basicConfig(level=logging.INFO)
@@ -84,12 +86,42 @@ def display_sources(sources: List) -> None:
84
  try:
85
  with st.expander(f"Source {i}"):
86
  if hasattr(doc, 'page_content'):
87
- st.markdown(f"**Content:** {doc.page_content}")
88
  if hasattr(doc, 'metadata'):
89
  for key, value in doc.metadata.items():
90
  st.markdown(f"**{key.title()}:** {value}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  else:
92
  st.markdown(f"**Content:** {str(doc)}")
 
93
  except Exception as e:
94
  logger.error(f"Error displaying source {i}: {str(e)}")
95
  st.error(f"Error displaying source {i}")
 
9
  from dotenv import load_dotenv
10
  from RAG import RAG
11
  import logging
12
+ from image_scraper import DigitalCommonwealthScraper
13
+ import shutil
14
 
15
  # Configure logging
16
  logging.basicConfig(level=logging.INFO)
 
86
  try:
87
  with st.expander(f"Source {i}"):
88
  if hasattr(doc, 'page_content'):
89
+ st.markdown(f"**Content:** {doc.page_content[0:100] + ' ...'}")
90
  if hasattr(doc, 'metadata'):
91
  for key, value in doc.metadata.items():
92
  st.markdown(f"**{key.title()}:** {value}")
93
+
94
+ # Web Scraper to display images of sources
95
+ # Especially helpful if the sources are images themselves
96
+ # or are OCR'd text files
97
+ scraper = DigitalCommonwealthScraper()
98
+ images = scraper.extract_images(doc.metadata["URL"])
99
+ images = images[:1]
100
+
101
+ # If there are no images then don't display them
102
+ if not images:
103
+ st.warning("No images found on the page.")
104
+ return
105
+
106
+ # Download the images
107
+ # Delete the directory if it already exists
108
+ # to clear the existing cache of images for each listed source
109
+ output_dir = 'downloaded_images'
110
+ if os.path.exists(output_dir):
111
+ shutil.rmtree(output_dir)
112
+
113
+ # Download the main image to a local directory
114
+ downloaded_files = scraper.download_images(images)
115
+
116
+ # Display the image using st.image
117
+ # Display the title of the image using img.get
118
+ st.image(downloaded_files, width=400, caption=[
119
+ img.get('alt', f'Image {i+1}') for i, img in enumerate(images)
120
+ ])
121
+
122
  else:
123
  st.markdown(f"**Content:** {str(doc)}")
124
+
125
  except Exception as e:
126
  logger.error(f"Error displaying source {i}: {str(e)}")
127
  st.error(f"Error displaying source {i}")