Spaces:
Running
Running
updated correct file
Browse files- streamlit_app.py +33 -1
streamlit_app.py
CHANGED
@@ -9,6 +9,8 @@ from langchain_core.prompts import PromptTemplate
|
|
9 |
from dotenv import load_dotenv
|
10 |
from RAG import RAG
|
11 |
import logging
|
|
|
|
|
12 |
|
13 |
# Configure logging
|
14 |
logging.basicConfig(level=logging.INFO)
|
@@ -84,12 +86,42 @@ def display_sources(sources: List) -> None:
|
|
84 |
try:
|
85 |
with st.expander(f"Source {i}"):
|
86 |
if hasattr(doc, 'page_content'):
|
87 |
-
st.markdown(f"**Content:** {doc.page_content}")
|
88 |
if hasattr(doc, 'metadata'):
|
89 |
for key, value in doc.metadata.items():
|
90 |
st.markdown(f"**{key.title()}:** {value}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
else:
|
92 |
st.markdown(f"**Content:** {str(doc)}")
|
|
|
93 |
except Exception as e:
|
94 |
logger.error(f"Error displaying source {i}: {str(e)}")
|
95 |
st.error(f"Error displaying source {i}")
|
|
|
9 |
from dotenv import load_dotenv
|
10 |
from RAG import RAG
|
11 |
import logging
|
12 |
+
from image_scraper import DigitalCommonwealthScraper
|
13 |
+
import shutil
|
14 |
|
15 |
# Configure logging
|
16 |
logging.basicConfig(level=logging.INFO)
|
|
|
86 |
try:
|
87 |
with st.expander(f"Source {i}"):
|
88 |
if hasattr(doc, 'page_content'):
|
89 |
+
st.markdown(f"**Content:** {doc.page_content[0:100] + ' ...'}")
|
90 |
if hasattr(doc, 'metadata'):
|
91 |
for key, value in doc.metadata.items():
|
92 |
st.markdown(f"**{key.title()}:** {value}")
|
93 |
+
|
94 |
+
# Use the web scraper to fetch and display an image for this source
|
95 |
+
# Especially helpful if the sources are images themselves
|
96 |
+
# or are OCR'd text files
|
97 |
+
scraper = DigitalCommonwealthScraper()
|
98 |
+
images = scraper.extract_images(doc.metadata["URL"])
|
99 |
+
images = images[:1]
|
100 |
+
|
101 |
+
# If there are no images then don't display them
|
102 |
+
if not images:
|
103 |
+
st.warning("No images found on the page.")
|
104 |
+
return
|
105 |
+
|
106 |
+
# Download the images
|
107 |
+
# Delete the directory if it already exists
|
108 |
+
# to clear the existing cache of images for each listed source
|
109 |
+
output_dir = 'downloaded_images'
|
110 |
+
if os.path.exists(output_dir):
|
111 |
+
shutil.rmtree(output_dir)
|
112 |
+
|
113 |
+
# Download the main image to a local directory
|
114 |
+
downloaded_files = scraper.download_images(images)
|
115 |
+
|
116 |
+
# Display the image using st.image
|
117 |
+
# Caption each image with its alt text, falling back to a numbered label
|
118 |
+
st.image(downloaded_files, width=400, caption=[
|
119 |
+
img.get('alt', f'Image {i+1}') for i, img in enumerate(images)
|
120 |
+
])
|
121 |
+
|
122 |
else:
|
123 |
st.markdown(f"**Content:** {str(doc)}")
|
124 |
+
|
125 |
except Exception as e:
|
126 |
logger.error(f"Error displaying source {i}: {str(e)}")
|
127 |
st.error(f"Error displaying source {i}")
|