Dan Foley committed on
Commit
1e9dc21
·
unverified ·
1 Parent(s): 388d2d5

Delete new_streamlit.py

Browse files
Files changed (1) hide show
  1. new_streamlit.py +0 -188
new_streamlit.py DELETED
@@ -1,188 +0,0 @@
1
- import streamlit as st
2
- import os
3
- from typing import List, Tuple, Optional
4
- from pinecone import Pinecone
5
- from langchain_pinecone import PineconeVectorStore
6
- from langchain_huggingface import HuggingFaceEmbeddings
7
- from langchain_openai import ChatOpenAI
8
- from langchain_core.prompts import PromptTemplate
9
- from dotenv import load_dotenv
10
- from RAG import RAG
11
- from bpl_scraper import DigitalCommonwealthScraper
12
- import logging
13
- import json
14
- import shutil
15
- from PIL import Image
16
- import io
17
-
18
# Module-level logger; the root logger is configured at INFO level.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

# Page-level Streamlit settings: tab title, tab icon, and wide layout.
st.set_page_config(
    layout="wide",
    page_icon="🤖",
    page_title="Boston Public Library Chatbot",
)
28
-
29
def initialize_models() -> Tuple[
    Optional[ChatOpenAI], Optional[HuggingFaceEmbeddings]
]:
    """Initialize the chat model and the embedding model.

    Environment variables are loaded with ``load_dotenv()`` before the
    clients are constructed.

    Returns:
        ``(llm, embeddings)`` on success. If anything raises during setup,
        the error is logged (with traceback), shown in the Streamlit UI,
        and ``(None, None)`` is returned -- callers must check both values.
    """
    try:
        load_dotenv()

        # NOTE(review): the previous comment said this was changed from
        # "gpt-4o-mini" because that name looked like a typo; confirm which
        # model is actually intended before relying on this value.
        llm = ChatOpenAI(
            model="gpt-4",
            temperature=0,
            timeout=60,  # bound how long a single request may take
            max_retries=2,
        )

        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )

        return llm, embeddings

    except Exception as e:
        # Top-level boundary for model setup: keep the traceback in the log
        # and surface a short message to the user instead of crashing.
        logger.exception("Error initializing models: %s", e)
        st.error(f"Failed to initialize models: {str(e)}")
        return None, None
53
-
54
def process_message(
    query: str,
    llm: ChatOpenAI,
    index_name: str,
    embeddings: HuggingFaceEmbeddings
) -> Tuple[str, List]:
    """Run a single user query through the RAG pipeline.

    Returns the ``(response, sources)`` pair produced by ``RAG``. If the
    call (or unpacking its result) raises, the error is logged and an
    error string together with an empty source list is returned instead.
    """
    rag_arguments = {
        "query": query,
        "llm": llm,
        "index_name": index_name,
        "embeddings": embeddings,
    }
    try:
        answer, retrieved_docs = RAG(**rag_arguments)
    except Exception as e:
        logger.error(f"Error in process_message: {str(e)}")
        return f"Error processing message: {str(e)}", []
    return answer, retrieved_docs
72
-
73
def _display_source_image(scraper, url: str) -> None:
    """Scrape *url* and show its first image in the current container."""
    # Only the first image found on the page is shown.
    images = scraper.extract_images(url)[:1]
    if not images:
        st.warning("No images found on the page.")
        return

    # Remove the download directory if it already exists, so images cached
    # for a previously listed source are cleared.
    output_dir = 'downloaded_images'
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)

    downloaded_files = scraper.download_images(images)

    # Caption each image with its alt text, falling back to "Image N".
    st.image(
        downloaded_files,
        width=400,
        caption=[
            img.get('alt', f'Image {n}')
            for n, img in enumerate(images, 1)
        ],
    )


def display_sources(sources: List) -> None:
    """Display each source in its own expander.

    For sources exposing ``page_content``, a 100-character preview is shown,
    followed by every metadata entry. When the metadata carries a ``"URL"``
    entry, the page is scraped and its first image is displayed as well.
    Any other object is rendered via ``str()``.

    A failure while rendering one source is logged and reported in the UI,
    and the remaining sources are still rendered.

    Args:
        sources: Retrieved source documents; may be empty.
    """
    if not sources:
        st.info("No sources available for this response.")
        return

    st.subheader("Sources")
    for i, doc in enumerate(sources, 1):
        try:
            with st.expander(f"Source {i}"):
                if hasattr(doc, 'page_content'):
                    st.markdown(
                        f"**Content:** {doc.page_content[0:100] + ' ...'}"
                    )
                    if hasattr(doc, 'metadata'):
                        for key, value in doc.metadata.items():
                            st.markdown(f"**{key.title()}:** {value}")

                        # Images are especially helpful when the source is
                        # itself an image or an OCR'd text file. A source
                        # without a URL simply gets no image, and a page
                        # without images no longer aborts the whole list
                        # (previously a `return` here skipped every
                        # remaining source).
                        url = doc.metadata.get("URL")
                        if url:
                            _display_source_image(
                                DigitalCommonwealthScraper(), url
                            )
                else:
                    st.markdown(f"**Content:** {str(doc)}")

        except Exception as e:
            # Per-source boundary: keep the traceback in the log, show a
            # short error, and carry on with the next source.
            logger.exception("Error displaying source %s: %s", i, e)
            st.error(f"Error displaying source {i}")
123
-
124
-
125
def main():
    """Render the chat page: history, input box, answer, sources, footer."""
    st.title("Boston Public Library RAG Chatbot")

    # Start with an empty transcript when the session has none yet.
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Both models are required; stop rendering if either is missing.
    llm, embeddings = initialize_models()
    if not (llm and embeddings):
        st.error("Failed to initialize the application. Please check the logs.")
        return

    index_name = 'bpl-rag'

    # Replay the conversation so far.
    for past_message in st.session_state.messages:
        with st.chat_message(past_message["role"]):
            st.markdown(past_message["content"])

    user_input = st.chat_input("Type your message here...")

    if user_input:
        # Echo the user's message and record it in the transcript.
        with st.chat_message("user"):
            st.markdown(user_input)
        st.session_state.messages.append(
            {"role": "user", "content": user_input}
        )

        # Produce and render the assistant's reply.
        with st.chat_message("assistant"), st.spinner("Thinking..."):
            response, sources = process_message(
                query=user_input,
                llm=llm,
                index_name=index_name,
                embeddings=embeddings,
            )

            if not isinstance(response, str):
                st.error("Received an invalid response format")
            else:
                st.markdown(response)
                assistant_entry = {"role": "assistant", "content": response}
                st.session_state.messages.append(assistant_entry)

                display_sources(sources)

    # Footer
    st.markdown("---")
    st.markdown(
        "Built with ❤️ using Streamlit + LangChain + OpenAI",
        help="An AI-powered chatbot with RAG capabilities",
    )


if __name__ == "__main__":
    main()