cboettig committed on
Commit
deb0388
·
1 Parent(s): 58f1141
Files changed (2) hide show
  1. pages/3_🕮_Docs_Demo.py +127 -0
  2. requirements.txt +3 -0
pages/3_🕮_Docs_Demo.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import streamlit as st
3
+
4
# Copy the Streamlit-managed secret into the process environment. Clients in
# this script that are constructed without an explicit api_key (for example
# the OpenAIEmbeddings() call further down) presumably read it from there.
os.environ["OPENAI_API_KEY"] = st.secrets["OPENAI_API_KEY"]

# +

# The same text is used for the browser tab title and the on-page heading.
PAGE_TITLE = "Protected Areas Database Chat"

# Introductory blurb rendered as Markdown underneath the heading.
PAGE_INTRO = '''

This Chatbot is designed only to answer questions based on [PAD Technical How-Tos](https://www.protectedlands.net/pad-us-technical-how-tos/). The Chatbot will refuse to answer questions outside of this context.

Example queries:

- What is the difference between Fee and Easements?
- What do the gap status codes mean?
'''

st.set_page_config(page_title=PAGE_TITLE, page_icon="🦜")
st.title(PAGE_TITLE)
st.markdown(PAGE_INTRO)
20
+ # -
21
+
22
+ # optional
23
+ # os.environ["LANGCHAIN_TRACING_V2"] = "true"
24
+ # os.environ["LANGCHAIN_API_KEY"] = getpass.getpass()
25
+
26
+ import bs4
27
+ from langchain import hub
28
+ from langchain_community.document_loaders import WebBaseLoader
29
+ from langchain_chroma import Chroma
30
+ from langchain_core.output_parsers import StrOutputParser
31
+ from langchain_core.runnables import RunnablePassthrough
32
+ from langchain_openai import OpenAIEmbeddings
33
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
34
+ from langchain_community.llms import Ollama
35
+
36
+
37
+ from langchain_openai import ChatOpenAI
38
+
39
+ # +
40
# Setup LLM and QA chain

# Chat models offered in the sidebar, keyed by the label shown to the user.
# The ChatOpenAI entries receive the API key from Streamlit secrets; the
# Ollama entries presumably need a reachable Ollama server with the named
# model already pulled -- TODO confirm for the deployment target.
models = {
    "chatgpt3.5": ChatOpenAI(model="gpt-3.5-turbo", temperature=0, api_key=st.secrets["OPENAI_API_KEY"], streaming=True),
    "chatgpt4": ChatOpenAI(model="gpt-4", temperature=0, api_key=st.secrets["OPENAI_API_KEY"]),
    "phi3": Ollama(model="phi3", temperature=0),
    "duckdb-nsql": Ollama(model="duckdb-nsql", temperature=0),
    "command-r-plus": Ollama(model="command-r-plus", temperature=0),
    "mistral": Ollama(model="mistral", temperature=0),
    "wizardlm2": Ollama(model="wizardlm2", temperature=0),
    "sqlcoder": Ollama(model="sqlcoder", temperature=0),
    "zephyr": Ollama(model="zephyr", temperature=0),
    "gemma": Ollama(model="gemma", temperature=0),
    "llama3": Ollama(model="llama3", temperature=0),
}


# `llm` is bound exactly once, from the sidebar selection. The earlier
# unconditional `llm = ChatOpenAI(...)` assignment was always overwritten
# here, so it has been dropped rather than constructing an unused client.
with st.sidebar:
    choice = st.radio("Select an LLM:", list(models))
    llm = models[choice]
61
+
62
+ # -
63
+
64
# Load, chunk and index the contents of the PAD-US documentation pages.
PAD_DOC_URLS = [
    "https://www.protectedlands.net/pad-us-technical-how-tos/",
    "https://www.protectedlands.net/uses-of-pad-us/",
    # "https://www.protectedlands.net/pad-us-data-structure-attributes/"
]


@st.cache_resource
def build_vectorstore():
    """Scrape the PAD-US pages, split them into chunks and embed them in Chroma.

    Streamlit re-executes the whole script on every widget interaction, so
    without caching each chat message would re-download the pages and
    re-compute the embeddings. ``st.cache_resource`` keeps one shared
    vector store for the lifetime of the server process.

    Returns:
        The Chroma vector store built from the chunked documents.
    """
    loader = WebBaseLoader(
        web_paths=PAD_DOC_URLS,
        bs_kwargs=dict(
            # Only keep elements carrying the "main-body" CSS class.
            parse_only=bs4.SoupStrainer(class_=("main-body")),
        ),
    )
    docs = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=400)
    splits = text_splitter.split_documents(docs)
    # NOTE(review): the cached store is shared by all sessions; it is only
    # read after construction here -- confirm that is safe for Chroma.
    return Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())


@st.cache_resource
def load_rag_prompt():
    """Fetch the "rlm/rag-prompt" template from the LangChain hub once per process."""
    return hub.pull("rlm/rag-prompt")


vectorstore = build_vectorstore()

# Retrieve and generate using the relevant snippets of the documentation.
retriever = vectorstore.as_retriever()
prompt = load_rag_prompt()
85
+
86
def format_docs(docs):
    """Merge retrieved documents into a single context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` values in their original order, separated by
        one blank line; an empty string when ``docs`` is empty.
    """
    contents = [document.page_content for document in docs]
    return "\n\n".join(contents)
88
+
89
+
90
# Input mapping for the prompt: "context" is produced by piping the retriever
# into format_docs, and "question" is passed along via RunnablePassthrough.
rag_inputs = {"context": retriever | format_docs, "question": RunnablePassthrough()}

# Full pipeline: build the prompt inputs, fill the prompt, call the selected
# LLM, then parse the model output with StrOutputParser.
rag_chain = rag_inputs | prompt | llm | StrOutputParser()
96
+
97
+ # +
98
+ # rag_chain.invoke("What is the difference between Fee and Easement?")
99
+
100
+
101
+
102
+ # +
103
# StreamlitChatMessageHistory is imported from langchain_community (already a
# dependency of this script) instead of the legacy langchain.memory path.
from langchain_community.chat_message_histories import StreamlitChatMessageHistory
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

# Setup memory for contextual conversation
msgs = StreamlitChatMessageHistory()
memory = ConversationBufferMemory(memory_key="chat_history", chat_memory=msgs, return_messages=True)

#qa_chain = ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory, verbose=True)

# Create the button before testing the history: with the previous
# `len(...) == 0 or button(...)` form the `or` short-circuited on an empty
# history, so the button was not rendered on the very first run.
clear_requested = st.sidebar.button("Clear message history")
if clear_requested or not msgs.messages:
    msgs.clear()
    msgs.add_ai_message("How can I help you?")

# Replay the stored conversation; message types map onto Streamlit chat roles.
avatars = {"human": "user", "ai": "assistant"}
for msg in msgs.messages:
    st.chat_message(avatars[msg.type]).write(msg.content)

if user_query := st.chat_input(placeholder="Ask me about PAD!"):
    st.chat_message("user").write(user_query)
    # rag_chain is not connected to `memory`, so the turn has to be recorded
    # explicitly; otherwise it vanishes from the page on the next rerun.
    msgs.add_user_message(user_query)

    with st.chat_message("assistant"):
        response = rag_chain.invoke(user_query)
        # Explicit write instead of relying on a bare-expression display.
        st.write(response)
    msgs.add_ai_message(response)
127
+
requirements.txt CHANGED
@@ -4,6 +4,9 @@ altair
4
  langchain
5
  langchain-community
6
  langchain-openai
 
 
 
7
  SQLAlchemy==1.4.52
8
  streamlit
9
  geopandas
 
4
  langchain
5
  langchain-community
6
  langchain-openai
7
+ langchainhub
8
+ langchain-chroma
9
+ bs4
10
  SQLAlchemy==1.4.52
11
  streamlit
12
  geopandas