cboettig committed on
Commit
ba2d2e0
·
1 Parent(s): 13dd191
Files changed (2) hide show
  1. rag.py +68 -0
  2. sql.py +13 -0
rag.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# RAG (retrieval-augmented generation) demo: chunk a PDF, index the chunks
# in an in-memory vector store, and answer questions about it with an LLM
# through a Streamlit chat interface.
import streamlit as st

from langchain_community.document_loaders import PyPDFLoader
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# Load the source document (expected in the working directory).
loader = PyPDFLoader("ca30x30-2024.pdf")
docs = loader.load()

# Language model served through the NRP LiteLLM proxy; temperature=0 for
# deterministic answers.
llm = ChatOpenAI(
    model="llama3",
    api_key=st.secrets["LITELLM_KEY"],
    base_url="https://llm.nrp-nautilus.io",
    temperature=0,
)

# Embedding model (same proxy endpoint).
embedding = OpenAIEmbeddings(
    model="embed-mistral",
    api_key=st.secrets["LITELLM_KEY"],
    base_url="https://llm.nrp-nautilus.io",
)

# Build a retrieval agent: split the document into overlapping chunks,
# embed them into an in-memory vector store, and expose it as a retriever.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)
vectorstore = InMemoryVectorStore.from_documents(documents=splits, embedding=embedding)
retriever = vectorstore.as_retriever()

system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

# agent is ready to test:
# results = rag_chain.invoke({"input": "What is the goal of CA 30x30?"})
# results['answer']


# Place agent inside a streamlit application:
st.title("RAG Demo")

# FIX: name the user's message `user_query` rather than `prompt`, which
# previously shadowed the ChatPromptTemplate defined above.
if user_query := st.chat_input("What is the goal of CA 30x30?"):
    with st.chat_message("user"):
        st.markdown(user_query)

    with st.chat_message("assistant"):
        results = rag_chain.invoke({"input": user_query})
        st.write(results['answer'])

        st.write('**Context metadata:**\n')
        # FIX: retrieved items in results['context'] are LangChain Document
        # objects; `metadata` is an attribute, not a mapping key, so
        # subscripting with ['metadata'] raised a TypeError.
        st.write(results['context'][0].metadata)
sql.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Text-to-SQL demo wiring: expose remote GBIF parquet data as a DuckDB view
# and attach an LLM-driven SQL query chain to it.
import streamlit as st

from langchain_community.utilities import SQLDatabase
from langchain_openai import ChatOpenAI
from langchain.chains import create_sql_query_chain

# Connect LangChain to a local DuckDB database. view_support=True lets the
# wrapper introspect views (like gbif_h3 below), not just tables.
db = SQLDatabase.from_uri("duckdb:///tmp.db", view_support=True)

# Register the cloud-hosted parquet partitions as a queryable view.
parquet = "s3://us-west-2.opendata.source.coop/cboettig/gbif/2024-10-01/**"
db.run(f"create or replace view gbif_h3 as select * from read_parquet('{parquet}');")

# Deterministic (temperature=0) model behind the NRP LiteLLM proxy.
llm = ChatOpenAI(
    model="llama3",
    temperature=0,
    api_key=st.secrets["LITELLM_KEY"],
    base_url="https://llm.nrp-nautilus.io",
)

# Chain that turns a natural-language question into a SQL query against db.
chain = create_sql_query_chain(llm, db)