PabloVD committed on
Commit f3576a5 · 1 Parent(s): bc84f5e

Use MistralAI endpoint directly and streaming bot

Files changed (3):
  1. app.py +122 -13
  2. requirements.txt +6 -6
  3. worker.py +0 -106
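
The change in a nutshell: instead of routing every question through worker.py and a HuggingFaceEndpoint, app.py now talks to the MistralAI chat endpoint directly via langchain_mistralai and streams partial answers back to Gradio. A minimal sketch of that pattern (not part of the commit; it assumes a valid MISTRAL_API_KEY in the environment and omits the retrieval step):

import gradio as gr
from langchain_core.rate_limiters import InMemoryRateLimiter
from langchain_mistralai import ChatMistralAI

# Throttle requests to stay within the free-tier MistralAI quota
rate_limiter = InMemoryRateLimiter(requests_per_second=0.1, check_every_n_seconds=0.01, max_bucket_size=10)
llm = ChatMistralAI(model="mistral-large-latest", rate_limiter=rate_limiter)

def handle_prompt(message, history):
    out = ""
    for chunk in llm.stream(message):  # each chunk is an AIMessageChunk
        out += chunk.content
        yield out                      # yielding a growing string makes Gradio stream the reply

gr.ChatInterface(handle_prompt, type="messages").launch()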
app.py CHANGED
@@ -1,21 +1,134 @@
+ # https://python.langchain.com/docs/tutorials/rag/
  import gradio as gr
- import worker
+ from langchain import hub
+ from langchain_chroma import Chroma
+ from langchain_core.output_parsers import StrOutputParser
+ from langchain_core.runnables import RunnablePassthrough
+ from langchain_mistralai import MistralAIEmbeddings
+ from langchain_community.embeddings import HuggingFaceInstructEmbeddings
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
+ from langchain_mistralai import ChatMistralAI
+ from langchain_community.document_loaders import PyPDFLoader
  import requests
  from pathlib import Path
- import torchvision
- torchvision.disable_beta_transforms_warning()
+ from langchain_community.document_loaders import WebBaseLoader
+ import bs4
+ from langchain_core.rate_limiters import InMemoryRateLimiter
+ from urllib.parse import urljoin
+
+ rate_limiter = InMemoryRateLimiter(
+     requests_per_second=0.1,     # <-- MistralAI free. We can only make a request once every second
+     check_every_n_seconds=0.01,  # Wake up every 100 ms to check whether allowed to make a request,
+     max_bucket_size=10,          # Controls the maximum burst size.
+ )

  # Get data from url
  url = 'https://camels.readthedocs.io/_/downloads/en/latest/pdf/'
  r = requests.get(url, stream=True)
- document_path = Path('metadata.pdf')
+ document_path = Path('data.pdf')
+
  document_path.write_bytes(r.content)
- # document_path="2022GS.pdf"
- worker.process_document(document_path)
+ # document_path = "camels-readthedocs-io-en-latest.pdf"
+ loader = PyPDFLoader(document_path)
+ docs = loader.load()
+
+ # # Load, chunk and index the contents of the blog.
+ # url = "https://lilianweng.github.io/posts/2023-06-23-agent/"
+ # loader = WebBaseLoader(
+ #     web_paths=(url,),
+ #     bs_kwargs=dict(
+ #         parse_only=bs4.SoupStrainer(
+ #             class_=("post-content", "post-title", "post-header")
+ #         )
+ #     ),
+ # )
+ # loader = WebBaseLoader(url)
+ # docs = loader.load()
+
+ # def get_subpages(base_url):
+ #     visited_urls = []
+ #     urls_to_visit = [base_url]
+
+ #     while urls_to_visit:
+ #         url = urls_to_visit.pop(0)
+ #         if url in visited_urls:
+ #             continue
+
+ #         visited_urls.append(url)
+ #         response = requests.get(url)
+ #         soup = bs4.BeautifulSoup(response.content, "html.parser")
+
+ #         for link in soup.find_all("a", href=True):
+ #             full_url = urljoin(base_url, link['href'])
+ #             if base_url in full_url and not full_url.endswith(".html") and full_url not in visited_urls:
+ #                 urls_to_visit.append(full_url)
+ #     visited_urls = visited_urls[1:]
+
+ #     return visited_urls
+
+ # base_url = "https://camels.readthedocs.io/en/latest/"
+ # # base_url = "https://carla.readthedocs.io/en/latest/"
+ # # urls = get_subpages(base_url)
+
+ # tokenfile = open("urls.txt")
+ # urls = tokenfile.readlines()
+ # urls = [url.replace("\n","") for url in urls]
+ # tokenfile.close()
+ # print(urls)
+
+ # # Load, chunk and index the contents of the blog.
+ # loader = WebBaseLoader(urls)
+ # docs = loader.load()
+
+ def format_docs(docs):
+     return "\n\n".join(doc.page_content for doc in docs)
+
+ def RAG(llm, docs, embeddings):
+
+     # Split text
+     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+     splits = text_splitter.split_documents(docs)
+
+     # Create vector store
+     vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)
+
+     # Retrieve and generate using the relevant snippets of the documents
+     retriever = vectorstore.as_retriever()
+
+     # Prompt basis example for RAG systems
+     prompt = hub.pull("rlm/rag-prompt")
+
+     # Create the chain
+     rag_chain = (
+         {"context": retriever | format_docs, "question": RunnablePassthrough()}
+         | prompt
+         | llm
+         | StrOutputParser()
+     )
+
+     return rag_chain
+
+ # LLM model
+ llm = ChatMistralAI(model="mistral-large-latest", rate_limiter=rate_limiter)
+
+ # Embeddings
+ embed_model = "sentence-transformers/multi-qa-distilbert-cos-v1"
+ # embed_model = "nvidia/NV-Embed-v2"
+ embeddings = HuggingFaceInstructEmbeddings(model_name=embed_model)
+ # embeddings = MistralAIEmbeddings()
+
+ # RAG chain
+ rag_chain = RAG(llm, docs, embeddings)

  def handle_prompt(message, history):
-     bot_response = worker.process_prompt(message, history)
-     return bot_response
+     try:
+         # Stream output
+         out=""
+         for chunk in rag_chain.stream(message):
+             out += chunk
+             yield out
+     except:
+         raise gr.Error("Requests rate limit exceeded")

  greetingsmessage = "Hi, I'm the CAMELS DocBot, I'm here to assist you with any question related to the CAMELS simulations documentation"
  example_questions = [
@@ -24,11 +137,7 @@ example_questions = [
      "Which are the largest volumes in CAMELS simulations?",
      "How can I get the power spectrum of a simulation?"
  ]
- # chatbot = gr.Chatbot(value=[{"role": "assistant", "content": greetingsmessage}])
- # chatbot = gr.Chatbot(value=[[None, greetingsmessage]])
- # chatbot = gr.Chatbot(value=gr.ChatMessage(role="assistant",content="How can I help you?"))
- # chatbot = gr.Chatbot(placeholder=greetingsmessage)

- demo = gr.ChatInterface(handle_prompt, type="messages", title="CAMELS DocBot",examples=example_questions, theme=gr.themes.Soft(), description=greetingsmessage)#, chatbot=chatbot)
+ demo = gr.ChatInterface(handle_prompt, type="messages", title="CAMELS DocBot", examples=example_questions, theme=gr.themes.Soft(), description=greetingsmessage)#, chatbot=chatbot)

  demo.launch()
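
Since rag_chain ends in a StrOutputParser, it can also be exercised outside the Gradio UI. A quick sanity check, assuming the PDF download above succeeded and a MISTRAL_API_KEY is set (a sketch, not part of the commit):

# One-shot answer
print(rag_chain.invoke("Which are the largest volumes in CAMELS simulations?"))

# Token-by-token, the same way handle_prompt consumes it
for chunk in rag_chain.stream("How can I get the power spectrum of a simulation?"):
    print(chunk, end="", flush=True)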
requirements.txt CHANGED
@@ -1,9 +1,9 @@
- pdf2image
- pypdf
- tiktoken
  langchain
  langchain-community
- langchain-huggingface
- chromadb
- InstructorEmbedding
+ langchain-chroma
+ langchain-mistralai
+ beautifulsoup4
+ pypdf==5.0.1
+ sentence-transformers==2.2.2
  huggingface_hub==0.25.2
+ InstructorEmbedding
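
After installing with pip install -r requirements.txt, the swapped dependencies can be given a rough smoke test from Python (a sketch; pypdf, beautifulsoup4, sentence-transformers and InstructorEmbedding are only exercised once the loaders and embeddings are actually used):

from langchain import hub
from langchain_chroma import Chroma
from langchain_mistralai import ChatMistralAI, MistralAIEmbeddings
from langchain_community.embeddings import HuggingFaceInstructEmbeddings  # instantiation needs sentence-transformers==2.2.2 and InstructorEmbedding
from langchain_community.document_loaders import PyPDFLoader, WebBaseLoader  # backed by pypdf and beautifulsoup4
print("requirements OK")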
worker.py DELETED
@@ -1,106 +0,0 @@
- import torch
- from langchain.chains import RetrievalQA
- from langchain_community.embeddings import HuggingFaceInstructEmbeddings
- from langchain_community.document_loaders import PyPDFLoader
- from langchain.text_splitter import RecursiveCharacterTextSplitter
- from langchain_community.vectorstores import Chroma
- from langchain_huggingface import HuggingFaceEndpoint
- # import pip
-
- # def install(package):
- #     if hasattr(pip, 'main'):
- #         pip.main(['install', package])
- #     else:
- #         pip._internal.main(['install', package])
-
- # # Temporal fix for incompatibility between langchain_huggingface and sentence-transformers<2.6
- # install("sentence-transformers==2.2.2")
-
- # Check for GPU availability and set the appropriate device for computation.
- DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"
- # DEVICE = "cpu"
-
- # Global variables
- conversation_retrieval_chain = None
- chat_history = []
- llm_hub = None
- embeddings = None
-
- # Function to initialize the language model and its embeddings
- def init_llm():
-     global llm_hub, embeddings
-     # Set up the environment variable for HuggingFace and initialize the desired model.
-     # tokenfile = open("api_token.txt")
-     # api_token = tokenfile.readline().replace("\n","")
-     # tokenfile.close()
-     # os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_token
-
-     # repo name for the model
-     # model_id = "tiiuae/falcon-7b-instruct"
-     model_id = "microsoft/Phi-3.5-mini-instruct"
-     # model_id = "meta-llama/Llama-3.2-1B-Instruct"
-     # model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
-
-     # load the model into the HuggingFaceHub
-     llm_hub = HuggingFaceEndpoint(repo_id=model_id, temperature=0.1, max_new_tokens=600, model_kwargs={"max_length":600})
-     llm_hub.client.api_url = 'https://api-inference.huggingface.co/models/'+model_id
-     # llm_hub.invoke('foo bar')
-
-     # Initialize embeddings using a pre-trained model to represent the text data.
-     embedddings_model = "sentence-transformers/multi-qa-distilbert-cos-v1"
-     # embedddings_model = "sentence-transformers/all-MiniLM-L6-v2"
-
-     embeddings = HuggingFaceInstructEmbeddings(
-         model_name=embedddings_model,
-         model_kwargs={"device": DEVICE}
-     )
-
-
- # Function to process a PDF document
- def process_document(document_path):
-     global conversation_retrieval_chain
-
-     # Load the document
-     loader = PyPDFLoader(document_path)
-     documents = loader.load()
-
-     # Split the document into chunks
-     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=64)
-     texts = text_splitter.split_documents(documents)
-
-     # Create an embeddings database using Chroma from the split text chunks.
-     db = Chroma.from_documents(texts, embedding=embeddings)
-
-
-     # --> Build the QA chain, which utilizes the LLM and retriever for answering questions.
-     # By default, the vectorstore retriever uses similarity search.
-     # If the underlying vectorstore support maximum marginal relevance search, you can specify that as the search type (search_type="mmr").
-     # You can also specify search kwargs like k to use when doing retrieval. k represent how many search results send to llm
-     retriever = db.as_retriever(search_type="mmr", search_kwargs={'k': 6, 'lambda_mult': 0.25})
-     conversation_retrieval_chain = RetrievalQA.from_chain_type(
-         llm=llm_hub,
-         chain_type="stuff",
-         retriever=retriever,
-         return_source_documents=False,
-         input_key = "question"
-         # chain_type_kwargs={"prompt": prompt} # if you are using prompt template, you need to uncomment this part
-     )
-
-
- # Function to process a user prompt
- def process_prompt(prompt, chat_history):
-     global conversation_retrieval_chain
-     # global chat_history
-
-     # Query the model
-     output = conversation_retrieval_chain.invoke({"question": prompt, "chat_history": chat_history})
-     answer = output["result"]
-
-     # Update the chat history
-     chat_history.append((prompt, answer))
-
-     # Return the model's response
-     return answer
-
- # Initialize the language model
- init_llm()
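
Note that the MMR retrieval settings removed with worker.py are not carried over: the new RAG() helper in app.py builds its retriever with the default similarity search. If that behaviour is still wanted, the same options could be passed there (a sketch, not part of the commit):

retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 6, "lambda_mult": 0.25})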