Spaces:

PabloVD
/

CAMELSDocBot

Sleeping

App Files Files Community

PabloVD committed on Oct 31, 2024

Commit

f3576a5

1 Parent(s): bc84f5e

Use MistralAI endpoint directly and streaming bot

Browse files

Files changed (3) hide show

app.py +122 -13
requirements.txt +6 -6
worker.py +0 -106

app.py CHANGED Viewed

@@ -1,21 +1,134 @@
 import gradio as gr
-import worker
 import requests
 from pathlib import Path
-import torchvision
-torchvision.disable_beta_transforms_warning()
 # Get data from url
 url = 'https://camels.readthedocs.io/_/downloads/en/latest/pdf/'
 r = requests.get(url, stream=True)
-document_path = Path('metadata.pdf')
 document_path.write_bytes(r.content)
-# document_path="2022GS.pdf"
-worker.process_document(document_path)
 def handle_prompt(message, history):
-    bot_response = worker.process_prompt(message, history)
-    return bot_response
 greetingsmessage = "Hi, I'm the CAMELS DocBot, I'm here to assist you with any question related to the CAMELS simulations documentation"
 example_questions = [
@@ -24,11 +137,7 @@ example_questions = [
                     "Which are the largest volumes in CAMELS simulations?",
                     "How can I get the power spectrum of a simulation?"
                      ]
-# chatbot = gr.Chatbot(value=[{"role": "assistant", "content": greetingsmessage}])
-# chatbot = gr.Chatbot(value=[[None, greetingsmessage]])
-# chatbot = gr.Chatbot(value=gr.ChatMessage(role="assistant",content="How can I help you?"))
-# chatbot = gr.Chatbot(placeholder=greetingsmessage)
-demo = gr.ChatInterface(handle_prompt, type="messages", title="CAMELS DocBot",examples=example_questions, theme=gr.themes.Soft(), description=greetingsmessage)#, chatbot=chatbot)
 demo.launch()

+# https://python.langchain.com/docs/tutorials/rag/
 import gradio as gr
+from langchain import hub
+from langchain_chroma import Chroma
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.runnables import RunnablePassthrough
+from langchain_mistralai import MistralAIEmbeddings
+from langchain_community.embeddings import HuggingFaceInstructEmbeddings
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+from langchain_mistralai import ChatMistralAI
+from langchain_community.document_loaders import PyPDFLoader
 import requests
 from pathlib import Path
+from langchain_community.document_loaders import WebBaseLoader
+import bs4
+from langchain_core.rate_limiters import InMemoryRateLimiter
+from urllib.parse import urljoin
+rate_limiter = InMemoryRateLimiter(
+    requests_per_second=0.1,  # <-- MistralAI free. 0.1 req/s = one request every 10 seconds (NOTE(review): the original note said "once every second"; confirm the intended rate)
+    check_every_n_seconds=0.01,  # Wake up every 10 ms to check whether allowed to make a request,
+    max_bucket_size=10,  # Controls the maximum burst size.
+)
 # Get data from url: download the latest CAMELS documentation as a PDF
 url = 'https://camels.readthedocs.io/_/downloads/en/latest/pdf/'
 r = requests.get(url, stream=True)
+document_path = Path('data.pdf')
+# NOTE(review): the response status is not checked before writing, so an HTTP
+# error body would be saved as data.pdf; r.content also reads the whole body
+# into memory, so stream=True has no practical effect here.
 document_path.write_bytes(r.content)
+# document_path = "camels-readthedocs-io-en-latest.pdf"
+# Parse the saved PDF into documents that are later passed to RAG()
+loader = PyPDFLoader(document_path)
+docs = loader.load()
+# # Load, chunk and index the contents of the blog.
+# url = "https://lilianweng.github.io/posts/2023-06-23-agent/"
+# loader = WebBaseLoader(
+#     web_paths=(url,),
+#     bs_kwargs=dict(
+#         parse_only=bs4.SoupStrainer(
+#             class_=("post-content", "post-title", "post-header")
+#         )
+#     ),
+# )
+# loader = WebBaseLoader(url)
+# docs = loader.load()
+# def get_subpages(base_url):
+#     visited_urls = []
+#     urls_to_visit = [base_url]
+#     while urls_to_visit:
+#         url = urls_to_visit.pop(0)
+#         if url in visited_urls:
+#             continue
+#         visited_urls.append(url)
+#         response = requests.get(url)
+#         soup = bs4.BeautifulSoup(response.content, "html.parser")
+#         for link in soup.find_all("a", href=True):
+#             full_url = urljoin(base_url, link['href'])
+#             if base_url in full_url and not full_url.endswith(".html") and full_url not in visited_urls:
+#                 urls_to_visit.append(full_url)
+#     visited_urls = visited_urls[1:]
+#     return visited_urls
+# base_url = "https://camels.readthedocs.io/en/latest/"
+# # base_url = "https://carla.readthedocs.io/en/latest/"
+# # urls = get_subpages(base_url)
+# tokenfile = open("urls.txt")
+# urls = tokenfile.readlines()
+# urls = [url.replace("\n","") for url in urls]
+# tokenfile.close()
+# print(urls)
+# # Load, chunk and index the contents of the blog.
+# loader = WebBaseLoader(urls)
+# docs = loader.load()
+def format_docs(docs):
+    """Concatenate the ``page_content`` of every document, separated by blank lines."""
+    contents = [doc.page_content for doc in docs]
+    return "\n\n".join(contents)
+def RAG(llm, docs, embeddings):
+    """Build a retrieval-augmented generation chain over ``docs``.
+
+    The documents are split into overlapping chunks, indexed in a Chroma
+    vector store with ``embeddings``, and wired into a chain that retrieves
+    context for a question, fills the "rlm/rag-prompt" hub prompt, calls
+    ``llm`` and parses the model output with ``StrOutputParser``.
+
+    Args:
+        llm: Chat model placed at the generation step of the chain.
+        docs: Loaded documents to split and index.
+        embeddings: Embedding model used to build the vector store.
+
+    Returns:
+        The composed runnable chain; it takes the question as its input.
+    """
+    # Break the documents into 1000-character chunks with 200 characters of overlap
+    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+    chunks = splitter.split_documents(docs)
+    # Index the chunks and expose the store as a retriever
+    retriever = Chroma.from_documents(documents=chunks, embedding=embeddings).as_retriever()
+    # Prompt basis example for RAG systems, pulled from the LangChain hub
+    rag_prompt = hub.pull("rlm/rag-prompt")
+    # Chain inputs: retrieved snippets formatted as text, plus the question passed through as-is
+    chain_inputs = {"context": retriever | format_docs, "question": RunnablePassthrough()}
+    return chain_inputs | rag_prompt | llm | StrOutputParser()
+# LLM model: Mistral chat model, throttled by the rate limiter defined above
+llm = ChatMistralAI(model="mistral-large-latest", rate_limiter=rate_limiter)
+# Embeddings: HuggingFace model used to index the document chunks in the vector store
+embed_model = "sentence-transformers/multi-qa-distilbert-cos-v1"
+# embed_model = "nvidia/NV-Embed-v2"
+embeddings = HuggingFaceInstructEmbeddings(model_name=embed_model)
+# embeddings = MistralAIEmbeddings()
+# RAG chain: built once at startup and reused by handle_prompt for every message
+rag_chain = RAG(llm, docs, embeddings)
 def handle_prompt(message, history):
+    """Stream the RAG chain's answer to ``message`` for the Gradio chat UI.
+
+    Yields the accumulated answer after each streamed chunk, so the caller
+    always receives the full text produced so far. ``history`` is part of the
+    chat callback signature but is not used.
+
+    Raises:
+        gr.Error: if the chain fails while streaming. Every failure is
+            reported with the rate-limit message; the original exception is
+            kept as the cause.
+    """
+    try:
+        # Stream output
+        out = ""
+        for chunk in rag_chain.stream(message):
+            out += chunk
+            yield out
+    # Catch Exception rather than using a bare except: a bare except would also
+    # intercept GeneratorExit (raised at the yield when the stream is closed
+    # early) and KeyboardInterrupt, and report them as rate-limit errors.
+    except Exception as err:
+        raise gr.Error("Requests rate limit exceeded") from err
 greetingsmessage = "Hi, I'm the CAMELS DocBot, I'm here to assist you with any question related to the CAMELS simulations documentation"
 example_questions = [
                     "Which are the largest volumes in CAMELS simulations?",
                     "How can I get the power spectrum of a simulation?"
                      ]
+# Chat UI: handle_prompt streams the answers; the greeting is shown as the description
+demo = gr.ChatInterface(handle_prompt, type="messages", title="CAMELS DocBot", examples=example_questions, theme=gr.themes.Soft(), description=greetingsmessage)
 demo.launch()

requirements.txt CHANGED Viewed

@@ -1,9 +1,9 @@
-pdf2image
-pypdf
-tiktoken
 langchain
 langchain-community
-langchain-huggingface
-chromadb
-InstructorEmbedding
 huggingface_hub==0.25.2

 langchain
 langchain-community
+langchain-chroma
+langchain-mistralai
+beautifulsoup4
+pypdf==5.0.1
+sentence-transformers==2.2.2
 huggingface_hub==0.25.2
+InstructorEmbedding

worker.py DELETED Viewed

@@ -1,106 +0,0 @@
-import torch
-from langchain.chains import RetrievalQA
-from langchain_community.embeddings import HuggingFaceInstructEmbeddings
-from langchain_community.document_loaders import PyPDFLoader
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain_community.vectorstores import Chroma
-from langchain_huggingface import HuggingFaceEndpoint
-# import pip
-# def install(package):
-#     if hasattr(pip, 'main'):
-#         pip.main(['install', package])
-#     else:
-#         pip._internal.main(['install', package])
-# # Temporal fix for incompatibility between langchain_huggingface and sentence-transformers<2.6
-# install("sentence-transformers==2.2.2")
-# Check for GPU availability and set the appropriate device for computation.
-DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"
-# DEVICE = "cpu"
-# Global variables
-conversation_retrieval_chain = None
-chat_history = []
-llm_hub = None
-embeddings = None
-# Function to initialize the language model and its embeddings
-def init_llm():
-    global llm_hub, embeddings
-    # Set up the environment variable for HuggingFace and initialize the desired model.
-    # tokenfile = open("api_token.txt")
-    # api_token = tokenfile.readline().replace("\n","")
-    # tokenfile.close()
-    # os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_token
-    # repo name for the model
-    # model_id = "tiiuae/falcon-7b-instruct"
-    model_id = "microsoft/Phi-3.5-mini-instruct"
-    # model_id = "meta-llama/Llama-3.2-1B-Instruct"
-    # model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
-    # load the model into the HuggingFaceHub
-    llm_hub = HuggingFaceEndpoint(repo_id=model_id, temperature=0.1, max_new_tokens=600, model_kwargs={"max_length":600})
-    llm_hub.client.api_url = 'https://api-inference.huggingface.co/models/'+model_id
-    # llm_hub.invoke('foo bar')
-    #Initialize embeddings using a pre-trained model to represent the text data.
-    embedddings_model = "sentence-transformers/multi-qa-distilbert-cos-v1"
-    # embedddings_model = "sentence-transformers/all-MiniLM-L6-v2"
-    embeddings = HuggingFaceInstructEmbeddings(
-        model_name=embedddings_model,
-        model_kwargs={"device": DEVICE}
-    )
-# Function to process a PDF document
-def process_document(document_path):
-    global conversation_retrieval_chain
-    # Load the document
-    loader = PyPDFLoader(document_path)
-    documents = loader.load()
-    # Split the document into chunks
-    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=64)
-    texts = text_splitter.split_documents(documents)
-    # Create an embeddings database using Chroma from the split text chunks.
-    db = Chroma.from_documents(texts, embedding=embeddings)
-    # --> Build the QA chain, which utilizes the LLM and retriever for answering questions.
-    # By default, the vectorstore retriever uses similarity search.
-    # If the underlying vectorstore support maximum marginal relevance search, you can specify that as the search type (search_type="mmr").
-    # You can also specify search kwargs like k to use when doing retrieval. k represent how many search results send to llm
-    retriever = db.as_retriever(search_type="mmr", search_kwargs={'k': 6, 'lambda_mult': 0.25})
-    conversation_retrieval_chain = RetrievalQA.from_chain_type(
-        llm=llm_hub,
-        chain_type="stuff",
-        retriever=retriever,
-        return_source_documents=False,
-        input_key = "question"
-     #   chain_type_kwargs={"prompt": prompt} # if you are using prompt template, you need to uncomment this part
-    )
-# Function to process a user prompt
-def process_prompt(prompt, chat_history):
-    global conversation_retrieval_chain
-    # global chat_history
-    # Query the model
-    output = conversation_retrieval_chain.invoke({"question": prompt, "chat_history": chat_history})
-    answer = output["result"]
-    # Update the chat history
-    chat_history.append((prompt, answer))
-    # Return the model's response
-    return answer
-# Initialize the language model
-init_llm()