zephyr on t4

- benchmark/__main__.py +77 -0
- benchmark/questions.json +38 -0
- config/prompt_templates/llama2.txt +5 -4
- config/prompt_templates/mistral-7b-instruct.txt +0 -7
- config/prompt_templates/zephyr_7b.txt +10 -0
- data/benchmark/.gitkeep +0 -0
- data/indexing_benchmark.ipynb +387 -0
- qa_engine/mocks.py +5 -19
- qa_engine/qa_engine.py +37 -3
benchmark/__main__.py
ADDED
@@ -0,0 +1,77 @@
+import time
+import json
+
+import wandb
+import gradio as gr
+
+from qa_engine import logger, Config, QAEngine
+
+
+QUESTIONS_FILENAME = 'benchmark/questions.json'
+
+config = Config()
+qa_engine = QAEngine(
+    llm_model_id=config.question_answering_model_id,
+    embedding_model_id=config.embedding_model_id,
+    index_repo_id=config.index_repo_id,
+    prompt_template=config.prompt_template,
+    use_docs_for_context=config.use_docs_for_context,
+    add_sources_to_response=config.add_sources_to_response,
+    use_messages_for_context=config.use_messages_in_context,
+    debug=config.debug
+)
+
+
+def main():
+    filtered_config = config.asdict()
+    disallowed_config_keys = [
+        "DISCORD_TOKEN", "NUM_LAST_MESSAGES", "USE_NAMES_IN_CONTEXT",
+        "ENABLE_COMMANDS", "APP_MODE", "DEBUG"
+    ]
+    for key in disallowed_config_keys:
+        filtered_config.pop(key, None)
+
+    wandb.init(
+        project='HF-Docs-QA',
+        entity='hf-qa-bot',
+        name=f'{config.question_answering_model_id} - {config.embedding_model_id} - {config.index_repo_id}',
+        mode='run',  # run/disabled
+        config=filtered_config
+    )
+
+    with open(QUESTIONS_FILENAME, 'r') as f:
+        questions = json.load(f)
+
+    table = wandb.Table(
+        columns=[
+            "id", "question", "messages_context", "answer", "sources", "time"
+        ]
+    )
+    for i, q in enumerate(questions):
+        logger.info(f"Question {i+1}/{len(questions)}")
+
+        question = q['question']
+        messages_context = q['messages_context']
+
+        time_start = time.perf_counter()
+        response = qa_engine.get_response(
+            question=question,
+            messages_context=messages_context
+        )
+        time_end = time.perf_counter()
+
+        table.add_data(
+            i,
+            question,
+            messages_context,
+            response.get_answer(),
+            response.get_sources_as_text(),
+            time_end - time_start
+        )
+
+    wandb.log({"answers": table})
+    wandb.finish()
+
+
+if __name__ == '__main__':
+    main()
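
Note: since the new file is benchmark/__main__.py, the benchmark is presumably launched as a module (python -m benchmark) from the repository root. Below is a minimal dry-run sketch of the same loop, assuming the QAEngine constructor arguments shown above and the new 'mock' model branch added in qa_engine/qa_engine.py further down; this snippet is illustrative, not part of the commit:

    # Dry run of the benchmark loop without W&B logging, using the mocked LLM.
    import json
    import time

    from qa_engine import Config, QAEngine

    config = Config()
    engine = QAEngine(
        llm_model_id='mock',  # selects MockLocalBinaryModel (see the qa_engine/qa_engine.py diff)
        embedding_model_id=config.embedding_model_id,
        index_repo_id=config.index_repo_id,
        prompt_template=config.prompt_template,
        use_docs_for_context=config.use_docs_for_context,
        add_sources_to_response=config.add_sources_to_response,
        use_messages_for_context=config.use_messages_in_context,
        debug=True,
    )

    with open('benchmark/questions.json') as f:
        questions = json.load(f)

    for i, q in enumerate(questions):
        start = time.perf_counter()
        response = engine.get_response(question=q['question'], messages_context=q['messages_context'])
        print(i, response.get_answer()[:80], f'{time.perf_counter() - start:.2f}s')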
benchmark/questions.json
ADDED
@@ -0,0 +1,38 @@
+[
+    {
+        "question": "How to create audio dataset with Hugging Face?",
+        "messages_context": " "
+    },
+    {
+        "question": "I want to check if 2 sentences are similar semantically. How can I do it?",
+        "messages_context": " "
+    },
+    {
+        "question": "What are the benefits of Gradio?",
+        "messages_context": " "
+    },
+    {
+        "question": "How to deploy a text-to-image model?",
+        "messages_context": " "
+    },
+    {
+        "question": "Does Hugging Face offer any distributed training assistance? followup: Can you give me an example setup of it?",
+        "messages_context": " "
+    },
+    {
+        "question": "I want to detect cars on video recording. How should I do it and what models do you recommend?",
+        "messages_context": " "
+    },
+    {
+        "question": "Is there any tool for evaluating models in Hugging Face? followup: Can you give me an example setup of it?",
+        "messages_context": " "
+    },
+    {
+        "question": "What are some advantages of the Hugging Face Hub?",
+        "messages_context": " "
+    },
+    {
+        "question": "How would I use a model in 8 bit in transformers?",
+        "messages_context": " "
+    }
+]
config/prompt_templates/llama2.txt
CHANGED
@@ -1,6 +1,7 @@
-<<SYS>>
-
-
+<<SYS>>Using the information contained in the context,
+give a comprehensive answer to the question.
+Respond only to the question asked, response should be concise and relevant to the question.
+If the answer cannot be deduced from the context, do not give an answer.
 <</SYS>>
 
-[INST]
+[INST] Context: {context} [/INST] User: {question}
config/prompt_templates/mistral-7b-instruct.txt
DELETED
@@ -1,7 +0,0 @@
-[INST]
-You are a helpful assistant for question-answering over Huggging Face documentation. Use the following chunks of relevant information to answer the question. If you don't know the answer, just say that you don't know. You can output code snippets that appear in the documentation to enrich the answer.
-<QUESTION>:
-{question}
-<CONTEXT>:
-{context}
-[/INST]
config/prompt_templates/zephyr_7b.txt
ADDED
@@ -0,0 +1,10 @@
+<|system|>Using the information contained in the context,
+give a comprehensive answer to the question.
+Respond only to the question asked, response should be concise and relevant to the question.
+If the answer cannot be deduced from the context, do not give an answer.</s>
+<|user|>
+Context:
+{context}
+Question: {question}
+</s>
+<|assistant|>
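
Note: the zephyr_7b.txt template exposes {context} and {question} placeholders. Below is a minimal sketch of how it is presumably wired into a LangChain prompt, given that qa_engine calls llm_chain.run(question=..., context=...); the actual loading code is not part of this diff:

    from langchain.prompts import PromptTemplate

    with open('config/prompt_templates/zephyr_7b.txt') as f:
        template = f.read()

    # Two input variables, matching the placeholders in the template above.
    prompt = PromptTemplate(template=template, input_variables=['context', 'question'])
    print(prompt.format(context='<retrieved docs>', question='What are the benefits of Gradio?'))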
data/benchmark/.gitkeep
DELETED
File without changes
data/indexing_benchmark.ipynb
ADDED
@@ -0,0 +1,387 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import math\n",
+    "import numpy as np\n",
+    "from pathlib import Path\n",
+    "from typing import List, Union, Any\n",
+    "from tqdm import tqdm\n",
+    "from sentence_transformers import CrossEncoder\n",
+    "from langchain.chains import RetrievalQA\n",
+    "from langchain.embeddings import HuggingFaceEmbeddings, HuggingFaceInstructEmbeddings\n",
+    "from langchain.document_loaders import TextLoader\n",
+    "from langchain.indexes import VectorstoreIndexCreator\n",
+    "from langchain.text_splitter import CharacterTextSplitter\n",
+    "from langchain.vectorstores import FAISS\n",
+    "from sentence_transformers import CrossEncoder"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class AverageInstructEmbeddings(HuggingFaceInstructEmbeddings):\n",
+    "    max_length: int = None\n",
+    "    def __init__(self, max_length: int = 512, **kwargs: Any):\n",
+    "        super().__init__(**kwargs)\n",
+    "        self.max_length = max_length\n",
+    "        if self.max_length < 0:\n",
+    "            print('max_length is not specified, using model default max_seq_length')\n",
+    "\n",
+    "    def embed_documents(self, texts: List[str]) -> List[List[float]]:\n",
+    "        all_embeddings = []\n",
+    "        for text in tqdm(texts, desc=\"Embedding documents\"):\n",
+    "            if len(text) > self.max_length and self.max_length > -1:\n",
+    "                n_chunks = math.ceil(len(text)/self.max_length)\n",
+    "                chunks = [\n",
+    "                    text[i*self.max_length:(i+1)*self.max_length]\n",
+    "                    for i in range(n_chunks)\n",
+    "                ]\n",
+    "                instruction_pairs = [[self.embed_instruction, chunk] for chunk in chunks]\n",
+    "                chunk_embeddings = self.client.encode(instruction_pairs)\n",
+    "                avg_embedding = np.mean(chunk_embeddings, axis=0)\n",
+    "                all_embeddings.append(avg_embedding.tolist())\n",
+    "            else:\n",
+    "                instruction_pairs = [[self.embed_instruction, text]]\n",
+    "                embeddings = self.client.encode(instruction_pairs)\n",
+    "                all_embeddings.append(embeddings[0].tolist())\n",
+    "\n",
+    "        return all_embeddings\n",
+    "\n",
+    "\n",
+    "class BenchDataST:\n",
+    "    def __init__(self, path: str, percentage: float = 0.005, chunk_size: int = 512, chunk_overlap: int = 100):\n",
+    "        self.path = path\n",
+    "        self.percentage = percentage\n",
+    "        self.docs = []\n",
+    "        self.metadata = []\n",
+    "        self.load()\n",
+    "        self.text_splitter = CharacterTextSplitter(separator=\"\", chunk_size=chunk_size, chunk_overlap=chunk_overlap)\n",
+    "        self.docs_processed = self.text_splitter.create_documents(self.docs, self.metadata)\n",
+    "\n",
+    "    def load(self):\n",
+    "        for p in Path(self.path).iterdir():\n",
+    "            if not p.is_dir():\n",
+    "                with open(p) as f:\n",
+    "                    source = f.readline().strip().replace('source: ', '')\n",
+    "                    self.docs.append(f.read())\n",
+    "                    self.metadata.append({\"source\": source})\n",
+    "        self.docs = self.docs[:int(len(self.docs) * self.percentage)]\n",
+    "        self.metadata = self.metadata[:int(len(self.metadata) * self.percentage)]\n",
+    "\n",
+    "    def __len__(self):\n",
+    "        return len(self.docs)\n",
+    "\n",
+    "    def __getitem__(self, idx):\n",
+    "        return self.docs[idx], self.metadata[idx]\n",
+    "\n",
+    "    def __iter__(self):\n",
+    "        for doc, metadata in zip(self.docs, self.metadata):\n",
+    "            yield doc, metadata\n",
+    "\n",
+    "    def __repr__(self):\n",
+    "        return f'BenchDataST({len(self)} docs) at {self.path} with {self.percentage} percentage \\nSources: {self.metadata} \\nChunks: {self.text_splitter}'\n",
+    "    \n",
+    "\n",
+    "class BenchmarkST:\n",
+    "    def __init__(self, data: BenchDataST, baseline_model: Union[HuggingFaceEmbeddings, HuggingFaceInstructEmbeddings, AverageInstructEmbeddings], embedding_models: List[Union[HuggingFaceEmbeddings, HuggingFaceInstructEmbeddings, AverageInstructEmbeddings]]):\n",
+    "        self.data = data\n",
+    "        self.baseline_model = baseline_model\n",
+    "        self.embedding_models = embedding_models\n",
+    "        self.baseline_index, self.indexes = self.build_indexes()\n",
+    "\n",
+    "    def build_indexes(self):\n",
+    "        indexes = []\n",
+    "        for model in [self.baseline_model] + self.embedding_models:\n",
+    "            print(f\"Building index for {model}\")\n",
+    "            index = FAISS.from_documents(self.data.docs_processed, model)\n",
+    "            indexes.append(index)\n",
+    "        return indexes[0], indexes[1:]\n",
+    "    \n",
+    "    def add_index(self, index: FAISS):\n",
+    "        self.indexes.append(index)\n",
+    "    \n",
+    "    def evaluate(self, query: str, k: int = 3):\n",
+    "        baseline_results = self.baseline_index.similarity_search_with_score(query, k=k)\n",
+    "        results = []\n",
+    "        for index in self.indexes:\n",
+    "            results.append(index.similarity_search_with_score(query, k=k))\n",
+    "        return baseline_results, results"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 48,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "load INSTRUCTOR_Transformer\n",
+      "max_seq_length 512\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "No sentence-transformers model found with name /Users/michalwilinski/.cache/torch/sentence_transformers/cross-encoder_ms-marco-MiniLM-L-12-v2. Creating a new one with MEAN pooling.\n",
+      "Some weights of the model checkpoint at /Users/michalwilinski/.cache/torch/sentence_transformers/cross-encoder_ms-marco-MiniLM-L-12-v2 were not used when initializing BertModel: ['classifier.bias', 'classifier.weight']\n",
+      "- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+      "- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Building index for client=INSTRUCTOR(\n",
+      "  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: T5EncoderModel \n",
+      "  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False})\n",
+      "  (2): Dense({'in_features': 768, 'out_features': 768, 'bias': False, 'activation_function': 'torch.nn.modules.linear.Identity'})\n",
+      "  (3): Normalize()\n",
+      ") model_name='hkunlp/instructor-base' cache_folder=None model_kwargs={} encode_kwargs={} embed_instruction='Represent this piece of text for searching relevant information:' query_instruction='Query the most relevant piece of information from the Hugging Face documentation' max_length=512\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Embedding documents: 100%|██████████| 278/278 [00:19<00:00, 14.11it/s]\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Building index for client=SentenceTransformer(\n",
+      "  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel \n",
+      "  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})\n",
+      ") model_name='cross-encoder/ms-marco-MiniLM-L-12-v2' cache_folder=None model_kwargs={} encode_kwargs={} multi_process=False\n"
+     ]
+    }
+   ],
+   "source": [
+    "data = BenchDataST(\n",
+    "    path=\"./datasets/huggingface_docs/\",\n",
+    "    percentage=0.005,\n",
+    "    chunk_size=512,\n",
+    "    chunk_overlap=100\n",
+    ")\n",
+    "\n",
+    "baseline_embedding_model = AverageInstructEmbeddings(\n",
+    "    model_name=\"hkunlp/instructor-base\",\n",
+    "    embed_instruction=\"Represent this piece of text for searching relevant information:\",\n",
+    "    query_instruction=\"Query the most relevant piece of information from the Hugging Face documentation\",\n",
+    "    max_length=512,\n",
+    ")\n",
+    "\n",
+    "embedding_model = HuggingFaceEmbeddings(\n",
+    "    model_name=\"intfloat/e5-large-v2\",\n",
+    ")\n",
+    "\n",
+    "cross_encoder = HuggingFaceEmbeddings(model_name=\"cross-encoder/ms-marco-MiniLM-L-12-v2\")\n",
+    "\n",
+    "benchmark = BenchmarkST(\n",
+    "    data=data,\n",
+    "    baseline_model=baseline_embedding_model,\n",
+    "    embedding_models=[cross_encoder]\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 54,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Baseline results:\n",
+      "{'source': 'https://github.com/huggingface/course/blob/main/chapters/en/chapter6/4.mdx'} 0.23610792\n",
+      "{'source': 'https://github.com/huggingface/course/blob/main/chapters/en/chapter6/4.mdx'} 0.24087097\n",
+      "{'source': 'https://github.com/huggingface/course/blob/main/chapters/en/chapter6/4.mdx'} 0.24181677\n",
+      "{'source': 'https://github.com/huggingface/course/blob/main/chapters/en/chapter6/4.mdx'} 0.24541612\n",
+      "{'source': 'https://github.com/huggingface/course/blob/main/chapters/en/chapter6/4.mdx'} 0.24639006\n",
+      "{'source': 'https://github.com/huggingface/course/blob/main/chapters/en/chapter6/4.mdx'} 0.24780047\n",
+      "{'source': 'https://github.com/huggingface/course/blob/main/chapters/en/chapter6/4.mdx'} 0.2535807\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.25887597\n",
+      "{'source': 'https://github.com/huggingface/course/blob/main/chapters/en/chapter6/4.mdx'} 0.27293646\n",
+      "{'source': 'https://github.com/huggingface/course/blob/main/chapters/en/chapter6/4.mdx'} 0.27374876\n",
+      "{'source': 'https://github.com/huggingface/course/blob/main/chapters/en/chapter6/4.mdx'} 0.27710187\n",
+      "{'source': 'https://github.com/huggingface/course/blob/main/chapters/en/chapter6/4.mdx'} 0.28146794\n",
+      "{'source': 'https://github.com/huggingface/course/blob/main/chapters/en/chapter6/4.mdx'} 0.29536068\n",
+      "{'source': 'https://github.com/huggingface/course/blob/main/chapters/en/chapter6/4.mdx'} 0.29784447\n",
+      "{'source': 'https://github.com/huggingface/course/blob/main/chapters/en/chapter6/4.mdx'} 0.30452335\n",
+      "{'source': 'https://github.com/huggingface/course/blob/main/chapters/en/chapter6/4.mdx'} 0.3061711\n",
+      "{'source': 'https://github.com/huggingface/course/blob/main/chapters/en/chapter6/4.mdx'} 0.31600478\n",
+      "{'source': 'https://github.com/huggingface/course/blob/main/chapters/en/chapter6/4.mdx'} 0.3166225\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.33345556\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.3469957\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.35222226\n",
+      "{'source': 'https://github.com/huggingface/course/blob/main/chapters/en/chapter6/4.mdx'} 0.36451602\n",
+      "{'source': 'https://github.com/huggingface/course/blob/main/chapters/en/chapter6/4.mdx'} 0.36925688\n",
+      "{'source': 'https://github.com/huggingface/course/blob/main/chapters/en/chapter6/4.mdx'} 0.37025565\n",
+      "{'source': 'https://github.com/huggingface/diffusers/blob/main/examples/textual_inversion/README.md'} 0.37112093\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.37146708\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.3766507\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.37794292\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.37923962\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.38359642\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.3878625\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.39796114\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.40057343\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.40114868\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.40156174\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.40341228\n",
+      "{'source': 'https://github.com/huggingface/diffusers/blob/main/examples/textual_inversion/README.md'} 0.40720195\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.41241395\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.4134417\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.4134435\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.41754264\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.41917825\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.41928726\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.41988587\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.42029166\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.42128915\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.4226097\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.42302307\n",
+      "{'source': 'https://github.com/gradio-app/gradio/blob/main/demo/stt_or_tts/run.ipynb'} 0.4252566\n",
+      "{'source': 'https://github.com/huggingface/diffusers/blob/main/examples/textual_inversion/README.md'} 0.42704937\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.4297651\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.43067485\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.43116528\n",
+      "{'source': 'https://github.com/huggingface/blog/blob/main/bloom.md'} 0.43272027\n",
+      "{'source': 'https://github.com/huggingface/diffusers/blob/main/examples/instruct_pix2pix/README_sdxl.md'} 0.43434155\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.43486434\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.43524152\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.43530554\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.4371896\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.43753576\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.43824\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.4384127\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.43900505\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.43903238\n",
+      "{'source': 'https://github.com/huggingface/blog/blob/main/accelerate-deepspeed.md'} 0.44034868\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.44217598\n",
+      "{'source': 'https://github.com/huggingface/diffusers/blob/main/docs/source/en/api/schedulers/euler_ancestral.md'} 0.4426194\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.44303834\n",
+      "{'source': 'https://github.com/huggingface/diffusers/blob/main/examples/instruct_pix2pix/README_sdxl.md'} 0.4452571\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.44619536\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.44652176\n",
+      "{'source': 'https://github.com/gradio-app/gradio/blob/main/demo/stt_or_tts/run.ipynb'} 0.44683564\n",
+      "{'source': 'https://github.com/huggingface/blog/blob/main/accelerate-deepspeed.md'} 0.44743723\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.44768596\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.4477852\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.44906363\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.45155957\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.45215163\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.45415214\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.4541726\n",
+      "{'source': 'https://github.com/huggingface/diffusers/blob/main/examples/instruct_pix2pix/README_sdxl.md'} 0.4542602\n",
+      "{'source': 'https://github.com/huggingface/blog/blob/main/accelerate-deepspeed.md'} 0.4544394\n",
+      "{'source': 'https://github.com/huggingface/transformers/blob/main/docs/source/en/model_doc/open-llama.md'} 0.45448524\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.454512\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.45478693\n",
+      "{'source': 'https://github.com/huggingface/diffusers/blob/main/docs/source/en/api/schedulers/euler_ancestral.md'} 0.45494407\n",
+      "{'source': 'https://github.com/huggingface/transformers/blob/main/docs/source/en/model_doc/open-llama.md'} 0.45494407\n",
+      "{'source': 'https://github.com/gradio-app/gradio/blob/main/js/accordion/CHANGELOG.md'} 0.45520714\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.4559689\n",
+      "{'source': 'https://github.com/huggingface/blog/blob/main/bloom.md'} 0.4568352\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.4577096\n",
+      "{'source': 'https://github.com/huggingface/simulate/blob/main/docs/source/api/lights.mdx'} 0.4577096\n",
+      "{'source': 'https://github.com/huggingface/diffusers/blob/main/examples/instruct_pix2pix/README_sdxl.md'} 0.45773098\n",
+      "{'source': 'https://github.com/huggingface/blog/blob/main/bloom.md'} 0.45818624\n",
+      "{'source': 'https://github.com/huggingface/optimum/blob/main/docs/source/exporters/onnx/usage_guides/export_a_model.mdx'} 0.45871085\n",
+      "{'source': 'https://github.com/huggingface/blog/blob/main/bloom.md'} 0.4591412\n",
+      "{'source': 'https://github.com/huggingface/diffusers/blob/main/examples/instruct_pix2pix/README_sdxl.md'} 0.46033093\n",
+      "{'source': 'https://github.com/huggingface/blog/blob/main/accelerate-deepspeed.md'} 0.4605264\n",
+      "{'source': 'https://github.com/huggingface/pytorch-image-models/blob/main/docs/changes.md'} 0.46091354\n",
+      "{'source': 'https://github.com/huggingface/transformers/blob/main/docs/source/en/model_doc/open-llama.md'} 0.46182537\n",
+      "Cross encoder results:\n",
+      "{'source': 'https://github.com/huggingface/course/blob/main/chapters/en/chapter6/4.mdx'} 6.840022\n",
+      "{'source': 'https://github.com/huggingface/course/blob/main/chapters/en/chapter6/4.mdx'} -0.98426485\n",
+      "{'source': 'https://github.com/huggingface/course/blob/main/chapters/en/chapter6/4.mdx'} -1.9345549\n",
+      "bye\n"
+     ]
+    }
+   ],
+   "source": [
+    "query = \"textual inversion\"\n",
+    "k = 100\n",
+    "baseline_results, results = benchmark.evaluate(query=query, k=k)\n",
+    "print(\"Baseline results:\")\n",
+    "[print(doc.metadata,score) for (doc,score) in baseline_results]\n",
+    "cross_encoder = CrossEncoder(\"cross-encoder/ms-marco-MiniLM-L-12-v2\")\n",
+    "cross_encoder_results = cross_encoder.predict([(query, doc.page_content) for doc in data.docs_processed])\n",
+    "# rerank results\n",
+    "cross_encoder_results = sorted(zip(data.docs_processed, cross_encoder_results), key=lambda x: x[1], reverse=True)\n",
+    "print(\"Cross encoder results:\")\n",
+    "final_results = cross_encoder_results[:3]\n",
+    "[print(doc.metadata, score) for (doc,score) in final_results]\n",
+    "print(\"bye\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 55,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "es where the space character is not used (like Chinese or Japanese).\n",
+      "\n",
+      "The other main feature of SentencePiece is *reversible tokenization*: since there is no special treatment of spaces, decoding the tokens is done simply by concatenating them and replacing the `_`s with spaces -- this results in the normalized text. As we saw earlier, the BERT tokenizer removes repeating spaces, so its tokenization is not reversible.\n",
+      "\n",
+      "## Algorithm overview[[algorithm-overview]]\n",
+      "\n",
+      "In the following sections, we'll dive into t\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(final_results[0][0].page_content)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "hf_qa_bot",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.3"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
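
Note: the notebook's reranking cell scores the query against every processed chunk with the cross-encoder. Below is a sketch of the cheaper variant that reranks only the baseline FAISS candidates, mirroring how qa_engine keeps a CrossEncoder('cross-encoder/ms-marco-MiniLM-L-12-v2') reranker; this is an illustration, not a cell from the notebook:

    from sentence_transformers import CrossEncoder

    query = "textual inversion"
    # BenchmarkST.evaluate returns the baseline (doc, faiss_score) pairs first.
    candidates, _ = benchmark.evaluate(query=query, k=20)

    reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-12-v2")
    scores = reranker.predict([(query, doc.page_content) for doc, _ in candidates])

    # Higher cross-encoder score means more relevant; keep the top 3.
    reranked = sorted(zip([doc for doc, _ in candidates], scores), key=lambda x: x[1], reverse=True)
    for doc, score in reranked[:3]:
        print(doc.metadata, score)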
qa_engine/mocks.py
CHANGED
@@ -6,36 +6,22 @@ from langchain.llms.base import LLM
 
 class MockLocalBinaryModel(LLM):
     """
-    Mock Local Binary Model class
-
-    Args:
-        model_id (str): The ID of the model to be mocked.
-
-    Attributes:
-        model_path (str): The path to the model to be mocked.
-        llm (str): The string "a".
-
-    Raises:
-        ValueError: If the model_path does not exist.
+    Mock Local Binary Model class.
     """
 
     model_path: str = None
-    llm: str = 'a'
+    llm: str = 'Mocked Response'
 
-    def __init__(self, model_id: str):
+    def __init__(self):
         super().__init__()
-        self.model_path = f'bot/question_answering/{model_id}'
-        if not os.path.exists(self.model_path):
-            raise ValueError(f'{self.model_path} does not exist')
-
 
     def _call(self, prompt: str, stop: Optional[list[str]] = None) -> str:
         return self.llm
 
     @property
     def _identifying_params(self) -> Mapping[str, Any]:
-        return {'name_of_model':
+        return {'name_of_model': 'mock'}
 
     @property
     def _llm_type(self) -> str:
-        return
+        return 'mock'
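
Note: with the path check gone, the mock needs no arguments and always returns the same string. Rough usage, assuming LangChain LLM instances are called directly (illustrative, not part of the commit):

    from qa_engine.mocks import MockLocalBinaryModel

    mock_llm = MockLocalBinaryModel()
    print(mock_llm('any prompt'))  # -> 'Mocked Response', regardless of the prompt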
qa_engine/qa_engine.py
CHANGED
@@ -18,6 +18,7 @@ from sentence_transformers import CrossEncoder
 
 from qa_engine import logger
 from qa_engine.response import Response
+from qa_engine.mocks import MockLocalBinaryModel
 
 
 class LocalBinaryModel(LLM):
@@ -191,6 +192,9 @@ class QAEngine():
             model_url=llm_model_id.replace('api_models/', ''),
             debug=self.debug
         )
+        elif llm_model_id == 'mock':
+            logger.info('using mock model')
+            self.llm_model = MockLocalBinaryModel()
         else:
             logger.info('using transformers pipeline model')
             self.llm_model = TransformersPipelineModel(
@@ -224,6 +228,33 @@ class QAEngine():
         self.reranker = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-12-v2')
 
 
+    @staticmethod
+    def _preprocess_question(question: str) -> str:
+        if question[-1] != '?':
+            question += '?'
+        return question
+
+
+    @staticmethod
+    def _postprocess_answer(answer: str) -> str:
+        '''
+        Preprocess the answer by removing unnecessary sequences and stop sequences.
+        '''
+        SEQUENCES_TO_REMOVE = [
+            'Factually: ', 'Answer: ', '<<SYS>>', '<</SYS>>', '[INST]', '[/INST]'
+        ]
+        SEQUENCES_TO_STOP = [
+            'User:', 'You:', 'Question:'
+        ]
+        for seq in SEQUENCES_TO_REMOVE:
+            answer = answer.replace(seq, '')
+        for seq in SEQUENCES_TO_STOP:
+            if seq in answer:
+                answer = answer[:answer.index(seq)]
+        answer = answer.strip()
+        return answer
+
+
     def get_response(self, question: str, messages_context: str = '') -> Response:
         """
         Generate an answer to the specified question.
@@ -267,8 +298,10 @@ class QAEngine():
             response.set_sources(sources=[str(m['source']) for m in metadata])
 
         logger.info('Running LLM chain')
-
-
+        question_processed = QAEngine._preprocess_question(question)
+        answer = self.llm_chain.run(question=question_processed, context=context)
+        answer_postprocessed = QAEngine._postprocess_answer(answer)
+        response.set_answer(answer_postprocessed)
         logger.info('Received answer')
 
         if self.debug:
@@ -277,7 +310,8 @@ class QAEngine():
            logger.info(f'question len: {len(question)} {sep}')
            logger.info(f'question: {question} {sep}')
            logger.info(f'answer len: {len(response.get_answer())} {sep}')
-           logger.info(f'answer: {response.get_answer()} {sep}')
+           logger.info(f'answer original: {answer} {sep}')
+           logger.info(f'answer postprocessed: {response.get_answer()} {sep}')
            logger.info(f'{response.get_sources_as_text()} {sep}')
            logger.info(f'messages_contex: {messages_context} {sep}')
            logger.info(f'relevant_docs: {relevant_docs} {sep}')
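
Note: a small trace of the two new helpers, based only on the static methods shown above (not code from the repository):

    from qa_engine import QAEngine

    print(QAEngine._preprocess_question('How to deploy a text-to-image model'))
    # -> 'How to deploy a text-to-image model?'

    raw = '[INST]Answer: Use Inference Endpoints.[/INST] User: thanks'
    print(QAEngine._postprocess_answer(raw))
    # -> 'Use Inference Endpoints.' (markers stripped, text after the 'User:' stop sequence dropped)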