not-lain committed on
Commit
42df98c
β€’
1 Parent(s): eaca477

🌘wπŸŒ–

Browse files
Files changed (1) hide show
  1. app.py +13 -9
app.py CHANGED
@@ -1,7 +1,6 @@
1
  import gradio as gr
2
- from datasets import load_dataset, Dataset
3
 
4
- # import faiss
5
  import os
6
  import spaces
7
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
@@ -31,6 +30,7 @@ data = load_dataset("not-lain/wikipedia-small-3000-embedded", subset="train")
31
  # index dataset
32
  data.add_faiss_index("embedding", device=1)
33
 
 
34
  @spaces.GPU
35
  def search(query: str, k: int = TOP_K):
36
  embedded_query = model.encode(query)
@@ -68,7 +68,9 @@ def talk(message, history):
68
  cleaned_past = item[1].split("\nRESOURCES:\n")[0]
69
  chat.append({"role": "assistant", "content": cleaned_past})
70
  chat.append({"role": "user", "content": message})
71
- messages = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
 
 
72
  # Tokenize the messages string
73
  model_inputs = tokenizer([messages], return_tensors="pt").to(device)
74
  streamer = TextIteratorStreamer(
@@ -100,12 +102,14 @@ TITLE = "RAG"
100
 
101
  DESCRIPTION = """
102
  ## Resources used to build this project
103
- * https://huggingface.co/mixedbread-ai/mxbai-colbert-large-v1
104
- * me 😎
105
- ## Models
106
- the models used in this space are :
107
- * google/gemma-7b-it
108
- * mixedbread-ai/mxbai-colbert-v1
 
 
109
  """
110
 
111
  demo = gr.ChatInterface(
 
1
  import gradio as gr
2
+ from datasets import load_dataset
3
 
 
4
  import os
5
  import spaces
6
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 
30
  # index dataset
31
  data.add_faiss_index("embedding", device=1)
32
 
33
+
34
  @spaces.GPU
35
  def search(query: str, k: int = TOP_K):
36
  embedded_query = model.encode(query)
 
68
  cleaned_past = item[1].split("\nRESOURCES:\n")[0]
69
  chat.append({"role": "assistant", "content": cleaned_past})
70
  chat.append({"role": "user", "content": message})
71
+ messages = tokenizer.apply_chat_template(
72
+ chat, tokenize=False, add_generation_prompt=True
73
+ )
74
  # Tokenize the messages string
75
  model_inputs = tokenizer([messages], return_tensors="pt").to(device)
76
  streamer = TextIteratorStreamer(
 
102
 
103
  DESCRIPTION = """
104
  ## Resources used to build this project
105
+ * embedding model : https://huggingface.co/mixedbread-ai/mxbai-colbert-large-v1
106
+ * dataset : https://huggingface.co/datasets/not-lain/wikipedia-small-3000-embedded (used mxbai-colbert-large-v1 to create the embedding column )
107
+ * faiss docs : https://huggingface.co/docs/datasets/v2.18.0/en/package_reference/main_classes#datasets.Dataset.add_faiss_index
108
+ * chatbot : google/gemma-7b-it
109
+
110
+ If you want to support my work please click on the heart react button β€οΈπŸ€—
111
+
112
+ <sub><sup><sub><sup>psst, I am still open for work if please reach me out at https://not-lain.github.io/</sup></sub></sup></sub>
113
  """
114
 
115
  demo = gr.ChatInterface(