Yoxas committed (verified)
Commit bce5d54 · 1 Parent(s): cfee418

Update app.py

Files changed (1):
  1. app.py +6 -22
app.py CHANGED
@@ -19,7 +19,10 @@ data = dataset["train"]
 # Convert the string embeddings to numerical arrays
 def convert_and_ensure_2d_embeddings(example):
     # Convert the string to a numpy array
-    embeddings = np.fromstring(example['embedding'].strip("[]"), sep=' ', dtype=np.float32)
+    embedding_str = example['embedding']
+    embedding_str = embedding_str.replace('\n', ' ')
+    embedding_list = list(map(float, embedding_str.strip("[]").split()))
+    embeddings = np.array(embedding_list, dtype=np.float32)
     # Ensure the embeddings are 2-dimensional
     if embeddings.ndim == 1:
         embeddings = embeddings.reshape(1, -1)
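Note: this hunk replaces np.fromstring with explicit string handling that strips newlines before parsing. A minimal, self-contained sketch of the new logic on a made-up embedding string (the sample value below is illustrative, not taken from the dataset):

import numpy as np

# Example input as it might appear in the 'embedding' column: a stringified
# array that can contain newlines between values.
embedding_str = "[ 0.0123 -0.4567\n  0.8910  0.1112]"

# Same steps as the committed code: drop newlines, strip brackets, split on
# whitespace, convert to float32.
embedding_str = embedding_str.replace('\n', ' ')
embedding_list = list(map(float, embedding_str.strip("[]").split()))
embeddings = np.array(embedding_list, dtype=np.float32)

# Ensure a 2-dimensional shape (1, dim), as in the function above
if embeddings.ndim == 1:
    embeddings = embeddings.reshape(1, -1)

print(embeddings.shape)  # (1, 4)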
@@ -34,7 +37,7 @@ model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
 
 # use quantization to lower GPU usage
 bnb_config = BitsAndBytesConfig(
-    load_in_4bit=True, bnb_4bit_use_double_quant=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16
+    load_in_4bit=True, bnb_4bit_use_double_quant=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16
 )
 
 tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
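Note: the hunk above only shows the BitsAndBytesConfig definition; the model-loading call that consumes it is outside the diff. A sketch of how such a 4-bit NF4 config is typically passed to transformers — the from_pretrained call is commented out because it is an assumption about the rest of app.py, not something shown in this commit:

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # store weights in 4-bit
    bnb_4bit_use_double_quant=True,         # also quantize the quantization constants
    bnb_4bit_quant_type="nf4",              # NormalFloat4 quantization
    bnb_4bit_compute_dtype=torch.bfloat16,  # run matmuls in bfloat16
)

# Hypothetical model load (not part of this hunk):
# model = AutoModelForCausalLM.from_pretrained(
#     "meta-llama/Meta-Llama-3-8B-Instruct",
#     quantization_config=bnb_config,
#     device_map="auto",
#     token=token,
# )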
@@ -119,23 +122,4 @@ A rag pipeline with a chatbot feature
 Resources used to build this project :
 * embedding model : https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1
 * dataset : https://huggingface.co/datasets/not-lain/wikipedia
-* faiss docs : https://huggingface.co/docs/datasets/v2.18.0/en/package_reference/main_classes#datasets.Dataset.add_faiss_index
-* chatbot : https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct
-"""
-
-demo = gr.ChatInterface(
-    fn=talk,
-    chatbot=gr.Chatbot(
-        show_label=True,
-        show_share_button=True,
-        show_copy_button=True,
-        likeable=True,
-        layout="bubble",
-        bubble_full_width=False,
-    ),
-    theme="Soft",
-    examples=[["what's anarchy ? "]],
-    title=TITLE,
-    description=DESCRIPTION,
-)
-demo.launch(debug=True)
+* faiss docs : https://huggingface.co/docs/datasets/v2.18.0/en/package_reference/main_classes#datasets
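Note: this hunk removes the Gradio UI definition from app.py. For reference, a minimal sketch of the kind of chat interface that block created; the echo-style talk() below is a stand-in for the project's RAG pipeline, and the title/description strings are placeholders for the TITLE and DESCRIPTION variables used in the original file:

import gradio as gr

def talk(message, history):
    # Stand-in for the project's retrieval-augmented talk() function
    return f"You said: {message}"

demo = gr.ChatInterface(
    fn=talk,
    chatbot=gr.Chatbot(show_copy_button=True, layout="bubble"),
    theme="Soft",
    examples=[["what's anarchy ?"]],
    title="RAG chatbot",
    description="A rag pipeline with a chatbot feature",
)

if __name__ == "__main__":
    demo.launch(debug=True)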
 