try hyde query
Browse files
app.py
CHANGED
@@ -1,17 +1,22 @@
|
|
1 |
from datasets import load_dataset
|
2 |
from llama_index.core import VectorStoreIndex, Document
|
|
|
|
|
|
|
|
|
3 |
import gradio as gr
|
4 |
|
5 |
# Corpus setup: pull the merged Megillah sugyot dataset from the Hugging Face hub.
# An earlier variant loaded "davidr70/megillah_english_sugyot" instead, and a
# local-directory loader (SimpleDirectoryReader("data")) was also tried; both are
# intentionally disabled.
dataset = load_dataset("davidr70/megilla_sugyot_merged", split="train")

# One llama_index Document per dataset row, carrying the row's text and metadata.
documents = [
    Document(text=row["content"], metadata=row["metadata"])
    for row in dataset
]

# Build the vector index over every document.
index = VectorStoreIndex.from_documents(documents)

# Plain semantic retriever returning the 7 best-matching hits per query.
retriever_options = {
    "similarity_top_k": 7,  # Number of hits to return
    "vector_store_query_mode": "default",  # Basic semantic search
}
retriever = index.as_retriever(**retriever_options)
|
16 |
|
17 |
|
|
|
1 |
from datasets import load_dataset
|
2 |
from llama_index.core import VectorStoreIndex, Document
|
3 |
+
from llama_index.core.indices.query.query_transform.base import (
|
4 |
+
HyDEQueryTransform,
|
5 |
+
)
|
6 |
+
from llama_index.core.query_engine import TransformQueryEngine
|
7 |
import gradio as gr
|
8 |
|
9 |
# TransformRetriever is imported here, next to its only use, so this section stays
# self-contained; it comes from the llama_index.core package this script already uses.
from llama_index.core.retrievers import TransformRetriever

# Corpus setup: pull the merged Megillah sugyot dataset from the Hugging Face hub.
# An earlier variant loaded "davidr70/megillah_english_sugyot" instead, and a
# local-directory loader (SimpleDirectoryReader("data")) was also tried; both are
# intentionally disabled.
dataset = load_dataset("davidr70/megilla_sugyot_merged", split="train")

# One llama_index Document per dataset row, carrying the row's text and metadata.
documents = [
    Document(text=item["content"], metadata=item["metadata"]) for item in dataset
]

# Build the vector index over every document.
index = VectorStoreIndex.from_documents(documents)

# HyDE query transform; include_original=True asks it to keep the user's original
# query alongside the generated hypothetical document.
hyde = HyDEQueryTransform(include_original=True)

# Plain semantic retriever returning the 7 best-matching hits per query.
base_retriever = index.as_retriever(
    similarity_top_k=7,  # Number of hits to return
    vector_store_query_mode="default",  # Basic semantic search
)

# NOTE(review): the previous code passed `query_transform=hyde` straight to
# `index.as_retriever(...)`. To my understanding of llama_index, that keyword is
# not consumed by the vector retriever, so HyDE was never applied - confirm against
# the installed version. Wrapping the base retriever makes every
# `retriever.retrieve(...)` call run the HyDE transform before the vector search.
retriever = TransformRetriever(base_retriever, hyde)
|
21 |
|
22 |
|