from datasets import load_dataset
from llama_index.core import VectorStoreIndex, Document
from llama_index.core.indices.query.query_transform.base import (
HyDEQueryTransform,
)
from llama_index.core.query_engine import TransformQueryEngine
from llama_index.core import Settings
from llama_index.embeddings.openai import OpenAIEmbedding
import gradio as gr
# Use OpenAI's small embedding model for both indexing and query embeddings.
Settings.embed_model = OpenAIEmbedding(model_name="text-embedding-3-small")
# dataset=load_dataset("davidr70/megillah_english_sugyot", split="train")
dataset=load_dataset("davidr70/megilla_sugyot_merged", split="train")
# Each dataset row is assumed to carry a 'content' string and a 'metadata' dict
# — TODO confirm against the dataset schema on the Hub.
documents = [Document(text=item['content'], metadata=item['metadata']) for item in dataset]
# hyde = HyDEQueryTransform(include_original=True)
#documents = SimpleDirectoryReader("data").load_data()
# Build an in-memory vector index; this embeds every document via the OpenAI API.
index = VectorStoreIndex.from_documents(documents)
retriever = index.as_retriever(
    similarity_top_k=7,  # Number of hits to return
    vector_store_query_mode="default"  # Basic semantic search
)
def ask(question):
nodes = retriever.retrieve(question)
full_result = ""
for node in nodes:
output = f"score: {str(node.score)}\nmetadata: {str(node.metadata)}\ntext: {node.text}\n\n\n"
full_result += output
return full_result
# Minimal Gradio UI: one question box, one button, raw retrieval hits as text.
with gr.Blocks(title="Megillah Search") as demo:
    gr.Markdown("# Megillah Search")
    gr.Markdown("Search through the Megillah dataset")
    question = gr.Textbox(label="Question", placeholder="Ask a question about Megillah...")
    submit_btn = gr.Button("Search")
    answer = gr.Textbox(label="Sources", lines=20)
    # Both clicking the button and pressing Enter in the textbox run the same search.
    submit_btn.click(fn=ask, inputs=question, outputs=answer)
    question.submit(fn=ask, inputs=question, outputs=answer)
# NOTE(review): share=True also exposes the app through a public gradio.live
# URL in addition to the local server — confirm this is intended.
demo.launch(share=True)