|
from datasets import load_dataset |
|
from llama_index.core import VectorStoreIndex, Document |
|
from llama_index.core.indices.query.query_transform.base import ( |
|
HyDEQueryTransform, |
|
) |
|
from llama_index.core.query_engine import TransformQueryEngine |
|
from llama_index.core import Settings |
|
from llama_index.embeddings.openai import OpenAIEmbedding |
|
import gradio as gr |
|
|
|
# Configure the embedding model used for both indexing and querying.
Settings.embed_model = OpenAIEmbedding(model_name="text-embedding-3-small")


# Pull the merged Megillah sugyot corpus and wrap each record as a Document,
# carrying the record's metadata through to the index.
dataset = load_dataset("davidr70/megilla_sugyot_merged", split="train")

documents = []
for item in dataset:
    documents.append(Document(text=item['content'], metadata=item['metadata']))


# Build an in-memory vector index over the corpus and expose a retriever
# that returns the 7 most similar nodes per query.
index = VectorStoreIndex.from_documents(documents)
retriever = index.as_retriever(similarity_top_k=7, vector_store_query_mode="default")
|
|
|
|
|
def ask(question):
    """Retrieve the nodes most similar to *question* and format them for display.

    Args:
        question: Free-text query string passed to the module-level retriever.

    Returns:
        A single string with one section per retrieved node showing its
        similarity score, metadata, and text, each section followed by two
        blank lines (matching the original on-screen layout).
    """
    nodes = retriever.retrieve(question)
    # "".join over f-string sections avoids quadratic `+=` string building;
    # f-string interpolation str()-ifies score/metadata exactly as before.
    return "".join(
        f"score: {node.score}\nmetadata: {node.metadata}\ntext: {node.text}\n\n\n"
        for node in nodes
    )
|
|
|
|
|
# --- Gradio front end ---
# A minimal search page: one question box, one button, one results area.
with gr.Blocks(title="Megillah Search") as demo:
    gr.Markdown("# Megillah Search")
    gr.Markdown("Search through the Megillah dataset")

    question = gr.Textbox(label="Question", placeholder="Ask a question about Megillah...")
    submit_btn = gr.Button("Search")
    answer = gr.Textbox(label="Sources", lines=20)

    # Both clicking the button and pressing Enter in the textbox run a search.
    for trigger in (submit_btn.click, question.submit):
        trigger(fn=ask, inputs=question, outputs=answer)

demo.launch(share=True)
|
|