embedder / app.py
davidr70's picture
use sugya organization
6fb6f87
raw
history blame
1.34 kB
from datasets import load_dataset
from llama_index.core import VectorStoreIndex, Document
import gradio as gr
# Corpus: the merged Megillah sugyot dataset, "train" split.
# (An earlier revision pointed at "davidr70/megillah_english_sugyot", and
# before that at a local "data" directory via SimpleDirectoryReader.)
dataset = load_dataset("davidr70/megilla_sugyot_merged", split="train")

# Wrap every dataset row in a llama_index Document, carrying the row's
# metadata along so it can be shown next to each search hit.
documents = [
    Document(text=item["content"], metadata=item["metadata"])
    for item in dataset
]

# Build the vector index once at import time; every search reuses it.
index = VectorStoreIndex.from_documents(documents)

retriever = index.as_retriever(
    similarity_top_k=7,  # how many hits each query returns
    vector_store_query_mode="default",  # plain semantic search
)
def ask(question):
    """Return the retrieved passages for *question* as one text report.

    Each hit from the module-level ``retriever`` is rendered as three
    labelled lines (``score``, ``metadata``, ``text``) followed by blank
    lines, and the hits are concatenated in the order the retriever
    returned them.

    Args:
        question: The free-text query typed by the user.

    Returns:
        The formatted report, or an empty string when *question* is empty,
        whitespace-only, or ``None``.
    """
    # A blank query has nothing meaningful to search for, so skip the
    # retriever call entirely instead of sending it an empty string.
    if not question or not question.strip():
        return ""

    nodes = retriever.retrieve(question)

    # Join once rather than growing a string with += inside a loop.
    return "".join(
        f"score: {node.score!s}\nmetadata: {node.metadata!s}\ntext: {node.text}\n\n\n"
        for node in nodes
    )
# Single-page UI: a question box, a search button, and a large read-only
# style text area that receives the formatted hits from ask().
with gr.Blocks(title="Megillah Search") as demo:
    gr.Markdown("# Megillah Search")
    gr.Markdown("Search through the Megillah dataset")

    question = gr.Textbox(
        label="Question",
        placeholder="Ask a question about Megillah...",
    )
    submit_btn = gr.Button("Search")
    answer = gr.Textbox(label="Sources", lines=20)

    # Run the same search whether the user clicks the button or presses
    # Enter inside the question box.
    submit_btn.click(fn=ask, inputs=question, outputs=answer)
    question.submit(fn=ask, inputs=question, outputs=answer)

# Start the web server; share=True asks Gradio for a public share link.
demo.launch(share=True)