"""Gradio app for semantic search over the ``davidr70/megillah`` dataset.

At start-up the script loads the dataset's ``train`` split, wraps every row in
a LlamaIndex ``Document``, builds a ``VectorStoreIndex`` over those documents
and exposes a top-5 retriever through a small Gradio UI.
"""

from datasets import load_dataset
import gradio as gr
from llama_index.core import Document, VectorStoreIndex

# Each row is read for its 'content' (document text) and 'metadata' fields.
dataset = load_dataset("davidr70/megillah", split="train")
documents = [
    Document(text=item["content"], metadata=item["metadata"])
    for item in dataset
]
# NOTE: to index local files instead of the hosted dataset, the documents can
# be loaded with llama_index.core.SimpleDirectoryReader("data").load_data().

index = VectorStoreIndex.from_documents(documents)
retriever = index.as_retriever(
    similarity_top_k=5,  # Number of hits to return
    vector_store_query_mode="default",  # Basic semantic search
)


def ask(question: str) -> str:
    """Retrieve the passages most similar to *question* and format them.

    Args:
        question: Free-text query typed by the user.

    Returns:
        One ``score / metadata / text`` entry per retrieved node, concatenated
        into a single string. An empty string is returned when *question* is
        empty or only whitespace, or when the retriever yields no nodes.
    """
    # A blank query has nothing to search for; skip the retriever call.
    if not question or not question.strip():
        return ""

    nodes = retriever.retrieve(question)
    entries = []
    for node in nodes:
        # .get() so that one hit lacking a 'section' key does not abort the
        # whole search with a KeyError; such a hit is shown as "None".
        section = node.metadata.get("section")

        # Echo every hit to stdout so it also shows up in the server log.
        print(node.score)
        print(section)
        print(node.text)

        entries.append(
            f"score: {node.score}\nmetadata: {section}\ntext: {node.text}\n\n\n"
        )
    return "".join(entries)


with gr.Blocks(title="Megillah Search") as demo:
    gr.Markdown("# Megillah Search")
    gr.Markdown("Search through the Megillah dataset")

    question = gr.Textbox(
        label="Question",
        placeholder="Ask a question about Megillah...",
    )
    submit_btn = gr.Button("Search")
    answer = gr.Textbox(label="Sources", lines=20)

    # Both the button click and pressing Enter in the textbox run the search.
    submit_btn.click(fn=ask, inputs=question, outputs=answer)
    question.submit(fn=ask, inputs=question, outputs=answer)

# share=True asks Gradio to also create a public share link for the app.
demo.launch(share=True)