"""Gradio demo: ColBERT retrieval over the Megillah English sugyot dataset.

On import/run this script:
  1. downloads the ``davidr70/megillah_english_sugyot`` dataset,
  2. de-duplicates its ``train`` rows by ``id``,
  3. builds a RAGatouille index over the unique documents, and
  4. launches a Gradio UI that shows the retrieved passages for a question.
"""
from datasets import load_dataset
from ragatouille import RAGPretrainedModel
import gradio as gr

dataset = load_dataset("davidr70/megillah_english_sugyot")

# Parallel lists handed to RAG.index(); position i in each list describes the
# same document.
documents = []
document_ids = []
metadatas = []

# Set used only for O(1) duplicate detection; `document_ids` keeps the
# first-seen order that the index needs.
# Assumes row ids are hashable (str/int) -- TODO confirm against the dataset.
seen_ids = set()
for row in dataset["train"]:
    document_id = row["id"]
    if document_id in seen_ids:
        # Keep only the first row for any repeated id.
        continue
    seen_ids.add(document_id)
    document_ids.append(document_id)
    documents.append(row["content"])
    metadatas.append(row["metadata"])

RAG = RAGPretrainedModel.from_pretrained("answerdotai/answerai-colbert-small-v1")

# NOTE(review): the index name says "menachot" although the dataset is
# Megillah. Left unchanged so an already-built on-disk index keeps resolving;
# rename deliberately if that is not a concern.
index_path = RAG.index(
    index_name="menachot_small_model",
    collection=documents,
    document_ids=document_ids,
    document_metadatas=metadatas,
)


def ask(question):
    """Search the index for *question* and format the hits as plain text.

    Args:
        question: Free-text query typed by the user.

    Returns:
        A string with one section per hit (document id, score, rank and
        passage text), each section followed by blank lines. Returns an
        empty string when the question is empty or only whitespace.
    """
    # Nothing meaningful to retrieve for a blank query; skip the model call.
    if not question or not question.strip():
        return ""

    results = RAG.search(question)
    # Join once instead of growing a string with += inside a loop.
    return "".join(
        f"document_id: {result['document_id']}\n"
        f"score: {result['score']}\n"
        f"rank: {result['rank']}\n"
        f"text: {result['content']}\n\n\n"
        for result in results
    )


with gr.Blocks(title="Megillah Search") as demo:
    gr.Markdown("# Megillah Search")
    gr.Markdown("Search through the Megillah dataset")

    question = gr.Textbox(
        label="Question",
        placeholder="Ask a question about Megillah...",
    )
    submit_btn = gr.Button("Search")
    answer = gr.Textbox(label="Sources", lines=20)

    # Both the button and pressing Enter in the textbox trigger the search.
    submit_btn.click(fn=ask, inputs=question, outputs=answer)
    question.submit(fn=ask, inputs=question, outputs=answer)

demo.launch(share=True)