{ "cells": [ { "cell_type": "markdown", "id": "73ef8baa", "metadata": {}, "source": [ "# Query Marqo Index" ] }, { "cell_type": "code", "execution_count": 1, "id": "dee4e8d3", "metadata": { "code_folding": [] }, "outputs": [], "source": [ "## Import packages\n", "import os\n", "from pprint import pprint\n", "\n", "import ipywidgets as widgets\n", "import marqo as mq\n", "import pandas as pd\n", "from IPython.display import HTML, Javascript, display" ] }, { "cell_type": "code", "execution_count": 2, "id": "e80297f9", "metadata": { "code_folding": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Connected to onit-sonnini-DHd2025-prep.\n" ] } ], "source": [ "## Connect to Marqo\n", "\n", "# The endpoint may be supplied through the MARQO_URL environment variable;\n", "# the placeholder below is only used when that variable is not set.\n", "MARQO_URL = os.environ.get(\"MARQO_URL\", \"http://your.ip:port\")\n", "marqoClient = mq.Client(url=MARQO_URL)\n", "#pprint(marqoClient.get_indexes())\n", "\n", "## DHd 2025 ##\n", "indexName = \"onit-sonnini-DHd2025-prep\" ## index with LLM-corrected texts as tensor field\n", "#indexName = \"onit-sonnini-DHd2025-clean\" ## index with cleaned texts as tensor field\n", "# Report which index the following cells will query\n", "print(f'Connected to {indexName}.')\n", "\n", "# Load corpus data; when a barcode occurs more than once, only its last row is kept\n", "bc_corpus = (\n", "    pd.read_csv(\"data/ONiT_barcodes_ALL_metadata_ONB_status_2024-05-23.csv\")\n", "    .drop_duplicates(subset='barcode', keep='last')\n", ")" ] }, { "cell_type": "code", "execution_count": 3, "id": "353081b8", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'message': 'Welcome to Marqo', 'version': '2.5.1'}" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Ask the server for its welcome message and version\n", "marqoClient.index(indexName).get_marqo()" ] }, { "cell_type": "code", "execution_count": 4, "id": "c9ceb6de", "metadata": { "code_folding": [] }, "outputs": [ { "data": { "text/html": [ "
\n", " | barcode | \n", "page | \n", "iiif_link | \n", "text_clean | \n", "text_orig | \n", "text_prep | \n", "
---|---|---|---|---|---|---|
0 | \n", "Z166069305 | \n", "5 | \n", "https://iiif.onb.ac.at/images/ABO/Z166069305/0... | \n", "C. S.' Sonnin is,\\nehemaligen Dffiziers uub In... | \n", "!\\n\\nC. S.' Sonnin i’s,\\n\\n;\\nehemaligen Dffiz... | \n", "C. S.' Sonnini's,\\nehemaligen Offiziers und In... | \n", "
1 | \n", "Z166069305 | \n", "6 | \n", "https://iiif.onb.ac.at/images/ABO/Z166069305/0... | \n", "2125 murid\\ngobiothers\\nconale\\nKOENISE\\nKAISE... | \n", "2125 murid\\n\\ngobiothers\\n\\nconale\\n\\nܪܝ\\n\\n، ... | \n", "2255 Murdoch \\nGouverneurs\\nComte\\nKOENIGSE\\nK... | \n", "
2 | \n", "Z166069305 | \n", "7 | \n", "https://iiif.onb.ac.at/images/ABO/Z166069305/0... | \n", "V o r re o e\\nDe 6 u i berpe Bet,$.\\nundteichl... | \n", "V o r re o e\\nDė 6 u i berpe Bét,$.\\n\\nundteic... | \n", "Vorrede.\\n\\nDeutschland hat in betreff seiner ... | \n", "
3 | \n", "Z166069305 | \n", "8 | \n", "https://iiif.onb.ac.at/images/ABO/Z166069305/0... | \n", "Welt auf fich gezogen haben. Viele feiner Denk... | \n", "IV\\n\\n.\\n\\nWelt auf fich gezogen haben. Viele ... | \n", "Welt auf sich gezogen haben. Viele seiner Denk... | \n", "
4 | \n", "Z166069305 | \n", "9 | \n", "https://iiif.onb.ac.at/images/ABO/Z166069305/0... | \n", "und auslandische Sklaven genossen die Vortheil... | \n", ")\\n\\n-\\nI\\n\\nI\\n\\n1\\n\\n11\\n\\nr.\\n\\nund ausländ... | \n", "Und ausländische Sklaven genossen die Vortheil... | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
486 | \n", "Z166069305 | \n", "494 | \n", "https://iiif.onb.ac.at/images/ABO/Z166069305/0... | \n", "<empty page> | \n", "+\\n\\nè³½\\n\\n1\\n\\n\\n | \n", "<empty page> | \n", "
487 | \n", "Z166069305 | \n", "495 | \n", "https://iiif.onb.ac.at/images/ABO/Z166069305/0... | \n", "Riedel fo\\nBedminenzelt | \n", ".7.\\n\\n.3.6\\n\\n>\\n\\n1 0\\n\\nRiedel fo\\n\\n(\\n\\n(... | \n", "Riedel von Eisenbach | \n", "
488 | \n", "Z166069305 | \n", "499 | \n", "https://iiif.onb.ac.at/images/ABO/Z166069305/0... | \n", "Osterreichische Nationalbibliothek\\n+ Z166069305 | \n", "Österreichische Nationalbibliothek\\n\\n+ Z1660... | \n", "Please provide the faulty OCR texts generated ... | \n", "
489 | \n", "Z166069305 | \n", "503 | \n", "https://iiif.onb.ac.at/images/ABO/Z166069305/0... | \n", "<empty page> | \n", "{\\n \"status code\" : 404,\\n \"message\" : \"The ... | \n", "<empty page> | \n", "
490 | \n", "Z166069305 | \n", "504 | \n", "https://iiif.onb.ac.at/images/ABO/Z166069305/0... | \n", "<empty page> | \n", "{\\n \"status code\" : 404,\\n \"message\" : \"The ... | \n", "<empty page> | \n", "
491 rows × 6 columns
\n", "\n", " | document | \n", "rrf_score | \n", "barcode | \n", "page | \n", "iiif_link | \n", "text_orig | \n", "text_clean | \n", "text_prep | \n", "_id | \n", "_highlights | \n", "_score | \n", "rank | \n", "corpus | \n", "rerank | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "Z166069305_430 | \n", "0.032522 | \n", "Z166069305 | \n", "430 | \n", "https://iiif.onb.ac.at/images/ABO/Z166069305/0... | \n", "402 befand mich in einiger Entfernung davon, ... | \n", "befand mich in einiger Entfernung davon, und d... | \n", "befand mich in einiger Entfernung davon, und d... | \n", "7e2b21a1-4cdb-4b6a-b1c0-75c30bfe13bb | \n", "[{'text_prep': 'befand mich in einiger Entfern... | \n", "0.892140 | \n", "1/2 | \n", "D19 | \n", "1 | \n", "
1 | \n", "Z166069305_10 | \n", "0.030118 | \n", "Z166069305 | \n", "10 | \n", "https://iiif.onb.ac.at/images/ABO/Z166069305/0... | \n", "VI große Flinten, die ihm ſeine Diener zur Se... | \n", "grose Flinten, die ihm seine Diener zur Seite ... | \n", "große Flinten, die ihm seine Diener zur Seite ... | \n", "ab0905d4-6ca7-4e94-8fb2-3d2081632d6d | \n", "[{'text_prep': 'Sein letztes Hulfritt trifft z... | \n", "0.872264 | \n", "4/9 | \n", "D19 | \n", "2 | \n", "
2 | \n", "Z166069305_399 | \n", "0.029324 | \n", "Z166069305 | \n", "399 | \n", "https://iiif.onb.ac.at/images/ABO/Z166069305/0... | \n", "1 1 1 , 571 Plinius berichtet nach dem Xen... | \n", "Plinius berichtet nach dem Xenophon, die Camel... | \n", "Plinius berichtet nach dem Xenophon, die Camel... | \n", "feb1b47b-effe-4e9d-be0f-7a749fed5ec0 | \n", "[{'text_prep': 'Ich muss hier auch bemerken, d... | \n", "0.865306 | \n", "13/4 | \n", "D19 | \n", "3 | \n", "
3 | \n", "Z166069305_415 | \n", "0.028860 | \n", "Z166069305 | \n", "415 | \n", "https://iiif.onb.ac.at/images/ABO/Z166069305/0... | \n", "587 nen. Ich ritt auf fie los, aber Hufrein l... | \n", "nen. Ich ritt auf fie los, aber Hufrein lies m... | \n", "Ich ritt auf sie los, aber Hufrein lies mich r... | \n", "02bb750e-0993-4341-ab74-934d28fc523e | \n", "[{'text_prep': 'Ehe wir noch an dieser Ore anl... | \n", "0.864078 | \n", "17/3 | \n", "D19 | \n", "4 | \n", "
4 | \n", "Z166069305_220 | \n", "0.028485 | \n", "Z166069305 | \n", "220 | \n", "https://iiif.onb.ac.at/images/ABO/Z166069305/0... | \n", "' 92 lungen und Bewegungen. Mit einer Phyſiog... | \n", "lungen und Bewegungen. Mit einer Physiognomie,... | \n", "Lungen und Bewegungen. Mit einer Physiognomie,... | \n", "7949fe4e-5534-4522-b151-35b57a733650 | \n", "[{'text_prep': 'Die Haustiere finden nirgends ... | \n", "0.869331 | \n", "6/15 | \n", "D19 | \n", "5 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
478 | \n", "Z166069305_493 | \n", "0.001901 | \n", "Z166069305 | \n", "493 | \n", "https://iiif.onb.ac.at/images/ABO/Z166069305/0... | \n", "Pl. 5. Fig.2. Fig.3. Fig. 1 B Riedel fee... | \n", "Pl. 5. Fig.2. Fig.3. Fig. 1 Riedel fee. Fig. 1... | \n", "Pl. 5. Fig. 2. Fig. 3. Fig. 1 Riedel fee. Fig.... | \n", "131e6f12-db13-422f-96e4-0924fe19026b | \n", "[{'text_prep': 'Fig. 1 Riedel fee.'}] | \n", "0.819874 | \n", "466 | \n", "D19 | \n", "479 | \n", "
479 | \n", "Z166069305_484 | \n", "0.001898 | \n", "Z166069305 | \n", "484 | \n", "https://iiif.onb.ac.at/images/ABO/Z166069305/0... | \n", "1 - 1 Seite 109 Zeile I v. u. Abanſoon lies... | \n", "Seite 109 Zeile I v. u. Abansoon lies as anson... | \n", "Seite 109 Zeile I v. u. Abends sonnig, ansonst... | \n", "da3ced63-91a8-4710-81b6-40da8a1cf407 | \n", "[{'text_prep': 'Seite 109 Zeile I v. u. Abends... | \n", "0.818248 | \n", "467 | \n", "D19 | \n", "480 | \n", "
480 | \n", "Z166069305_489 | \n", "0.001894 | \n", "Z166069305 | \n", "489 | \n", "https://iiif.onb.ac.at/images/ABO/Z166069305/0... | \n", "Th. PL 3 ஏப்ரதம் THUNDE 1 4 Ironis ту C... | \n", "Th. PL 3 THUNDE Ironis Cine kanclerte tatue be... | \n", "Theodor Pl. 3 Thundersturm Ironische cine kan... | \n", "228e98a9-2d29-4f34-a0c2-adcdd2eeff09 | \n", "[{'text_prep': 'Theodor Pl. 3 Thundersturm Ir... | \n", "0.818178 | \n", "468 | \n", "D19 | \n", "481 | \n", "
481 | \n", "Z166069305_482 | \n", "0.001890 | \n", "Z166069305 | \n", "482 | \n", "https://iiif.onb.ac.at/images/ABO/Z166069305/0... | \n", "ie \" !!! 3 iC; 1: 0 Wis . bici\" ..) .\"., 6... | \n", "iC; 1: 0 Wis . bici\" ..) .\"., 6,7, ART 9 * noi... | \n", "Ich kam also um die neunte Stunde zu Wismar. | \n", "9e9b9254-c675-4427-a20e-88b8d723aec3 | \n", "[{'text_prep': 'Ich kam also um die neunte Stu... | \n", "0.812620 | \n", "469 | \n", "D19 | \n", "482 | \n", "
482 | \n", "Z166069305_499 | \n", "0.001887 | \n", "Z166069305 | \n", "499 | \n", "https://iiif.onb.ac.at/images/ABO/Z166069305/0... | \n", "Österreichische Nationalbibliothek + Z166069305 | \n", "Osterreichische Nationalbibliothek + Z166069305 | \n", "Please provide the faulty OCR texts generated ... | \n", "93723fd2-f359-4251-8621-513c4f6e1128 | \n", "[{'text_prep': '(Please paste the text)'}] | \n", "0.805837 | \n", "470 | \n", "D19 | \n", "483 | \n", "
483 rows × 14 columns
\n", "Retrieved text chunk: {row[\"unpacked_highlights\"]}
\n", "{highlight_text(row[text_col], row[\"unpacked_highlights\"])}
\n", "\n", " | Sentence | \n", "Cosine Similarity | \n", "
---|---|---|
0 | \n", "S. sono. Gmelin hat diesen Vogel in der 13. | \n", "0.587100 | \n", "
1 | \n", "Ich todtete eine gelbe Bachstelze ***) und ein... | \n", "0.293896 | \n", "
2 | \n", "Auf diesem Damme stehen von Zeit zu Zeit klein... | \n", "0.347629 | \n", "
3 | \n", "Indessen schienen mir diese Vögel sich mehr de... | \n", "0.471382 | \n", "
4 | \n", "Die Wanderung dieser Vögel nach Ägypten hat je... | \n", "0.476594 | \n", "
5 | \n", "Ich traf auch eine Wachtel an, die ich sogleic... | \n", "0.200288 | \n", "
6 | \n", "Diese Vögel sind sehr fett, ihr Fleisch ist za... | \n", "0.407871 | \n", "
7 | \n", "Reichhaltig war er. Kein Vogel ist in Ägypten ... | \n", "0.515706 | \n", "
8 | \n", "Linn. Den ersten Morgens bei einem sehr schöne... | \n", "0.310032 | \n", "
9 | \n", "Bearbeitung der Gegenden um den Canal Salza. F... | \n", "0.271253 | \n", "
10 | \n", "Diese Vögel hielten sich nicht in der Nähe von | \n", "0.515144 | \n", "
11 | \n", "nennt. Diese letztgenannten Vögel sprangen paa... | \n", "0.336441 | \n", "
12 | \n", "Binther- Nilaat- Schildfröste- te- Vogel- Raub... | \n", "0.163589 | \n", "
13 | \n", "Durch diese habe ich auch den Unterschied des ... | \n", "0.268166 | \n", "
14 | \n", "Sie sagten uns, diese Vögel kommen oft nach Ab... | \n", "0.320704 | \n", "
15 | \n", "Es hielt daher sehr schwer, sich einen von die... | \n", "0.352532 | \n", "
16 | \n", "Senesbaum Vogel Beschreibung einer Art von Fal... | \n", "0.317160 | \n", "
17 | \n", "Diese Vögel gehören nicht zu einerlei Art. Die... | \n", "0.433839 | \n", "
18 | \n", "Ich erkannte Beccafien, Feldlerchen und Sperli... | \n", "0.303594 | \n", "
19 | \n", "Kapitel. Natron -- Bleichen der Leinwand und d... | \n", "0.265617 | \n", "
\n", " | document | \n", "rrf_score | \n", "barcode | \n", "page | \n", "iiif_link | \n", "text_orig | \n", "text_clean | \n", "text_prep | \n", "_id | \n", "_highlights | \n", "_score | \n", "rank | \n", "corpus | \n", "rerank | \n", "unpacked_highlights | \n", "ST_cosine_similarity | \n", "onb_viewer_link | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "Z166069305_252 | \n", "0.032258 | \n", "Z166069305 | \n", "252 | \n", "https://iiif.onb.ac.at/images/ABO/Z166069305/0... | \n", "224 7 Dieſer Falke war ein Weibchen. Der Ein... | \n", "Dieser Falke war ein Weibchen. Der Eingeweide ... | \n", "Dieser Falke war ein Weibchen. Der Eingeweidek... | \n", "8a5893c6-fde6-42f3-94e3-16db95629fe5 | \n", "[{'text_prep': 'S. sono. Gmelin hat diesen Vog... | \n", "0.856740 | \n", "2 | \n", "D19 | \n", "1 | \n", "S. sono. Gmelin hat diesen Vogel in der 13. | \n", "0.587100 | \n", "https://digital.onb.ac.at/OnbViewer/viewer.fac... | \n", "
1 | \n", "Z166069305_43 | \n", "0.031025 | \n", "Z166069305 | \n", "43 | \n", "https://iiif.onb.ac.at/images/ABO/Z166069305/0... | \n", "16 5 1 entgegen, indem er mit großer Heftig... | \n", "entgegen, indem er mit groser Heftigkeit nach ... | \n", "entgegen, indem er mit großer Heftigkeit nach ... | \n", "4205ec0a-9cf3-448d-afba-3023e4c92052 | \n", "[{'text_prep': 'Ich todtete eine gelbe Bachste... | \n", "0.852868 | \n", "3/6 | \n", "D19 | \n", "2 | \n", "Ich todtete eine gelbe Bachstelze ***) und ein... | \n", "0.293896 | \n", "https://digital.onb.ac.at/OnbViewer/viewer.fac... | \n", "
2 | \n", "Z166069305_265 | \n", "0.029911 | \n", "Z166069305 | \n", "265 | \n", "https://iiif.onb.ac.at/images/ABO/Z166069305/0... | \n", "237 Seeufer, an welchem wir bis zum See Maadi... | \n", "Seeufer, an welchem wir bis zum See Maadie hin... | \n", "Seeufer, an welchem wir bis zum See Maadie hin... | \n", "5fc895c1-b587-4442-a34c-dd446c3054f2 | \n", "[{'text_prep': 'Auf diesem Damme stehen von Ze... | \n", "0.848943 | \n", "4/10 | \n", "D19 | \n", "3 | \n", "Auf diesem Damme stehen von Zeit zu Zeit klein... | \n", "0.347629 | \n", "https://digital.onb.ac.at/OnbViewer/viewer.fac... | \n", "
3 | \n", "Z166069305_344 | \n", "0.029412 | \n", "Z166069305 | \n", "344 | \n", "https://iiif.onb.ac.at/images/ABO/Z166069305/0... | \n", "316 niederſchoß, wünſchte ich mir Glük, dieſe... | \n", "niederschos, wunschte ich mir Gluk, diesen uns... | \n", "Niederschoss, wünschte ich mir Glück, diesen u... | \n", "a7debf83-ac70-445a-afee-5f35515b3e1d | \n", "[{'text_prep': 'Indessen schienen mir diese Vö... | \n", "0.843653 | \n", "8 | \n", "D19 | \n", "4 | \n", "Indessen schienen mir diese Vögel sich mehr de... | \n", "0.471382 | \n", "https://digital.onb.ac.at/OnbViewer/viewer.fac... | \n", "
4 | \n", "Z166069305_243 | \n", "0.027778 | \n", "Z166069305 | \n", "243 | \n", "https://iiif.onb.ac.at/images/ABO/Z166069305/0... | \n", "215 don -einſam liegenden Seen beleben, bleib... | \n", "don -einsam liegenden Seen beleben, bleiben di... | \n", "don-einsam liegenden Seen beleben, bleiben die... | \n", "58487609-e48c-47a1-8a0a-06569fef7f53 | \n", "[{'text_prep': 'Die Wanderung dieser Vögel nac... | \n", "0.835293 | \n", "24/3 | \n", "D19 | \n", "5 | \n", "Die Wanderung dieser Vögel nach Ägypten hat je... | \n", "0.476594 | \n", "https://digital.onb.ac.at/OnbViewer/viewer.fac... | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
413 | \n", "Z166069305_70 | \n", "0.002119 | \n", "Z166069305 | \n", "70 | \n", "https://iiif.onb.ac.at/images/ABO/Z166069305/0... | \n", "42 ohne Thellung und ohne Abgaben die Früchte... | \n", "ohne Thellung und ohne Abgaben die Fruchte ihr... | \n", "ohne Thellung und ohne Abgaben die Früchte ihr... | \n", "06af6d12-1ddf-492d-8848-2537dc8b4a9c | \n", "[{'text_prep': 'Vorzüglich zog Frankreich groß... | \n", "0.794844 | \n", "412 | \n", "D19 | \n", "414 | \n", "Vorzüglich zog Frankreich große Vorteile darau... | \n", "0.237914 | \n", "https://digital.onb.ac.at/OnbViewer/viewer.fac... | \n", "
414 | \n", "Z166069305_299 | \n", "0.002114 | \n", "Z166069305 | \n", "299 | \n", "https://iiif.onb.ac.at/images/ABO/Z166069305/0... | \n", "271 i Auf meiner ganzen Reife habe ich und m... | \n", "Auf meiner ganzen Reife habe ich und meine Rei... | \n", "Auf meiner ganzen Reise habe ich und meine Gef... | \n", "01d37f8c-4a15-42a2-9ef5-a73fc7739766 | \n", "[{'text_prep': 'Der Fluss sieht rotlich und en... | \n", "0.794709 | \n", "413 | \n", "D19 | \n", "415 | \n", "Der Fluss sieht rotlich und endlich grünlich a... | \n", "0.106832 | \n", "https://digital.onb.ac.at/OnbViewer/viewer.fac... | \n", "
415 | \n", "Z166069305_410 | \n", "0.002110 | \n", "Z166069305 | \n", "410 | \n", "https://iiif.onb.ac.at/images/ABO/Z166069305/0... | \n", "382 ilmy 1 \" mann, der feſt von dieſem Ged... | \n", "ilmy mann, der fest von diesem Gedanken uberze... | \n", "Ilmymann, der fest von diesem Gedanken überzeu... | \n", "dde560fc-9473-4faa-9e3c-1280ba131213 | \n", "[{'text_prep': 'Sie hielten mich in ihrer Einb... | \n", "0.791636 | \n", "414 | \n", "D19 | \n", "416 | \n", "Sie hielten mich in ihrer Einbildung für einen... | \n", "0.140597 | \n", "https://digital.onb.ac.at/OnbViewer/viewer.fac... | \n", "
416 | \n", "Z166069305_75 | \n", "0.002105 | \n", "Z166069305 | \n", "75 | \n", "https://iiif.onb.ac.at/images/ABO/Z166069305/0... | \n", "47 3 dle ich hier gemacht habe, werden iðre ... | \n", "dle ich hier gemacht habe, werden ire Stelle i... | \n", "Die ich hier gemacht habe, werden ihre Stelle ... | \n", "32dd47f1-c7f4-4d4a-bb50-f428f79fce3b | \n", "[{'text_prep': 'Die ich hier gemacht habe, wer... | \n", "0.791505 | \n", "415 | \n", "D19 | \n", "417 | \n", "Die ich hier gemacht habe, werden ihre Stelle ... | \n", "0.136045 | \n", "https://digital.onb.ac.at/OnbViewer/viewer.fac... | \n", "
417 | \n", "Z166069305_6 | \n", "0.002101 | \n", "Z166069305 | \n", "6 | \n", "https://iiif.onb.ac.at/images/ABO/Z166069305/0... | \n", "2125 murid gobiothers conale ܪܝ ، فرد. برد... | \n", "2125 murid gobiothers conale KOENISE KAISERLIC... | \n", "2255 Murdoch Gouverneurs Comte KOENIGSE KAISE... | \n", "21bb54fb-3e4e-456c-9605-f89970db24b7 | \n", "[{'text_prep': '2255 Murdoch Gouverneurs Comt... | \n", "0.791373 | \n", "416 | \n", "D19 | \n", "418 | \n", "2255 Murdoch Gouverneurs Comte KOENIGSE KAISE... | \n", "0.109442 | \n", "https://digital.onb.ac.at/OnbViewer/viewer.fac... | \n", "
418 rows × 17 columns
\n", "