# Query Marqo Index

In [1]:
## Import packages
import marqo as mq
import pandas as pd
import ipywidgets as widgets
from IPython.display import display, HTML
from IPython.core.display import Javascript
from pprint import pprint

In [2]:
## Connect to Marqo

MARQO_URL = "http://your.ip:port"
marqoClient = mq.Client(url=MARQO_URL)
#pprint(marqoClient.get_indexes())

## DHd 2025 ##
indexName = "onit-sonnini-DHd2025-prep" ## index with LLM-corrected texts as tensor field
#indexName = "onit-sonnini-DHd2025-clean" ## index with cleaned texts as tensor field
print(f'Connected to {indexName}.')

# Load corpus data
bc_corpus = pd.read_csv("data/ONiT_barcodes_ALL_metadata_ONB_status_2024-05-23.csv")
bc_corpus = bc_corpus.drop_duplicates(subset='barcode', keep='last')

Connected to onit-sonnini-DHd2025-prep.


In [3]:
marqoClient.index(indexName).get_marqo()

{'message': 'Welcome to Marqo', 'version': '2.5.1'}

In [4]:
# Preview data
import pandas as pd

index_DHd = pd.read_csv("C:/onit_rag/data/DHd_index-cleaned.csv")
index_DHd

Unnamed: 0,barcode,page,iiif_link,text_clean,text_orig,text_prep
0,Z166069305,5,https://iiif.onb.ac.at/images/ABO/Z166069305/0...,"C. S.' Sonnin is,\nehemaligen Dffiziers uub In...","!\n\nC. S.' Sonnin i’s,\n\n;\nehemaligen Dffiz...","C. S.' Sonnini's,\nehemaligen Offiziers und In..."
1,Z166069305,6,https://iiif.onb.ac.at/images/ABO/Z166069305/0...,2125 murid\ngobiothers\nconale\nKOENISE\nKAISE...,2125 murid\n\ngobiothers\n\nconale\n\nܪܝ\n\n، ...,2255 Murdoch \nGouverneurs\nComte\nKOENIGSE\nK...
2,Z166069305,7,https://iiif.onb.ac.at/images/ABO/Z166069305/0...,"V o r re o e\nDe 6 u i berpe Bet,$.\nundteichl...","V o r re o e\nDė 6 u i berpe Bét,$.\n\nundteic...",Vorrede.\n\nDeutschland hat in betreff seiner ...
3,Z166069305,8,https://iiif.onb.ac.at/images/ABO/Z166069305/0...,Welt auf fich gezogen haben. Viele feiner Denk...,IV\n\n.\n\nWelt auf fich gezogen haben. Viele ...,Welt auf sich gezogen haben. Viele seiner Denk...
4,Z166069305,9,https://iiif.onb.ac.at/images/ABO/Z166069305/0...,und auslandische Sklaven genossen die Vortheil...,)\n\n-\nI\n\nI\n\n1\n\n11\n\nr.\n\nund ausländ...,Und ausländische Sklaven genossen die Vortheil...
...,...,...,...,...,...,...
486,Z166069305,494,https://iiif.onb.ac.at/images/ABO/Z166069305/0...,<empty page>,+\n\nè³½\n\n1\n\n\n,<empty page>
487,Z166069305,495,https://iiif.onb.ac.at/images/ABO/Z166069305/0...,Riedel fo\nBedminenzelt,.7.\n\n.3.6\n\n>\n\n1 0\n\nRiedel fo\n\n(\n\n(...,Riedel von Eisenbach
488,Z166069305,499,https://iiif.onb.ac.at/images/ABO/Z166069305/0...,Osterreichische Nationalbibliothek\n+ Z166069305,Ã–sterreichische Nationalbibliothek\n\n+ Z1660...,Please provide the faulty OCR texts generated ...
489,Z166069305,503,https://iiif.onb.ac.at/images/ABO/Z166069305/0...,<empty page>,"{\n ""status code"" : 404,\n ""message"" : ""The ...",<empty page>


In [14]:
## Query parameter

query = "Pferd, Pferde"
limit = 1000 # max limit = 1000
threshold = 0.7 # set threshold
filter_string = "barcode:(Z166069305)" # Sonnini Bd. 1

In [15]:
## Query the index

#results = marqoClient.index(indexName).search(q=query, limit=limit) # basic search
results_tensor = marqoClient.index(indexName).search(q=query, limit=limit, filter_string=filter_string) # tensor search
print('Output tensor search: ', len(results_tensor['hits']))
results_lexical = marqoClient.index(indexName).search(q=query, limit=limit, filter_string=filter_string, search_method="LEXICAL") # keyword search (BM25)
print('Output lexical search: ', len(results_lexical['hits']))

## Load into dataframes

# results tensor search
output1 = pd.DataFrame(results_tensor["hits"])
# Filter rows where _highlights are 5 tokens or more
output1 = output1[output1["_highlights"].apply(lambda x: len(str(x).split()) >= 5)]
print('Output tensor search after filtering vectors < 5 tokens: ', len(output1))
### Filter rows where _score is >= threshold
output1 = output1[output1["_score"] >= threshold]
print('Output tensor search with threshold: ', len(output1))
# Add corpus metadata
# Merge the two DataFrames on the 'barcode' column to add 'corpus'
output1 = output1.merge(bc_corpus, on='barcode', how='inner')
# Add index +1 as a new column called 'rank'
output1['rank'] = output1.index + 1
# Add document identifier
output1['document'] = output1.apply(lambda row: f"{row['barcode']}_{row['page']}", axis=1)

# results keyword search
if len(results_lexical['hits']) == 0:
    output2 = output1
    print("No lexical search results! Using only tensor results.")
    
else:
    output2 = pd.DataFrame(results_lexical["hits"])
    # Add corpus metadata
    # Merge the two DataFrames on the 'barcode' column to add 'corpus'
    output2 = output2.merge(bc_corpus, on='barcode', how='inner')
    # Add index +1 as a new column called 'rank'
    output2['rank'] = output2.index + 1#apply(lambda row: output2.index + 1, axis=1)
    # Add document identifier
    output2['document'] = output2.apply(lambda row: f"{row['barcode']}_{row['page']}", axis=1)

Output tensor search:  491
Output lexical search:  28
Output tensor search after filtering vectors < 5 tokens:  447
Output tensor search with threshold:  447


In [11]:
## Reciprocal Rank Fusion (RRF)

# Set the RRF parameter
k = 60

# Concatenate the DataFrames to combine all rankings into one DataFrame
combined_df = pd.concat([output1, output2], ignore_index=True)

# Initialize the RRF score column to 0
combined_df['rrf_score'] = 0

# Function to update RRF scores
def update_rrf_scores(df, k):
    # Calculate RRF score using the formula and add to the rrf_score column
    df['rrf_score'] += 1 / (k + df['rank'])

# Update RRF scores for the combined DataFrame
update_rrf_scores(combined_df, k)

# Custom aggregation function to combine values with a '/'
def combine_values(values):
    # Convert to string and join unique values with a '/' separator
    return '/'.join(values.astype(str).unique())

# Group by 'item' and sum the RRF scores for each item
final_scores_df = final_scores_df = combined_df.groupby('document', as_index=False).agg({
    'rrf_score': 'sum',
    'barcode': 'first',
    'page': 'first',
    'iiif_link': 'first',
    'text_orig': 'first',
    'text_clean': 'first',
    'text_prep': 'first',
    '_id': 'first',
    '_highlights': 'first',
    '_score': 'first',
    'rank': combine_values,
    'corpus': 'first'
})

# Sort by total RRF score in descending order and reset index
final_scores_df = final_scores_df.sort_values(by='rrf_score', ascending=False).reset_index(drop=True)

# Add rerank based on the sorted order
final_scores_df['rerank'] = final_scores_df.index + 1

# Display the final DataFrame
#print(f'Total results: {len(final_scores_df)}')
#final_scores_df

final_scores_df

Unnamed: 0,document,rrf_score,barcode,page,iiif_link,text_orig,text_clean,text_prep,_id,_highlights,_score,rank,corpus,rerank
0,Z166069305_430,0.032522,Z166069305,430,https://iiif.onb.ac.at/images/ABO/Z166069305/0...,"402 befand mich in einiger Entfernung davon, ...","befand mich in einiger Entfernung davon, und d...","befand mich in einiger Entfernung davon, und d...",7e2b21a1-4cdb-4b6a-b1c0-75c30bfe13bb,[{'text_prep': 'befand mich in einiger Entfern...,0.892140,1/2,D19,1
1,Z166069305_10,0.030118,Z166069305,10,https://iiif.onb.ac.at/images/ABO/Z166069305/0...,"VI große Flinten, die ihm ſeine Diener zur Se...","grose Flinten, die ihm seine Diener zur Seite ...","große Flinten, die ihm seine Diener zur Seite ...",ab0905d4-6ca7-4e94-8fb2-3d2081632d6d,[{'text_prep': 'Sein letztes Hulfritt trifft z...,0.872264,4/9,D19,2
2,Z166069305_399,0.029324,Z166069305,399,https://iiif.onb.ac.at/images/ABO/Z166069305/0...,"1 1 1 , 571 Plinius berichtet nach dem Xen...","Plinius berichtet nach dem Xenophon, die Camel...","Plinius berichtet nach dem Xenophon, die Camel...",feb1b47b-effe-4e9d-be0f-7a749fed5ec0,"[{'text_prep': 'Ich muss hier auch bemerken, d...",0.865306,13/4,D19,3
3,Z166069305_415,0.028860,Z166069305,415,https://iiif.onb.ac.at/images/ABO/Z166069305/0...,"587 nen. Ich ritt auf fie los, aber Hufrein l...","nen. Ich ritt auf fie los, aber Hufrein lies m...","Ich ritt auf sie los, aber Hufrein lies mich r...",02bb750e-0993-4341-ab74-934d28fc523e,[{'text_prep': 'Ehe wir noch an dieser Ore anl...,0.864078,17/3,D19,4
4,Z166069305_220,0.028485,Z166069305,220,https://iiif.onb.ac.at/images/ABO/Z166069305/0...,' 92 lungen und Bewegungen. Mit einer Phyſiog...,"lungen und Bewegungen. Mit einer Physiognomie,...","Lungen und Bewegungen. Mit einer Physiognomie,...",7949fe4e-5534-4522-b151-35b57a733650,[{'text_prep': 'Die Haustiere finden nirgends ...,0.869331,6/15,D19,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
478,Z166069305_493,0.001901,Z166069305,493,https://iiif.onb.ac.at/images/ABO/Z166069305/0...,Pl. 5. Fig.2. Fig.3. Fig. 1 B Riedel fee...,Pl. 5. Fig.2. Fig.3. Fig. 1 Riedel fee. Fig. 1...,Pl. 5. Fig. 2. Fig. 3. Fig. 1 Riedel fee. Fig....,131e6f12-db13-422f-96e4-0924fe19026b,[{'text_prep': 'Fig. 1 Riedel fee.'}],0.819874,466,D19,479
479,Z166069305_484,0.001898,Z166069305,484,https://iiif.onb.ac.at/images/ABO/Z166069305/0...,1 - 1 Seite 109 Zeile I v. u. Abanſoon lies...,Seite 109 Zeile I v. u. Abansoon lies as anson...,"Seite 109 Zeile I v. u. Abends sonnig, ansonst...",da3ced63-91a8-4710-81b6-40da8a1cf407,[{'text_prep': 'Seite 109 Zeile I v. u. Abends...,0.818248,467,D19,480
480,Z166069305_489,0.001894,Z166069305,489,https://iiif.onb.ac.at/images/ABO/Z166069305/0...,Th. PL 3 ஏப்ரதம் THUNDE 1 4 Ironis ту C...,Th. PL 3 THUNDE Ironis Cine kanclerte tatue be...,Theodor Pl. 3 Thundersturm Ironische cine kan...,228e98a9-2d29-4f34-a0c2-adcdd2eeff09,[{'text_prep': 'Theodor Pl. 3 Thundersturm Ir...,0.818178,468,D19,481
481,Z166069305_482,0.001890,Z166069305,482,https://iiif.onb.ac.at/images/ABO/Z166069305/0...,"ie "" !!! 3 iC; 1: 0 Wis . bici"" ..) .""., 6...","iC; 1: 0 Wis . bici"" ..) .""., 6,7, ART 9 * noi...",Ich kam also um die neunte Stunde zu Wismar.,9e9b9254-c675-4427-a20e-88b8d723aec3,[{'text_prep': 'Ich kam also um die neunte Stu...,0.812620,469,D19,482


In [None]:
## Multi-Term Queries

query = 'Nur Pflanzen, Vegetation und Flora'
limit = 1000 # max limit = 1000
threshold = 0.80 # set threshold
filter_string = "barcode:(Z166069305) OR barcode:(Z166069408)" # Sonnini

results = marqoClient.index(indexName).search(
    {
        query: 2.0,
        "Tiere oder jegliche Fauna": -0.8,
        "wichtige Ernährung für die Bevölkerung": +0.8,
        #"Meteorologische Beobachtungen": -0.8,
    },
    limit=limit,
    #filter_string=filter_string
)

output = pd.DataFrame(results["hits"])
output = output[output['_score'] > threshold] # filter results above threshold

print(f'Total results: {len(output)}')
#output

# Generate Preview

In [16]:
## Preview R matches

R=20 # set number of results to be previewed

# Function to unpack and concatenate all texts from the lists of dictionaries
def unpack_texts(series):
    return series.apply(lambda x: ' '.join([d.get('text_clean', d.get('text_prep', '')) for d in x if isinstance(d, dict)]))

# Apply the function to the '_highlights' column and create a new column 'unpacked_highlights'
final_scores_df['unpacked_highlights'] = unpack_texts(final_scores_df['_highlights'])
#print(output['_highlights'])

text_col = 'text_prep' if 'text_prep' in final_scores_df.columns else 'text_clean'

# Check if 'unpacked_highlights' is empty and replace it if necessary
final_scores_df['unpacked_highlights'] = final_scores_df.apply(
    lambda row: row[text_col] if not row['unpacked_highlights'] else row['unpacked_highlights'],
    axis=1
)

# Define a function to highlight parts of the text
def highlight_text(text, highlights):
    # Ensure highlights is a list of strings
    if isinstance(highlights, str):
        highlights = [highlights]
    # Wrap each highlight in <mark> tags
    for highlight in highlights:
        # Replace highlight text with a highlighted version
        text = text.replace(highlight, f'<mark>{highlight}</mark>')
    return text

# Define a function to display full details of a row
def show_details(row):
    details = widgets.Output()
    with details:
        # Use inline styles to ensure no scrolling
        content = f"""
        <div style="border: 1px solid black; padding: 10px; max-width: 100%; box-sizing: border-box;">
            <h3>Details for {row['barcode']}, {row['page']}</h3>
            <p><b>Retrieved text chunk: </b><i>{row["unpacked_highlights"]}</i></p>
            <p>{highlight_text(row[text_col], row["unpacked_highlights"])}</p>
            <img src="{row["iiif_link"]}" alt="IIIF Image Preview" style="max-width: 100%; height: auto;">
            <a href="https://digital.onb.ac.at/OnbViewer/viewer.faces?doc=ABO_%2B{row["barcode"]}" target="_blank">Open ÖNB Viewer</a>
        </div>
        """
        display(HTML(content))
    return details

# Define a function to create a row with more information and a clickable button
def create_row(row, idx):
    button = widgets.Button(description="Inspect", layout=widgets.Layout(width="auto"))

    # When the button is clicked, show more details
    def on_button_clicked(b):
        details_box.clear_output()
        with details_box:
            display(show_details(row))
    
    button.on_click(on_button_clicked)
    
    # Highlight the text in the 'unpacked_highlights' column
    unpacked = highlight_text(row['unpacked_highlights'], row['unpacked_highlights'])
    
    # Create a display row with button, highlights, and iiif links
    row_display = widgets.HBox([
        widgets.HTML(value=f"<b>{idx + 1}</b>"),            # Display the index + 1
        widgets.HTML(
            value=f"<i>{row['document']}: </i>{unpacked}",
            layout=widgets.Layout(width='500px')
        ),                          # Highlights as text
        widgets.HTML(
            value=f"Score: {row['_score']:.3f}",
            layout=widgets.Layout(width='100px')
        ),  # Score as text
        widgets.HTML(
            value=f"Prev. ranks: {row['rank']}",
            layout=widgets.Layout(width='100px')
        ),  # Ranks as text
        widgets.HTML(
            value=f"Corpus: {row['corpus']}",
            layout=widgets.Layout(width='100px')
        ),  # Corpus as text
        button                                                 # Button to view more details
    ])
    
    # Wrap the row display in a Box with a border
    boxed_row_display = widgets.Box(
        [row_display],
        layout=widgets.Layout(
            border='1px solid black',  # Set the border style
            padding='10px',            # Add padding inside the box
            margin='5px 0px',          # Add margin above and below the box
            width='100%'               # Ensure the box spans the full width
        )
    )
    
    return boxed_row_display

# Create an interactive list of display rows (one for each DataFrame row)
rows = [create_row(row, idx) for idx, row in final_scores_df.head(R).iterrows()]

# Display area for detailed view
details_box = widgets.Output()
details_box.layout = widgets.Layout(
    border='1px solid black',
    padding='10px',
    width='100%',  # Ensure full width
    height='auto',  # Adjust height automatically
)

# Display the rows and details box
rows_box = widgets.VBox(rows)
display(rows_box, details_box)

VBox(children=(Box(children=(HBox(children=(HTML(value='<b>1</b>'), HTML(value='<i>Z166069305_430: </i><mark>b…

Output(layout=Layout(border='1px solid black', height='auto', padding='10px', width='100%'))

# Similarity Metrics with Sentence Transformer

In [84]:
from sentence_transformers import SentenceTransformer, util

In [85]:
# Load a pre-trained Sentence Transformer model
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# List of sentences
list_of_sentences = final_scores_df['unpacked_highlights'].tolist()

# Step 1: Encode the sentences and the query into embeddings
sentence_embeddings = model.encode(list_of_sentences, convert_to_tensor=True)
query_embedding = model.encode(query, convert_to_tensor=True)

# Step 2: Compute the cosine similarities between the query and the list of sentences
cosine_scores = util.pytorch_cos_sim(query_embedding, sentence_embeddings)

# Step 3: Create a pandas DataFrame with sentences and their corresponding cosine similarity scores
df_cosine_sim = pd.DataFrame({
    'Sentence': list_of_sentences,
    'Cosine Similarity': cosine_scores.squeeze().tolist()
})



In [86]:
df_cosine_sim.head(R)

Unnamed: 0,Sentence,Cosine Similarity
0,S. sono. Gmelin hat diesen Vogel in der 13.,0.5871
1,Ich todtete eine gelbe Bachstelze ***) und ein...,0.293896
2,Auf diesem Damme stehen von Zeit zu Zeit klein...,0.347629
3,Indessen schienen mir diese Vögel sich mehr de...,0.471382
4,Die Wanderung dieser Vögel nach Ägypten hat je...,0.476594
5,"Ich traf auch eine Wachtel an, die ich sogleic...",0.200288
6,"Diese Vögel sind sehr fett, ihr Fleisch ist za...",0.407871
7,Reichhaltig war er. Kein Vogel ist in Ägypten ...,0.515706
8,Linn. Den ersten Morgens bei einem sehr schöne...,0.310032
9,Bearbeitung der Gegenden um den Canal Salza. F...,0.271253


In [87]:
# Add SentenceTransformer Cosine Similarity score to output dataframe
final_scores_df['ST_cosine_similarity'] = df_cosine_sim['Cosine Similarity']

In [88]:
# Add direct link to ÖNB Viewer to output dataframe
base_url = "https://digital.onb.ac.at/OnbViewer/viewer.faces?doc=ABO_%2B"

# Create the 'onb_viewer_link' column by appending each 'barcode' to the base URL
final_scores_df['onb_viewer_link'] = base_url + final_scores_df['barcode'].astype(str)

In [89]:
final_scores_df

Unnamed: 0,document,rrf_score,barcode,page,iiif_link,text_orig,text_clean,text_prep,_id,_highlights,_score,rank,corpus,rerank,unpacked_highlights,ST_cosine_similarity,onb_viewer_link
0,Z166069305_252,0.032258,Z166069305,252,https://iiif.onb.ac.at/images/ABO/Z166069305/0...,224 7 Dieſer Falke war ein Weibchen. Der Ein...,Dieser Falke war ein Weibchen. Der Eingeweide ...,Dieser Falke war ein Weibchen. Der Eingeweidek...,8a5893c6-fde6-42f3-94e3-16db95629fe5,[{'text_prep': 'S. sono. Gmelin hat diesen Vog...,0.856740,2,D19,1,S. sono. Gmelin hat diesen Vogel in der 13.,0.587100,https://digital.onb.ac.at/OnbViewer/viewer.fac...
1,Z166069305_43,0.031025,Z166069305,43,https://iiif.onb.ac.at/images/ABO/Z166069305/0...,"16 5 1 entgegen, indem er mit großer Heftig...","entgegen, indem er mit groser Heftigkeit nach ...","entgegen, indem er mit großer Heftigkeit nach ...",4205ec0a-9cf3-448d-afba-3023e4c92052,[{'text_prep': 'Ich todtete eine gelbe Bachste...,0.852868,3/6,D19,2,Ich todtete eine gelbe Bachstelze ***) und ein...,0.293896,https://digital.onb.ac.at/OnbViewer/viewer.fac...
2,Z166069305_265,0.029911,Z166069305,265,https://iiif.onb.ac.at/images/ABO/Z166069305/0...,"237 Seeufer, an welchem wir bis zum See Maadi...","Seeufer, an welchem wir bis zum See Maadie hin...","Seeufer, an welchem wir bis zum See Maadie hin...",5fc895c1-b587-4442-a34c-dd446c3054f2,[{'text_prep': 'Auf diesem Damme stehen von Ze...,0.848943,4/10,D19,3,Auf diesem Damme stehen von Zeit zu Zeit klein...,0.347629,https://digital.onb.ac.at/OnbViewer/viewer.fac...
3,Z166069305_344,0.029412,Z166069305,344,https://iiif.onb.ac.at/images/ABO/Z166069305/0...,"316 niederſchoß, wünſchte ich mir Glük, dieſe...","niederschos, wunschte ich mir Gluk, diesen uns...","Niederschoss, wünschte ich mir Glück, diesen u...",a7debf83-ac70-445a-afee-5f35515b3e1d,[{'text_prep': 'Indessen schienen mir diese Vö...,0.843653,8,D19,4,Indessen schienen mir diese Vögel sich mehr de...,0.471382,https://digital.onb.ac.at/OnbViewer/viewer.fac...
4,Z166069305_243,0.027778,Z166069305,243,https://iiif.onb.ac.at/images/ABO/Z166069305/0...,"215 don -einſam liegenden Seen beleben, bleib...","don -einsam liegenden Seen beleben, bleiben di...","don-einsam liegenden Seen beleben, bleiben die...",58487609-e48c-47a1-8a0a-06569fef7f53,[{'text_prep': 'Die Wanderung dieser Vögel nac...,0.835293,24/3,D19,5,Die Wanderung dieser Vögel nach Ägypten hat je...,0.476594,https://digital.onb.ac.at/OnbViewer/viewer.fac...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
413,Z166069305_70,0.002119,Z166069305,70,https://iiif.onb.ac.at/images/ABO/Z166069305/0...,42 ohne Thellung und ohne Abgaben die Früchte...,ohne Thellung und ohne Abgaben die Fruchte ihr...,ohne Thellung und ohne Abgaben die Früchte ihr...,06af6d12-1ddf-492d-8848-2537dc8b4a9c,[{'text_prep': 'Vorzüglich zog Frankreich groß...,0.794844,412,D19,414,Vorzüglich zog Frankreich große Vorteile darau...,0.237914,https://digital.onb.ac.at/OnbViewer/viewer.fac...
414,Z166069305_299,0.002114,Z166069305,299,https://iiif.onb.ac.at/images/ABO/Z166069305/0...,271 i Auf meiner ganzen Reife habe ich und m...,Auf meiner ganzen Reife habe ich und meine Rei...,Auf meiner ganzen Reise habe ich und meine Gef...,01d37f8c-4a15-42a2-9ef5-a73fc7739766,[{'text_prep': 'Der Fluss sieht rotlich und en...,0.794709,413,D19,415,Der Fluss sieht rotlich und endlich grünlich a...,0.106832,https://digital.onb.ac.at/OnbViewer/viewer.fac...
415,Z166069305_410,0.002110,Z166069305,410,https://iiif.onb.ac.at/images/ABO/Z166069305/0...,"382 ilmy 1 "" mann, der feſt von dieſem Ged...","ilmy mann, der fest von diesem Gedanken uberze...","Ilmymann, der fest von diesem Gedanken überzeu...",dde560fc-9473-4faa-9e3c-1280ba131213,[{'text_prep': 'Sie hielten mich in ihrer Einb...,0.791636,414,D19,416,Sie hielten mich in ihrer Einbildung für einen...,0.140597,https://digital.onb.ac.at/OnbViewer/viewer.fac...
416,Z166069305_75,0.002105,Z166069305,75,https://iiif.onb.ac.at/images/ABO/Z166069305/0...,"47 3 dle ich hier gemacht habe, werden iðre ...","dle ich hier gemacht habe, werden ire Stelle i...","Die ich hier gemacht habe, werden ihre Stelle ...",32dd47f1-c7f4-4d4a-bb50-f428f79fce3b,"[{'text_prep': 'Die ich hier gemacht habe, wer...",0.791505,415,D19,417,"Die ich hier gemacht habe, werden ihre Stelle ...",0.136045,https://digital.onb.ac.at/OnbViewer/viewer.fac...


In [90]:
# Save DataFrame to CSV
final_scores_df.to_csv('data/sonnini_llm_corrected/i_onit-sonnini-DHd2025-prep-q_Pferd, Pferde.csv', index=False)

# Save DataFrame to Excel
final_scores_df.to_excel('data/sonnini_llm_corrected/i_onit-sonnini-DHd2025-prep-q_Pferd, Pferde.xlsx', index=False, engine='openpyxl')