{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2025-04-08 22:18:10,848 - simalign.simalign - INFO - Initialized the EmbeddingLoader with model: distilbert-base-multilingual-cased\n",
      "Initialized the EmbeddingLoader with model: distilbert-base-multilingual-cased\n"
     ]
    }
   ],
   "source": [
    "from categories.fluency import *\n",
    "from categories.accuracy import *"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sentence: The cat sat the quickly up apples banana.\n"
     ]
    }
   ],
   "source": [
    "src_sent = \"Das ist ein Test.\"  # Example source sentence\n",
    "trg_sent = input(f\"{src_sent}: \")  # Prompt the user to enter a sentence\n",
    "\n",
    "# Fall back to the built-in example when nothing (or only whitespace) was entered\n",
    "if not trg_sent.strip():\n",
    "    trg_sent = \"The cat sat the quickly up apples banana.\"\n",
    "\n",
    "print(\"Sentence:\", trg_sent)  # Print the input sentence\n",
    "\n",
    "err = grammar_errors(trg_sent)  # Run the grammar error check\n",
    "flu = pseudo_perplexity(trg_sent, threshold=3.1)  # Run the fluency check\n",
    "acc = accuracy(src_sent, trg_sent)  # Run the accuracy check"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "An apostrophe may be missing.: apples banana.\n",
      "Adjusted liklihood 4.8056646935577145 over threshold 3.1: sat\n",
      "Adjusted liklihood 4.473408069089179 over threshold 3.1: the\n",
      "Adjusted liklihood 4.732453441503642 over threshold 3.1: quickly\n",
      "Adjusted liklihood 5.1115574262487735 over threshold 3.1: apples\n",
      "Word ist possibly mistranslated or omitted: cat\n",
      "Word ein possibly mistranslated or omitted: sat\n",
      "Word sat possibly mistranslated or added erroneously: sat\n",
      "Word the possibly mistranslated or added erroneously: the\n",
      "Word quickly possibly mistranslated or added erroneously: quickly\n",
      "Word up possibly mistranslated or added erroneously: up\n",
      "Word apples possibly mistranslated or added erroneously: apples\n",
      "Word banana possibly mistranslated or added erroneously: banana.\n"
     ]
    }
   ],
   "source": [
    "combined_err = err[\"errors\"] + flu[\"errors\"] + acc[\"errors\"]  # Combine the errors reported by all three checks\n",
    "\n",
    "words = trg_sent.split(\" \")  # Split once; each error's start/end index into this word list\n",
    "for e in combined_err:\n",
    "    substr = \" \".join(words[e[\"start\"]:e[\"end\"]+1])  # \"end\" is inclusive, hence the +1\n",
    "    print(f\"{e['message']}: {substr}\")  # Print each error message with the words it covers\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fluency Score: 76.62\n",
      "Accuracy Score: 24.45\n"
     ]
    }
   ],
   "source": [
    "fluency_score = 0.5 * err[\"score\"] + 0.5 * flu[\"score\"]  # Equal-weight mean of the grammar and pseudo-perplexity scores\n",
    "print(\"Fluency Score:\", round(fluency_score, 2))  # Print the fluency score\n",
    "\n",
    "print(\"Accuracy Score:\", acc[\"score\"])  # Print the accuracy score"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "teach-bs",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}