{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2025-04-08 22:18:10,848 - simalign.simalign - INFO - Initialized the EmbeddingLoader with model: distilbert-base-multilingual-cased\n",
      "Initialized the EmbeddingLoader with model: distilbert-base-multilingual-cased\n"
     ]
    }
   ],
   "source": [
    "from categories.fluency import *\n",
    "from categories.accuracy import *"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sentence: The cat sat the quickly up apples banana.\n"
     ]
    }
   ],
   "source": [
    "# Example source sentence that the entered translation is checked against.\n",
    "src_sent = \"Das ist ein Test.\"\n",
    "\n",
    "# Ask for the translation. Surrounding whitespace is stripped so that a\n",
    "# whitespace-only answer counts as empty and stray leading/trailing spaces\n",
    "# do not produce empty tokens when the sentence is later split on spaces.\n",
    "trg_sent = input(f\"{src_sent}: \").strip()\n",
    "\n",
    "# Fall back to the built-in example sentence when nothing was entered.\n",
    "if not trg_sent:\n",
    "    trg_sent = \"The cat sat the quickly up apples banana.\"\n",
    "\n",
    "print(\"Sentence:\", trg_sent)\n",
    "\n",
    "# Run the three checks; the results are consumed by the reporting cells below.\n",
    "err = grammar_errors(trg_sent)  # grammar error check\n",
    "flu = pseudo_perplexity(trg_sent, threshold=3.1)  # fluency check with threshold 3.1\n",
    "acc = accuracy(src_sent, trg_sent)  # accuracy check of the target against the source"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "An apostrophe may be missing.: apples banana.\n",
      "Adjusted liklihood 4.8056646935577145 over threshold 3.1: sat\n",
      "Adjusted liklihood 4.473408069089179 over threshold 3.1: the\n",
      "Adjusted liklihood 4.732453441503642 over threshold 3.1: quickly\n",
      "Adjusted liklihood 5.1115574262487735 over threshold 3.1: apples\n",
      "Word ist possibly mistranslated or omitted: cat\n",
      "Word ein possibly mistranslated or omitted: sat\n",
      "Word sat possibly mistranslated or added erroneously: sat\n",
      "Word the possibly mistranslated or added erroneously: the\n",
      "Word quickly possibly mistranslated or added erroneously: quickly\n",
      "Word up possibly mistranslated or added erroneously: up\n",
      "Word apples possibly mistranslated or added erroneously: apples\n",
      "Word banana possibly mistranslated or added erroneously: banana.\n"
     ]
    }
   ],
   "source": [
    "# Merge the error lists reported by the grammar, fluency and accuracy checks.\n",
    "combined_err = err[\"errors\"] + flu[\"errors\"] + acc[\"errors\"]\n",
    "\n",
    "# Tokenise once, outside the loop; each error's start/end are used as\n",
    "# inclusive indices into this space-separated token list.\n",
    "trg_tokens = trg_sent.split(\" \")\n",
    "\n",
    "for e in combined_err:\n",
    "    substr = \" \".join(trg_tokens[e[\"start\"]:e[\"end\"] + 1])\n",
    "    print(f\"{e['message']}: {substr}\")  # message followed by the affected words\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fluency Score: 76.62\n",
      "Accuracy Score: 24.45\n"
     ]
    }
   ],
   "source": [
    "# Fluency is the equally weighted mean of the grammar-check score and the\n",
    "# pseudo-perplexity score.\n",
    "grammar_score, perplexity_score = err[\"score\"], flu[\"score\"]\n",
    "fluency_score = 0.5 * grammar_score + 0.5 * perplexity_score\n",
    "print(\"Fluency Score:\", round(fluency_score, 2))\n",
    "\n",
    "# The accuracy score is printed exactly as accuracy() returned it.\n",
    "accuracy_score = acc[\"score\"]\n",
    "print(\"Accuracy Score:\", accuracy_score)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "teach-bs",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}