diff --git "a/Q7_Training_Arabic_and_English.ipynb" "b/Q7_Training_Arabic_and_English.ipynb"
new file mode 100644--- /dev/null
+++ "b/Q7_Training_Arabic_and_English.ipynb"
@@ -0,0 +1,4283 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# **Arabic and English Translator**"
+ ],
+ "metadata": {
+ "id": "G1KysHBeEJCC"
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "id": "2eSvM9zX_2d3",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 1000
+ },
+ "outputId": "72e76f54-afa1-4210-93aa-8e3998f1d53d"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Collecting unsloth\n",
+ " Downloading unsloth-2024.12.4-py3-none-any.whl.metadata (59 kB)\n",
+ "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/59.2 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m59.2/59.2 kB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hCollecting unsloth_zoo>=2024.11.8 (from unsloth)\n",
+ " Downloading unsloth_zoo-2024.12.1-py3-none-any.whl.metadata (16 kB)\n",
+ "Requirement already satisfied: torch>=2.4.0 in /usr/local/lib/python3.10/dist-packages (from unsloth) (2.5.1+cu121)\n",
+ "Collecting xformers>=0.0.27.post2 (from unsloth)\n",
+ " Downloading xformers-0.0.28.post3-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (1.0 kB)\n",
+ "Collecting bitsandbytes (from unsloth)\n",
+ " Downloading bitsandbytes-0.45.0-py3-none-manylinux_2_24_x86_64.whl.metadata (2.9 kB)\n",
+ "Collecting triton>=3.0.0 (from unsloth)\n",
+ " Downloading triton-3.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.3 kB)\n",
+ "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from unsloth) (24.2)\n",
+ "Collecting tyro (from unsloth)\n",
+ " Downloading tyro-0.9.2-py3-none-any.whl.metadata (9.4 kB)\n",
+ "Requirement already satisfied: transformers>=4.46.1 in /usr/local/lib/python3.10/dist-packages (from unsloth) (4.46.3)\n",
+ "Collecting datasets>=2.16.0 (from unsloth)\n",
+ " Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)\n",
+ "Requirement already satisfied: sentencepiece>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from unsloth) (0.2.0)\n",
+ "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from unsloth) (4.66.6)\n",
+ "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from unsloth) (5.9.5)\n",
+ "Requirement already satisfied: wheel>=0.42.0 in /usr/local/lib/python3.10/dist-packages (from unsloth) (0.45.1)\n",
+ "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from unsloth) (1.26.4)\n",
+ "Requirement already satisfied: accelerate>=0.34.1 in /usr/local/lib/python3.10/dist-packages (from unsloth) (1.1.1)\n",
+ "Collecting trl!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,>=0.7.9 (from unsloth)\n",
+ " Downloading trl-0.12.2-py3-none-any.whl.metadata (11 kB)\n",
+ "Requirement already satisfied: peft!=0.11.0,>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from unsloth) (0.13.2)\n",
+ "Collecting protobuf<4.0.0 (from unsloth)\n",
+ " Downloading protobuf-3.20.3-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl.metadata (679 bytes)\n",
+ "Requirement already satisfied: huggingface_hub in /usr/local/lib/python3.10/dist-packages (from unsloth) (0.26.3)\n",
+ "Collecting hf_transfer (from unsloth)\n",
+ " Downloading hf_transfer-0.1.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.7 kB)\n",
+ "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from accelerate>=0.34.1->unsloth) (6.0.2)\n",
+ "Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.10/dist-packages (from accelerate>=0.34.1->unsloth) (0.4.5)\n",
+ "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->unsloth) (3.16.1)\n",
+ "Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->unsloth) (17.0.0)\n",
+ "Collecting dill<0.3.9,>=0.3.0 (from datasets>=2.16.0->unsloth)\n",
+ " Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)\n",
+ "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->unsloth) (2.2.2)\n",
+ "Requirement already satisfied: requests>=2.32.2 in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->unsloth) (2.32.3)\n",
+ "Collecting xxhash (from datasets>=2.16.0->unsloth)\n",
+ " Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n",
+ "Collecting multiprocess<0.70.17 (from datasets>=2.16.0->unsloth)\n",
+ " Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)\n",
+ "Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets>=2.16.0->unsloth)\n",
+ " Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)\n",
+ "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->unsloth) (3.11.9)\n",
+ "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface_hub->unsloth) (4.12.2)\n",
+ "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=2.4.0->unsloth) (3.4.2)\n",
+ "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=2.4.0->unsloth) (3.1.4)\n",
+ "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.10/dist-packages (from torch>=2.4.0->unsloth) (1.13.1)\n",
+ "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy==1.13.1->torch>=2.4.0->unsloth) (1.3.0)\n",
+ "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.46.1->unsloth) (2024.9.11)\n",
+ "Requirement already satisfied: tokenizers<0.21,>=0.20 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.46.1->unsloth) (0.20.3)\n",
+ "Requirement already satisfied: rich in /usr/local/lib/python3.10/dist-packages (from trl!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,>=0.7.9->unsloth) (13.9.4)\n",
+ "Collecting cut_cross_entropy (from unsloth_zoo>=2024.11.8->unsloth)\n",
+ " Downloading cut_cross_entropy-24.12.1-py3-none-any.whl.metadata (9.3 kB)\n",
+ "Requirement already satisfied: pillow in /usr/local/lib/python3.10/dist-packages (from unsloth_zoo>=2024.11.8->unsloth) (11.0.0)\n",
+ "Requirement already satisfied: docstring-parser>=0.16 in /usr/local/lib/python3.10/dist-packages (from tyro->unsloth) (0.16)\n",
+ "Collecting shtab>=1.5.6 (from tyro->unsloth)\n",
+ " Downloading shtab-1.7.1-py3-none-any.whl.metadata (7.3 kB)\n",
+ "Requirement already satisfied: typeguard>=4.0.0 in /usr/local/lib/python3.10/dist-packages (from tyro->unsloth) (4.4.1)\n",
+ "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->unsloth) (2.4.4)\n",
+ "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->unsloth) (1.3.1)\n",
+ "Requirement already satisfied: async-timeout<6.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->unsloth) (4.0.3)\n",
+ "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->unsloth) (24.2.0)\n",
+ "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->unsloth) (1.5.0)\n",
+ "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->unsloth) (6.1.0)\n",
+ "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->unsloth) (0.2.1)\n",
+ "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->unsloth) (1.18.3)\n",
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets>=2.16.0->unsloth) (3.4.0)\n",
+ "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets>=2.16.0->unsloth) (3.10)\n",
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets>=2.16.0->unsloth) (2.2.3)\n",
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets>=2.16.0->unsloth) (2024.8.30)\n",
+ "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich->trl!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,>=0.7.9->unsloth) (3.0.0)\n",
+ "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich->trl!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,>=0.7.9->unsloth) (2.18.0)\n",
+ "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=2.4.0->unsloth) (3.0.2)\n",
+ "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets>=2.16.0->unsloth) (2.8.2)\n",
+ "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets>=2.16.0->unsloth) (2024.2)\n",
+ "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets>=2.16.0->unsloth) (2024.2)\n",
+ "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich->trl!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,>=0.7.9->unsloth) (0.1.2)\n",
+ "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas->datasets>=2.16.0->unsloth) (1.16.0)\n",
+ "Downloading unsloth-2024.12.4-py3-none-any.whl (174 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m174.2/174.2 kB\u001b[0m \u001b[31m7.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading datasets-3.1.0-py3-none-any.whl (480 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m480.6/480.6 kB\u001b[0m \u001b[31m22.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading protobuf-3.20.3-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (1.1 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m47.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading triton-3.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (209.5 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m209.5/209.5 MB\u001b[0m \u001b[31m9.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading trl-0.12.2-py3-none-any.whl (365 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m365.7/365.7 kB\u001b[0m \u001b[31m31.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading unsloth_zoo-2024.12.1-py3-none-any.whl (60 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m60.2/60.2 kB\u001b[0m \u001b[31m6.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading xformers-0.0.28.post3-cp310-cp310-manylinux_2_28_x86_64.whl (16.7 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m16.7/16.7 MB\u001b[0m \u001b[31m97.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading bitsandbytes-0.45.0-py3-none-manylinux_2_24_x86_64.whl (69.1 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m69.1/69.1 MB\u001b[0m \u001b[31m27.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading hf_transfer-0.1.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.6/3.6 MB\u001b[0m \u001b[31m93.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading tyro-0.9.2-py3-none-any.whl (112 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m112.1/112.1 kB\u001b[0m \u001b[31m10.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m10.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (179 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━���━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m179.3/179.3 kB\u001b[0m \u001b[31m16.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading multiprocess-0.70.16-py310-none-any.whl (134 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m12.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading shtab-1.7.1-py3-none-any.whl (14 kB)\n",
+ "Downloading cut_cross_entropy-24.12.1-py3-none-any.whl (22 kB)\n",
+ "Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m17.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hInstalling collected packages: xxhash, triton, shtab, protobuf, hf_transfer, fsspec, dill, multiprocess, xformers, tyro, cut_cross_entropy, bitsandbytes, datasets, trl, unsloth_zoo, unsloth\n",
+ " Attempting uninstall: protobuf\n",
+ " Found existing installation: protobuf 4.25.5\n",
+ " Uninstalling protobuf-4.25.5:\n",
+ " Successfully uninstalled protobuf-4.25.5\n",
+ " Attempting uninstall: fsspec\n",
+ " Found existing installation: fsspec 2024.10.0\n",
+ " Uninstalling fsspec-2024.10.0:\n",
+ " Successfully uninstalled fsspec-2024.10.0\n",
+ "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
+ "gcsfs 2024.10.0 requires fsspec==2024.10.0, but you have fsspec 2024.9.0 which is incompatible.\n",
+ "grpcio-status 1.62.3 requires protobuf>=4.21.6, but you have protobuf 3.20.3 which is incompatible.\u001b[0m\u001b[31m\n",
+ "\u001b[0mSuccessfully installed bitsandbytes-0.45.0 cut_cross_entropy-24.12.1 datasets-3.1.0 dill-0.3.8 fsspec-2024.9.0 hf_transfer-0.1.8 multiprocess-0.70.16 protobuf-3.20.3 shtab-1.7.1 triton-3.1.0 trl-0.12.2 tyro-0.9.2 unsloth-2024.12.4 unsloth_zoo-2024.12.1 xformers-0.0.28.post3 xxhash-3.5.0\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "application/vnd.colab-display-data+json": {
+ "pip_warning": {
+ "packages": [
+ "google"
+ ]
+ },
+ "id": "dbf9df8bf7454dbaab6ef975d2d37581"
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Found existing installation: unsloth 2024.12.4\n",
+ "Uninstalling unsloth-2024.12.4:\n",
+ " Successfully uninstalled unsloth-2024.12.4\n",
+ "Collecting git+https://github.com/unslothai/unsloth.git\n",
+ " Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-req-build-364nvevq\n",
+ " Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-req-build-364nvevq\n",
+ " Resolved https://github.com/unslothai/unsloth.git to commit df808d074d307d396b8c04640ef51375d68f2ef0\n",
+ " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
+ " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
+ " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
+ "Building wheels for collected packages: unsloth\n",
+ " Building wheel for unsloth (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
+ " Created wheel for unsloth: filename=unsloth-2024.12.4-py3-none-any.whl size=173562 sha256=0140891ef1bba47874bd1947f9eeeda74b5c77c0dc7dfd40408bcc0ba08b44bf\n",
+ " Stored in directory: /tmp/pip-ephem-wheel-cache-afmdqqd7/wheels/ed/d4/e9/76fb290ee3df0a5fc21ce5c2c788e29e9607a2353d8342fd0d\n",
+ "Successfully built unsloth\n",
+ "Installing collected packages: unsloth\n",
+ "Successfully installed unsloth-2024.12.4\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Pinned to the unsloth release (2024.12.4) and git commit this notebook was originally run against,\n",
+ "# so a rerun installs the same code instead of whatever the repository HEAD is at that moment.\n",
+ "!pip install unsloth==2024.12.4\n",
+ "!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git@df808d074d307d396b8c04640ef51375d68f2ef0"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [],
+ "metadata": {
+ "id": "85Jlo9PnEGjN"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import pandas as pd\n",
+ "from unsloth import FastLanguageModel\n",
+ "import torch"
+ ],
+ "metadata": {
+ "id": "wqp8vZ4ImQC8",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "8f661e20-02a9-4d3c-aaae-f2e7816f92b8"
+ },
+ "execution_count": 2,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
+ "🦥 Unsloth Zoo will now patch everything to make training faster!\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 365,
+ "referenced_widgets": [
+ "e551bb632a754f8ba78a36c9661a1917",
+ "247b0bfe148a4dd6a4f23bbf79cec6b4",
+ "789a88fa1ee045bcabf544ae95d9e157",
+ "f3cb57c536464a908dfc290a10fb82da",
+ "852a42665bd843888e9644b97e7467ad",
+ "3b6f79a0f0df4d2c9042e344c4117f86",
+ "1e1200e3727a40de8903d8b100966da8",
+ "724cb07137f34c9dad2c7e2aeddabcdd",
+ "01a1e84efe564c1da9b1e726d1d219c6",
+ "36ae929c9c9e441c9e159e87f437104c",
+ "794acbeec4284305869d0c871c69a1b7",
+ "3bb3e596332142e0940d8e40f3db2039",
+ "b02e725e78fd4481bf889e94b651d408",
+ "a21c3d66d4f24ee08d06c892a8c0af9c",
+ "f5edee46df184ca8956ef022900f81b5",
+ "5a1f64a7fb4c476ea685b97072621d42",
+ "581c0de230b6485ab4e543f72d017147",
+ "38b13c4e0101417796b8576a1964ff9f",
+ "37ae3ffc081749f58b12127aa416ed91",
+ "22c89fa1dd3543c4b549e58603d37234",
+ "90a819342d8249609af8c7bdd451215e",
+ "58fd93a4ee3d4464a7b95f51d194e7d3",
+ "2d894a3b562c43efa5ebaed2fbb12b35",
+ "08fc1651e54b4ab5aa167b1d4a24ccdd",
+ "118bd48fad064714a96e00bc7713832e",
+ "faad459e297f48a0b223ef794aa33c59",
+ "40daedddecd9455d95a26fb4fc1a581d",
+ "7a8f09a6812a47f1b56dbc529b3db1d4",
+ "396a0e92cb2f4e4196a5493fe265cdbb",
+ "99fe33f710c04899820137969e3a5ba1",
+ "5d19c1f508c041f48ae39f4c79be78dc",
+ "9c9bd4df5ff24420bece243bce39bbff",
+ "4335fc2b2c084f40b0c1c7fa219d93b7",
+ "57b718b8108e4b2296040e19143391b7",
+ "d2b383ee56d5494a839a7c67e01c92d2",
+ "0d655bdd9fb94ca4a72b6a77fecaa02f",
+ "af89710da44a4ee5815f65fa17dce485",
+ "49955114aa5d40b7b3ced7bfa0729bdb",
+ "213af196dfe8444480ec721616f085b9",
+ "5cbca3f7bab748af91f8b38c15d44015",
+ "b7a231ec2a594315b8e1794ba9bf5301",
+ "77bf2853cfba437786d7268db5837e98",
+ "4337c8d2f24a4c0da21e79d8e240a765",
+ "b78267be50754729b2c9ab6c0e5a639e",
+ "4e6f2e37b1bd42ad8963cda28b0aea4b",
+ "8204331de3cf4fedae7ce11098c96a29",
+ "5e54bf72110845f88ba002ed4b2c1285",
+ "4f453bf1f8a54c92bce5f9f2466aeb64",
+ "02e5bcdd62bb4193ac87aef526be02ee",
+ "37135d833cc04e2abdfecd0d34af8d84",
+ "77240b62246e40e994c74b40519166ca",
+ "1c90ed042b534610ab59972fee3bf017",
+ "c81e3554ba6548f18895c7bb6c8f5e2b",
+ "4ce21bc4d7cb4f18836311837cb7aad0",
+ "f1069cc6b9e648a69e4615fea5b16422",
+ "c5393d1070bb4b2bb726b6fa4d5634ee",
+ "d0993ccf77f94894894a63cb5d4b200a",
+ "9c4a6a8332b84114811b6a1e829416c1",
+ "b3aefe911588430a84f86c7fe9d89721",
+ "1ca38a0417904ea4a62a9b9b782c23d8",
+ "5fa491a7ede742fab73db5a3814168bb",
+ "1fb86c45c9c741ee9eff0d3858ac9062",
+ "11064f07959746a98966fc0abcccbb47",
+ "504f8876cb11424395b27a227e73bec3",
+ "84e02c549d194e298a9c922bfb0cfa52",
+ "e38ccf92718a421680ffab72f5c8a0df",
+ "3295d5afb2194603b05577a37700fcf8",
+ "1a64461fa04f4d2a9289123c83c2da9c",
+ "2007f0edfa004e4f966788f316e501a6",
+ "cfbf2ab837c84f81a5e40b941dfcd60a",
+ "66d3d7e894b24b9582d95cfd1b678856",
+ "1a5e858c2bb344828dab7ac83824efc3",
+ "2220d2ce35df40bbb4abec5de1480b1a",
+ "04260695b198425c810ad35db2b54d91",
+ "082b0e53f4ff4c168b6b0124644388e3",
+ "ace378acad5146969ac055f3bd01ea49",
+ "03a48fdcc2c8407a825fa71b5247e3f9"
+ ]
+ },
+ "id": "QmUBVEnvCDJv",
+ "outputId": "03185fd5-888a-4ec4-dcf4-6be433d47f5a"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "==((====))== Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.\n",
+ " \\\\ /| GPU: NVIDIA A100-SXM4-40GB. Max memory: 39.564 GB. Platform: Linux.\n",
+ "O^O/ \\_/ \\ Torch: 2.5.1+cu121. CUDA: 8.0. CUDA Toolkit: 12.1. Triton: 3.1.0\n",
+ "\\ / Bfloat16 = TRUE. FA [Xformers = 0.0.28.post3. FA2 = False]\n",
+ " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n",
+ "Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "model.safetensors: 0%| | 0.00/2.26G [00:00, ?B/s]"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "e551bb632a754f8ba78a36c9661a1917"
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "generation_config.json: 0%| | 0.00/140 [00:00, ?B/s]"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "3bb3e596332142e0940d8e40f3db2039"
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "tokenizer_config.json: 0%| | 0.00/3.37k [00:00, ?B/s]"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "2d894a3b562c43efa5ebaed2fbb12b35"
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "tokenizer.model: 0%| | 0.00/500k [00:00, ?B/s]"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "57b718b8108e4b2296040e19143391b7"
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "added_tokens.json: 0%| | 0.00/293 [00:00, ?B/s]"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "4e6f2e37b1bd42ad8963cda28b0aea4b"
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "special_tokens_map.json: 0%| | 0.00/571 [00:00, ?B/s]"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "c5393d1070bb4b2bb726b6fa4d5634ee"
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "tokenizer.json: 0%| | 0.00/1.84M [00:00, ?B/s]"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "3295d5afb2194603b05577a37700fcf8"
+ }
+ },
+ "metadata": {}
+ }
+ ],
+ "source": [
+ "# Loading options; each one is forwarded unchanged to FastLanguageModel.from_pretrained below.\n",
+ "max_seq_length = 2048\n",
+ "dtype = None  # NOTE(review): presumably lets Unsloth pick the dtype itself - confirm against the Unsloth docs\n",
+ "load_in_4bit = True\n",
+ "\n",
+ "# Fetch the Phi-3.5-mini-instruct checkpoint together with its tokenizer.\n",
+ "model, tokenizer = FastLanguageModel.from_pretrained(\n",
+ "    model_name=\"unsloth/Phi-3.5-mini-instruct\",\n",
+ "    max_seq_length=max_seq_length,\n",
+ "    dtype=dtype,\n",
+ "    load_in_4bit=load_in_4bit,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [],
+ "metadata": {
+ "id": "wC5LwqRZhEAl"
+ },
+ "execution_count": 3,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "6bZsfBuZDeCL",
+ "outputId": "d9f4c1ff-035a-4298-c35b-18c745e3185b"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "Unsloth 2024.12.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Modules that receive adapters, passed as `target_modules` below.\n",
+ "lora_target_modules = [\n",
+ "    \"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n",
+ "    \"gate_proj\", \"up_proj\", \"down_proj\",\n",
+ "]\n",
+ "\n",
+ "# Replace `model` with its PEFT-wrapped version (r=16, lora_alpha=16, no dropout, no bias terms).\n",
+ "model = FastLanguageModel.get_peft_model(\n",
+ "    model,\n",
+ "    r=16,\n",
+ "    lora_alpha=16,\n",
+ "    lora_dropout=0,\n",
+ "    bias=\"none\",\n",
+ "    target_modules=lora_target_modules,\n",
+ "    use_gradient_checkpointing=\"unsloth\",\n",
+ "    use_rslora=False,\n",
+ "    loftq_config=None,\n",
+ "    random_state=3407,\n",
+ ")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Read the translation dataset (columns: instruction, input, output) into a DataFrame and preview it.\n",
+ "dataset_path = '/content/drive/MyDrive/Ara/Arabic.json'\n",
+ "data_df = pd.read_json(dataset_path, encoding='utf-8')\n",
+ "data_df.head()"
+ ],
+ "metadata": {
+ "id": "nLxakNjAk7OQ",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 293
+ },
+ "outputId": "76de4e93-06ab-4ff8-d7aa-d0f57636f38a"
+ },
+ "execution_count": 7,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " instruction \\\n",
+ "0 Could you render the given sentence into Arabic? \n",
+ "1 The given sentence is in English, you should t... \n",
+ "2 Your job is to translate the below sentence in... \n",
+ "3 I would like the given sentence to be translat... \n",
+ "4 The given sentence is in English, you should t... \n",
+ "\n",
+ " input \\\n",
+ "0 Give three tips for staying healthy. \n",
+ "1 1. Eat a balanced and nutritious diet: Make su... \n",
+ "2 What are the three primary colors? \n",
+ "3 The three primary colors are red, blue, and ye... \n",
+ "4 Describe the structure of an atom. \n",
+ "\n",
+ " output \n",
+ "0 أعط ثلاث نصائح للبقاء بصحة جيدة. \n",
+ "1 1. تناول نظامًا غذائيًا متوازنًا ومغذيًا: تأك... \n",
+ "2 ما هي الألوان الثلاثة الأساسية؟ \n",
+ "3 الألوان الثلاثة الأساسية هي الأحمر والأزرق وال... \n",
+ "4 صف بنية الذرة. "
+ ],
+ "text/html": [
+ "\n",
+ "
"
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ "
\n",
+ " [30/30 01:41, Epoch 0/1]\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Step | \n",
+ " Training Loss | \n",
+ " Validation Loss | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 5 | \n",
+ " 1.338700 | \n",
+ " 1.055234 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " 0.743600 | \n",
+ " 0.725511 | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " 0.504200 | \n",
+ " 0.647183 | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " 0.535600 | \n",
+ " 0.589806 | \n",
+ "
\n",
+ " \n",
+ " 25 | \n",
+ " 0.492000 | \n",
+ " 0.555610 | \n",
+ "
\n",
+ " \n",
+ " 30 | \n",
+ " 0.548500 | \n",
+ " 0.542290 | \n",
+ "
\n",
+ " \n",
+ "
"
+ ]
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Fine-tuned model training completed.\n",
+ "Training metrics saved as 'training_curve_Phi_3_5_Arabic.csv'\n"
+ ]
+ }
+ ],
+ "source": [
+ "import matplotlib.pyplot as plt\n",
+ "import pandas as pd\n",
+ "\n",
+ "# Run fine-tuning and keep the object returned by trainer.train().\n",
+ "trainer_stats = trainer.train()\n",
+ "\n",
+ "# Turn the trainer's log history into a DataFrame and write it to CSV.\n",
+ "metrics_csv = \"training_curve_Phi_3_5_Arabic.csv\"\n",
+ "metrics = trainer.state.log_history\n",
+ "df = pd.DataFrame(metrics)\n",
+ "df.to_csv(metrics_csv, index=False)\n",
+ "\n",
+ "print(\"Fine-tuned model training completed.\")\n",
+ "print(\"Training metrics saved as 'training_curve_Phi_3_5_Arabic.csv'\")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Write the fine-tuned model and the tokenizer into the same output directory.\n",
+ "model_output_dir = \"Phi_3_5_Fine_tuned_model_Arabic\"\n",
+ "trainer.save_model(model_output_dir)\n",
+ "tokenizer.save_pretrained(model_output_dir)\n",
+ "\n",
+ "print(\"Fine-tuned model saved as 'Phi_3_5_Fine_tuned_model_Arabic'\")"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "Y0B5t2AVDeVh",
+ "outputId": "8f9d1b12-0972-48e9-f577-e0e08a613f0c"
+ },
+ "execution_count": 11,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Fine-tuned model saved as 'Phi_3_5_Fine_tuned_model_Arabic'\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Plot training and validation loss against the optimizer step.\n",
+ "# The Trainer log keeps training and evaluation entries on separate rows, so each loss\n",
+ "# column is NaN on the other kind of row. Those rows are dropped before plotting so the\n",
+ "# points are joined by a line instead of being split at every NaN gap.\n",
+ "fig, ax = plt.subplots()\n",
+ "\n",
+ "curves = (\n",
+ "    (\"loss\", \"Training Loss\", \"o\"),\n",
+ "    (\"eval_loss\", \"Validation Loss\", \"x\"),\n",
+ ")\n",
+ "for column, label, marker in curves:\n",
+ "    if column not in df.columns:\n",
+ "        continue\n",
+ "    logged = df.dropna(subset=[column])\n",
+ "    # Use the logged step as the x value; fall back to the row index if there is no `step` column.\n",
+ "    steps = logged[\"step\"] if \"step\" in logged.columns else logged.index\n",
+ "    ax.plot(steps, logged[column], label=label, marker=marker)\n",
+ "\n",
+ "ax.set_title(\"Training and Validation Loss Curve\")\n",
+ "ax.set_xlabel(\"Steps\")\n",
+ "ax.set_ylabel(\"Loss\")\n",
+ "ax.legend()\n",
+ "ax.grid()\n",
+ "\n",
+ "fig.savefig(\"loss_curve_Phi_3_5_Arabic.png\")\n",
+ "plt.show()\n",
+ "\n",
+ "# Report the files that actually exist: the figure written here and the CSV written by the training cell.\n",
+ "print(\"Loss curve saved as 'loss_curve_Phi_3_5_Arabic.png'\")\n",
+ "print(\"Training losses stored in 'training_curve_Phi_3_5_Arabic.csv'\")\n"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 507
+ },
+ "id": "ZMkcCqKRMb15",
+ "outputId": "78d5fcef-ef32-40b3-bbfb-6fe62f6a0e5f"
+ },
+ "execution_count": 29,
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "