{ "cells": [ { "cell_type": "markdown", "source": [ "# **Arabic and English Translator**" ], "metadata": { "id": "G1KysHBeEJCC" } }, { "cell_type": "code", "execution_count": 1, "metadata": { "id": "2eSvM9zX_2d3", "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "outputId": "72e76f54-afa1-4210-93aa-8e3998f1d53d" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Collecting unsloth\n", " Downloading unsloth-2024.12.4-py3-none-any.whl.metadata (59 kB)\n", "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/59.2 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m59.2/59.2 kB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting unsloth_zoo>=2024.11.8 (from unsloth)\n", " Downloading unsloth_zoo-2024.12.1-py3-none-any.whl.metadata (16 kB)\n", "Requirement already satisfied: torch>=2.4.0 in /usr/local/lib/python3.10/dist-packages (from unsloth) (2.5.1+cu121)\n", "Collecting xformers>=0.0.27.post2 (from unsloth)\n", " Downloading xformers-0.0.28.post3-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (1.0 kB)\n", "Collecting bitsandbytes (from unsloth)\n", " Downloading bitsandbytes-0.45.0-py3-none-manylinux_2_24_x86_64.whl.metadata (2.9 kB)\n", "Collecting triton>=3.0.0 (from unsloth)\n", " Downloading triton-3.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.3 kB)\n", "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from unsloth) (24.2)\n", "Collecting tyro (from unsloth)\n", " Downloading tyro-0.9.2-py3-none-any.whl.metadata (9.4 kB)\n", "Requirement already satisfied: transformers>=4.46.1 in /usr/local/lib/python3.10/dist-packages (from unsloth) (4.46.3)\n", "Collecting datasets>=2.16.0 (from unsloth)\n", " Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)\n", "Requirement already satisfied: 
sentencepiece>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from unsloth) (0.2.0)\n", "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from unsloth) (4.66.6)\n", "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from unsloth) (5.9.5)\n", "Requirement already satisfied: wheel>=0.42.0 in /usr/local/lib/python3.10/dist-packages (from unsloth) (0.45.1)\n", "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from unsloth) (1.26.4)\n", "Requirement already satisfied: accelerate>=0.34.1 in /usr/local/lib/python3.10/dist-packages (from unsloth) (1.1.1)\n", "Collecting trl!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,>=0.7.9 (from unsloth)\n", " Downloading trl-0.12.2-py3-none-any.whl.metadata (11 kB)\n", "Requirement already satisfied: peft!=0.11.0,>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from unsloth) (0.13.2)\n", "Collecting protobuf<4.0.0 (from unsloth)\n", " Downloading protobuf-3.20.3-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl.metadata (679 bytes)\n", "Requirement already satisfied: huggingface_hub in /usr/local/lib/python3.10/dist-packages (from unsloth) (0.26.3)\n", "Collecting hf_transfer (from unsloth)\n", " Downloading hf_transfer-0.1.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.7 kB)\n", "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from accelerate>=0.34.1->unsloth) (6.0.2)\n", "Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.10/dist-packages (from accelerate>=0.34.1->unsloth) (0.4.5)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->unsloth) (3.16.1)\n", "Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->unsloth) (17.0.0)\n", "Collecting dill<0.3.9,>=0.3.0 (from datasets>=2.16.0->unsloth)\n", " Downloading 
dill-0.3.8-py3-none-any.whl.metadata (10 kB)\n", "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->unsloth) (2.2.2)\n", "Requirement already satisfied: requests>=2.32.2 in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->unsloth) (2.32.3)\n", "Collecting xxhash (from datasets>=2.16.0->unsloth)\n", " Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n", "Collecting multiprocess<0.70.17 (from datasets>=2.16.0->unsloth)\n", " Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)\n", "Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets>=2.16.0->unsloth)\n", " Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)\n", "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->unsloth) (3.11.9)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface_hub->unsloth) (4.12.2)\n", "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=2.4.0->unsloth) (3.4.2)\n", "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=2.4.0->unsloth) (3.1.4)\n", "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.10/dist-packages (from torch>=2.4.0->unsloth) (1.13.1)\n", "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy==1.13.1->torch>=2.4.0->unsloth) (1.3.0)\n", "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.46.1->unsloth) (2024.9.11)\n", "Requirement already satisfied: tokenizers<0.21,>=0.20 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.46.1->unsloth) (0.20.3)\n", "Requirement already satisfied: rich in /usr/local/lib/python3.10/dist-packages (from 
trl!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,>=0.7.9->unsloth) (13.9.4)\n", "Collecting cut_cross_entropy (from unsloth_zoo>=2024.11.8->unsloth)\n", " Downloading cut_cross_entropy-24.12.1-py3-none-any.whl.metadata (9.3 kB)\n", "Requirement already satisfied: pillow in /usr/local/lib/python3.10/dist-packages (from unsloth_zoo>=2024.11.8->unsloth) (11.0.0)\n", "Requirement already satisfied: docstring-parser>=0.16 in /usr/local/lib/python3.10/dist-packages (from tyro->unsloth) (0.16)\n", "Collecting shtab>=1.5.6 (from tyro->unsloth)\n", " Downloading shtab-1.7.1-py3-none-any.whl.metadata (7.3 kB)\n", "Requirement already satisfied: typeguard>=4.0.0 in /usr/local/lib/python3.10/dist-packages (from tyro->unsloth) (4.4.1)\n", "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->unsloth) (2.4.4)\n", "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->unsloth) (1.3.1)\n", "Requirement already satisfied: async-timeout<6.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->unsloth) (4.0.3)\n", "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->unsloth) (24.2.0)\n", "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->unsloth) (1.5.0)\n", "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->unsloth) (6.1.0)\n", "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->unsloth) (0.2.1)\n", "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->unsloth) (1.18.3)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in 
/usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets>=2.16.0->unsloth) (3.4.0)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets>=2.16.0->unsloth) (3.10)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets>=2.16.0->unsloth) (2.2.3)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets>=2.16.0->unsloth) (2024.8.30)\n", "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich->trl!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,>=0.7.9->unsloth) (3.0.0)\n", "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich->trl!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,>=0.7.9->unsloth) (2.18.0)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=2.4.0->unsloth) (3.0.2)\n", "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets>=2.16.0->unsloth) (2.8.2)\n", "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets>=2.16.0->unsloth) (2024.2)\n", "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets>=2.16.0->unsloth) (2024.2)\n", "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich->trl!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,>=0.7.9->unsloth) (0.1.2)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas->datasets>=2.16.0->unsloth) (1.16.0)\n", "Downloading unsloth-2024.12.4-py3-none-any.whl (174 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m 
\u001b[32m174.2/174.2 kB\u001b[0m \u001b[31m7.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading datasets-3.1.0-py3-none-any.whl (480 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m480.6/480.6 kB\u001b[0m \u001b[31m22.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading protobuf-3.20.3-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (1.1 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m47.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading triton-3.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (209.5 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m209.5/209.5 MB\u001b[0m \u001b[31m9.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading trl-0.12.2-py3-none-any.whl (365 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m365.7/365.7 kB\u001b[0m \u001b[31m31.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading unsloth_zoo-2024.12.1-py3-none-any.whl (60 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m60.2/60.2 kB\u001b[0m \u001b[31m6.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading xformers-0.0.28.post3-cp310-cp310-manylinux_2_28_x86_64.whl (16.7 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m16.7/16.7 MB\u001b[0m \u001b[31m97.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading bitsandbytes-0.45.0-py3-none-manylinux_2_24_x86_64.whl (69.1 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m69.1/69.1 MB\u001b[0m \u001b[31m27.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading hf_transfer-0.1.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 
MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.6/3.6 MB\u001b[0m \u001b[31m93.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading tyro-0.9.2-py3-none-any.whl (112 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m112.1/112.1 kB\u001b[0m \u001b[31m10.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m10.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (179 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m179.3/179.3 kB\u001b[0m \u001b[31m16.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading multiprocess-0.70.16-py310-none-any.whl (134 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m12.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading shtab-1.7.1-py3-none-any.whl (14 kB)\n", "Downloading cut_cross_entropy-24.12.1-py3-none-any.whl (22 kB)\n", "Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m17.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hInstalling collected packages: xxhash, triton, shtab, protobuf, hf_transfer, fsspec, dill, multiprocess, xformers, tyro, cut_cross_entropy, bitsandbytes, datasets, trl, unsloth_zoo, unsloth\n", " Attempting uninstall: protobuf\n", " Found existing installation: protobuf 4.25.5\n", " Uninstalling protobuf-4.25.5:\n", " Successfully uninstalled protobuf-4.25.5\n", " Attempting uninstall: fsspec\n", " Found existing installation: fsspec 2024.10.0\n", " 
Uninstalling fsspec-2024.10.0:\n", " Successfully uninstalled fsspec-2024.10.0\n", "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", "gcsfs 2024.10.0 requires fsspec==2024.10.0, but you have fsspec 2024.9.0 which is incompatible.\n", "grpcio-status 1.62.3 requires protobuf>=4.21.6, but you have protobuf 3.20.3 which is incompatible.\u001b[0m\u001b[31m\n", "\u001b[0mSuccessfully installed bitsandbytes-0.45.0 cut_cross_entropy-24.12.1 datasets-3.1.0 dill-0.3.8 fsspec-2024.9.0 hf_transfer-0.1.8 multiprocess-0.70.16 protobuf-3.20.3 shtab-1.7.1 triton-3.1.0 trl-0.12.2 tyro-0.9.2 unsloth-2024.12.4 unsloth_zoo-2024.12.1 xformers-0.0.28.post3 xxhash-3.5.0\n" ] }, { "output_type": "display_data", "data": { "application/vnd.colab-display-data+json": { "pip_warning": { "packages": [ "google" ] }, "id": "dbf9df8bf7454dbaab6ef975d2d37581" } }, "metadata": {} }, { "output_type": "stream", "name": "stdout", "text": [ "Found existing installation: unsloth 2024.12.4\n", "Uninstalling unsloth-2024.12.4:\n", " Successfully uninstalled unsloth-2024.12.4\n", "Collecting git+https://github.com/unslothai/unsloth.git\n", " Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-req-build-364nvevq\n", " Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-req-build-364nvevq\n", " Resolved https://github.com/unslothai/unsloth.git to commit df808d074d307d396b8c04640ef51375d68f2ef0\n", " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", "Building wheels for collected packages: unsloth\n", " Building wheel for unsloth (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", " Created wheel for unsloth: filename=unsloth-2024.12.4-py3-none-any.whl size=173562 sha256=0140891ef1bba47874bd1947f9eeeda74b5c77c0dc7dfd40408bcc0ba08b44bf\n", " Stored in directory: /tmp/pip-ephem-wheel-cache-afmdqqd7/wheels/ed/d4/e9/76fb290ee3df0a5fc21ce5c2c788e29e9607a2353d8342fd0d\n", "Successfully built unsloth\n", "Installing collected packages: unsloth\n", "Successfully installed unsloth-2024.12.4\n" ] } ], "source": [ "!pip install unsloth\n", "!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git" ] }, { "cell_type": "markdown", "source": [], "metadata": { "id": "85Jlo9PnEGjN" } }, { "cell_type": "code", "source": [ "import pandas as pd\n", "from unsloth import FastLanguageModel\n", "import torch" ], "metadata": { "id": "wqp8vZ4ImQC8", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "8f661e20-02a9-4d3c-aaae-f2e7816f92b8" }, "execution_count": 2, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n", "🦥 Unsloth Zoo will now patch everything to make training faster!\n" ] } ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 365, "referenced_widgets": [ "e551bb632a754f8ba78a36c9661a1917", "247b0bfe148a4dd6a4f23bbf79cec6b4", "789a88fa1ee045bcabf544ae95d9e157", "f3cb57c536464a908dfc290a10fb82da", "852a42665bd843888e9644b97e7467ad", "3b6f79a0f0df4d2c9042e344c4117f86", "1e1200e3727a40de8903d8b100966da8", "724cb07137f34c9dad2c7e2aeddabcdd", "01a1e84efe564c1da9b1e726d1d219c6", "36ae929c9c9e441c9e159e87f437104c", "794acbeec4284305869d0c871c69a1b7", "3bb3e596332142e0940d8e40f3db2039", "b02e725e78fd4481bf889e94b651d408", "a21c3d66d4f24ee08d06c892a8c0af9c", "f5edee46df184ca8956ef022900f81b5", "5a1f64a7fb4c476ea685b97072621d42", "581c0de230b6485ab4e543f72d017147", 
"38b13c4e0101417796b8576a1964ff9f", "37ae3ffc081749f58b12127aa416ed91", "22c89fa1dd3543c4b549e58603d37234", "90a819342d8249609af8c7bdd451215e", "58fd93a4ee3d4464a7b95f51d194e7d3", "2d894a3b562c43efa5ebaed2fbb12b35", "08fc1651e54b4ab5aa167b1d4a24ccdd", "118bd48fad064714a96e00bc7713832e", "faad459e297f48a0b223ef794aa33c59", "40daedddecd9455d95a26fb4fc1a581d", "7a8f09a6812a47f1b56dbc529b3db1d4", "396a0e92cb2f4e4196a5493fe265cdbb", "99fe33f710c04899820137969e3a5ba1", "5d19c1f508c041f48ae39f4c79be78dc", "9c9bd4df5ff24420bece243bce39bbff", "4335fc2b2c084f40b0c1c7fa219d93b7", "57b718b8108e4b2296040e19143391b7", "d2b383ee56d5494a839a7c67e01c92d2", "0d655bdd9fb94ca4a72b6a77fecaa02f", "af89710da44a4ee5815f65fa17dce485", "49955114aa5d40b7b3ced7bfa0729bdb", "213af196dfe8444480ec721616f085b9", "5cbca3f7bab748af91f8b38c15d44015", "b7a231ec2a594315b8e1794ba9bf5301", "77bf2853cfba437786d7268db5837e98", "4337c8d2f24a4c0da21e79d8e240a765", "b78267be50754729b2c9ab6c0e5a639e", "4e6f2e37b1bd42ad8963cda28b0aea4b", "8204331de3cf4fedae7ce11098c96a29", "5e54bf72110845f88ba002ed4b2c1285", "4f453bf1f8a54c92bce5f9f2466aeb64", "02e5bcdd62bb4193ac87aef526be02ee", "37135d833cc04e2abdfecd0d34af8d84", "77240b62246e40e994c74b40519166ca", "1c90ed042b534610ab59972fee3bf017", "c81e3554ba6548f18895c7bb6c8f5e2b", "4ce21bc4d7cb4f18836311837cb7aad0", "f1069cc6b9e648a69e4615fea5b16422", "c5393d1070bb4b2bb726b6fa4d5634ee", "d0993ccf77f94894894a63cb5d4b200a", "9c4a6a8332b84114811b6a1e829416c1", "b3aefe911588430a84f86c7fe9d89721", "1ca38a0417904ea4a62a9b9b782c23d8", "5fa491a7ede742fab73db5a3814168bb", "1fb86c45c9c741ee9eff0d3858ac9062", "11064f07959746a98966fc0abcccbb47", "504f8876cb11424395b27a227e73bec3", "84e02c549d194e298a9c922bfb0cfa52", "e38ccf92718a421680ffab72f5c8a0df", "3295d5afb2194603b05577a37700fcf8", "1a64461fa04f4d2a9289123c83c2da9c", "2007f0edfa004e4f966788f316e501a6", "cfbf2ab837c84f81a5e40b941dfcd60a", "66d3d7e894b24b9582d95cfd1b678856", "1a5e858c2bb344828dab7ac83824efc3", 
"2220d2ce35df40bbb4abec5de1480b1a", "04260695b198425c810ad35db2b54d91", "082b0e53f4ff4c168b6b0124644388e3", "ace378acad5146969ac055f3bd01ea49", "03a48fdcc2c8407a825fa71b5247e3f9" ] }, "id": "QmUBVEnvCDJv", "outputId": "03185fd5-888a-4ec4-dcf4-6be433d47f5a" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "==((====))== Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.\n", " \\\\ /| GPU: NVIDIA A100-SXM4-40GB. Max memory: 39.564 GB. Platform: Linux.\n", "O^O/ \\_/ \\ Torch: 2.5.1+cu121. CUDA: 8.0. CUDA Toolkit: 12.1. Triton: 3.1.0\n", "\\ / Bfloat16 = TRUE. FA [Xformers = 0.0.28.post3. FA2 = False]\n", " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n", "Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "model.safetensors: 0%| | 0.00/2.26G [00:00, ?B/s]" ], "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, "model_id": "e551bb632a754f8ba78a36c9661a1917" } }, "metadata": {} }, { "output_type": "display_data", "data": { "text/plain": [ "generation_config.json: 0%| | 0.00/140 [00:00, ?B/s]" ], "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, "model_id": "3bb3e596332142e0940d8e40f3db2039" } }, "metadata": {} }, { "output_type": "display_data", "data": { "text/plain": [ "tokenizer_config.json: 0%| | 0.00/3.37k [00:00, ?B/s]" ], "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, "model_id": "2d894a3b562c43efa5ebaed2fbb12b35" } }, "metadata": {} }, { "output_type": "display_data", "data": { "text/plain": [ "tokenizer.model: 0%| | 0.00/500k [00:00, ?B/s]" ], "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, "model_id": "57b718b8108e4b2296040e19143391b7" } }, "metadata": {} }, { "output_type": "display_data", "data": { "text/plain": [ "added_tokens.json: 0%| | 0.00/293 [00:00, 
?B/s]" ], "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, "model_id": "4e6f2e37b1bd42ad8963cda28b0aea4b" } }, "metadata": {} }, { "output_type": "display_data", "data": { "text/plain": [ "special_tokens_map.json: 0%| | 0.00/571 [00:00, ?B/s]" ], "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, "model_id": "c5393d1070bb4b2bb726b6fa4d5634ee" } }, "metadata": {} }, { "output_type": "display_data", "data": { "text/plain": [ "tokenizer.json: 0%| | 0.00/1.84M [00:00, ?B/s]" ], "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, "model_id": "3295d5afb2194603b05577a37700fcf8" } }, "metadata": {} } ], "source": [ "\n", "max_seq_length = 2048\n", "dtype = None\n", "load_in_4bit = True\n", "\n", "\n", "model, tokenizer = FastLanguageModel.from_pretrained(\n", " model_name = \"unsloth/Phi-3.5-mini-instruct\",\n", " max_seq_length = max_seq_length,\n", " dtype = dtype,\n", " load_in_4bit = load_in_4bit,\n", ")" ] }, { "cell_type": "code", "source": [], "metadata": { "id": "wC5LwqRZhEAl" }, "execution_count": 3, "outputs": [] }, { "cell_type": "code", "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "6bZsfBuZDeCL", "outputId": "d9f4c1ff-035a-4298-c35b-18c745e3185b" }, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "Unsloth 2024.12.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.\n" ] } ], "source": [ "model = FastLanguageModel.get_peft_model(\n", " model,\n", " r=16,\n", " target_modules=[\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\", \"gate_proj\", \"up_proj\", \"down_proj\"],\n", " lora_alpha=16,\n", " lora_dropout=0,\n", " bias=\"none\",\n", " use_gradient_checkpointing=\"unsloth\",\n", " random_state=3407,\n", " use_rslora=False,\n", " loftq_config=None\n", ")\n" ] }, { "cell_type": "code", "source": [ "# Load the dataset using pandas\n", "data_df = 
pd.read_json('/content/drive/MyDrive/Ara/Arabic.json', encoding='utf-8')\n", "data_df.head()" ], "metadata": { "id": "nLxakNjAk7OQ", "colab": { "base_uri": "https://localhost:8080/", "height": 293 }, "outputId": "76de4e93-06ab-4ff8-d7aa-d0f57636f38a" }, "execution_count": 7, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " instruction \\\n", "0 Could you render the given sentence into Arabic? \n", "1 The given sentence is in English, you should t... \n", "2 Your job is to translate the below sentence in... \n", "3 I would like the given sentence to be translat... \n", "4 The given sentence is in English, you should t... \n", "\n", " input \\\n", "0 Give three tips for staying healthy. \n", "1 1. Eat a balanced and nutritious diet: Make su... \n", "2 What are the three primary colors? \n", "3 The three primary colors are red, blue, and ye... \n", "4 Describe the structure of an atom. \n", "\n", " output \n", "0 أعط ثلاث نصائح للبقاء بصحة جيدة. \n", "1 1. تناول نظامًا غذائيًا متوازنًا ومغذيًا: تأك... \n", "2 ما هي الألوان الثلاثة الأساسية؟ \n", "3 الألوان الثلاثة الأساسية هي الأحمر والأزرق وال... \n", "4 صف بنية الذرة. " ], "text/html": [ "\n", "
\n", " | instruction | \n", "input | \n", "output | \n", "
---|---|---|---|
0 | \n", "Could you render the given sentence into Arabic? | \n", "Give three tips for staying healthy. | \n", "أعط ثلاث نصائح للبقاء بصحة جيدة. | \n", "
1 | \n", "The given sentence is in English, you should t... | \n", "1. Eat a balanced and nutritious diet: Make su... | \n", "1. تناول نظامًا غذائيًا متوازنًا ومغذيًا: تأك... | \n", "
2 | \n", "Your job is to translate the below sentence in... | \n", "What are the three primary colors? | \n", "ما هي الألوان الثلاثة الأساسية؟ | \n", "
3 | \n", "I would like the given sentence to be translat... | \n", "The three primary colors are red, blue, and ye... | \n", "الألوان الثلاثة الأساسية هي الأحمر والأزرق وال... | \n", "
4 | \n", "The given sentence is in English, you should t... | \n", "Describe the structure of an atom. | \n", "صف بنية الذرة. | \n", "
Step | \n", "Training Loss | \n", "Validation Loss | \n", "
---|---|---|
5 | \n", "1.338700 | \n", "1.055234 | \n", "
10 | \n", "0.743600 | \n", "0.725511 | \n", "
15 | \n", "0.504200 | \n", "0.647183 | \n", "
20 | \n", "0.535600 | \n", "0.589806 | \n", "
25 | \n", "0.492000 | \n", "0.555610 | \n", "
30 | \n", "0.548500 | \n", "0.542290 | \n", "
" ] }, "metadata": {} }, { "output_type": "stream", "name": "stdout", "text": [ "Fine-tuned model training completed.\n", "Training metrics saved as 'training_curve_Phi_3_5_Arabic.csv'\n" ] } ], "source": [ "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "\n", "trainer_stats = trainer.train()\n", "\n", "# Persist the trainer's logged metrics so the loss curve can be re-plotted without retraining.\n", "metrics = trainer.state.log_history\n", "df = pd.DataFrame(metrics)\n", "df.to_csv(\"training_curve_Phi_3_5_Arabic.csv\", index=False)\n", "\n", "print(\"Fine-tuned model training completed.\")\n", "print(\"Training metrics saved as 'training_curve_Phi_3_5_Arabic.csv'\")\n" ] }, { "cell_type": "code", "source": [ "trainer.save_model(\"Phi_3_5_Fine_tuned_model_Arabic\")\n", "tokenizer.save_pretrained(\"Phi_3_5_Fine_tuned_model_Arabic\")\n", "\n", "print(\"Fine-tuned model saved as 'Phi_3_5_Fine_tuned_model_Arabic'\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Y0B5t2AVDeVh", "outputId": "8f9d1b12-0972-48e9-f577-e0e08a613f0c" }, "execution_count": 11, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Fine-tuned model saved as 'Phi_3_5_Fine_tuned_model_Arabic'\n" ] } ] }, { "cell_type": "code", "source": [ "# Plot the training / validation loss recorded in `df` (built from trainer.state.log_history).\n", "# Training and evaluation entries are logged as separate rows, so each loss column is NaN on\n", "# the other kind's rows. Drop those rows and plot against the logged global step so the\n", "# curves connect and the x-axis really shows steps rather than DataFrame row numbers.\n", "fig, ax = plt.subplots()\n", "\n", "if \"loss\" in df.columns:\n", "    train_log = df.dropna(subset=[\"loss\"])\n", "    ax.plot(train_log[\"step\"], train_log[\"loss\"], label=\"Training Loss\", marker='o')\n", "\n", "if \"eval_loss\" in df.columns:\n", "    eval_log = df.dropna(subset=[\"eval_loss\"])\n", "    ax.plot(eval_log[\"step\"], eval_log[\"eval_loss\"], label=\"Validation Loss\", marker='x')\n", "\n", "ax.set_title(\"Training and Validation Loss Curve\")\n", "ax.set_xlabel(\"Steps\")\n", "ax.set_ylabel(\"Loss\")\n", "ax.legend()\n", "ax.grid()\n", "\n", "fig.savefig(\"loss_curve_Phi_3_5_Arabic.png\")\n", "plt.show()\n", "\n", "# Report only the files that were actually written (the CSV comes from the training cell).\n", "print(\"Loss curve saved as 'loss_curve_Phi_3_5_Arabic.png'\")\n", "print(\"Training losses stored in 'training_curve_Phi_3_5_Arabic.csv'\")\n" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 507 }, "id": "ZMkcCqKRMb15", "outputId": "78d5fcef-ef32-40b3-bbfb-6fe62f6a0e5f" }, "execution_count": 29, "outputs": [ { "output_type": "display_data", "data": { 
"text/plain": [ "