{ "cells": [ { "cell_type": "code", "execution_count": 5, "id": "e3fbdac5-cd38-4e41-b5d2-d9d112b4ac1b", "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Looking in indexes: http://mirrors.aliyun.com/pypi/simple\n", "Requirement already satisfied: transformers in /root/miniconda3/lib/python3.12/site-packages (4.47.1)\n", "Requirement already satisfied: sentencepiece in /root/miniconda3/lib/python3.12/site-packages (0.2.0)\n", "Requirement already satisfied: google in /root/miniconda3/lib/python3.12/site-packages (3.0.0)\n", "Requirement already satisfied: protobuf in /root/miniconda3/lib/python3.12/site-packages (5.27.0)\n", "Requirement already satisfied: deepspeed in /root/miniconda3/lib/python3.12/site-packages (0.16.2)\n", "Requirement already satisfied: peft in /root/miniconda3/lib/python3.12/site-packages (0.14.0)\n", "Collecting datasets\n", " Downloading http://mirrors.aliyun.com/pypi/packages/d7/84/0df6c5981f5fc722381662ff8cfbdf8aad64bec875f75d80b55bfef394ce/datasets-3.2.0-py3-none-any.whl (480 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m480.6/480.6 kB\u001b[0m \u001b[31m4.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hRequirement already satisfied: filelock in /root/miniconda3/lib/python3.12/site-packages (from transformers) (3.14.0)\n", "Requirement already satisfied: huggingface-hub<1.0,>=0.24.0 in /root/miniconda3/lib/python3.12/site-packages (from transformers) (0.27.0)\n", "Requirement already satisfied: numpy>=1.17 in /root/miniconda3/lib/python3.12/site-packages (from transformers) (1.26.4)\n", "Requirement already satisfied: packaging>=20.0 in /root/miniconda3/lib/python3.12/site-packages (from transformers) (23.2)\n", "Requirement already satisfied: pyyaml>=5.1 in /root/miniconda3/lib/python3.12/site-packages (from transformers) (6.0.1)\n", "Requirement already satisfied: regex!=2019.12.17 in /root/miniconda3/lib/python3.12/site-packages (from transformers) (2024.11.6)\n", "Requirement already satisfied: requests in /root/miniconda3/lib/python3.12/site-packages (from transformers) (2.31.0)\n", "Requirement already satisfied: tokenizers<0.22,>=0.21 in /root/miniconda3/lib/python3.12/site-packages (from transformers) (0.21.0)\n", "Requirement already satisfied: safetensors>=0.4.1 in /root/miniconda3/lib/python3.12/site-packages (from transformers) (0.4.5)\n", "Requirement already satisfied: tqdm>=4.27 in /root/miniconda3/lib/python3.12/site-packages (from transformers) (4.66.2)\n", "Requirement already satisfied: beautifulsoup4 in /root/miniconda3/lib/python3.12/site-packages (from google) (4.12.3)\n", "Requirement already satisfied: einops in /root/miniconda3/lib/python3.12/site-packages (from deepspeed) (0.8.0)\n", "Requirement already satisfied: hjson in /root/miniconda3/lib/python3.12/site-packages (from deepspeed) (3.1.0)\n", "Requirement already satisfied: msgpack in /root/miniconda3/lib/python3.12/site-packages (from deepspeed) (1.1.0)\n", "Requirement already satisfied: ninja in /root/miniconda3/lib/python3.12/site-packages (from deepspeed) (1.11.1.3)\n", "Requirement already satisfied: psutil in /root/miniconda3/lib/python3.12/site-packages (from deepspeed) (5.9.8)\n", "Requirement already satisfied: py-cpuinfo in /root/miniconda3/lib/python3.12/site-packages (from deepspeed) (9.0.0)\n", "Requirement already satisfied: pydantic>=2.0.0 in /root/miniconda3/lib/python3.12/site-packages (from deepspeed) (2.10.4)\n", "Requirement already satisfied: torch in /root/miniconda3/lib/python3.12/site-packages (from deepspeed) (2.3.0+cu121)\n", "Requirement already satisfied: nvidia-ml-py in /root/miniconda3/lib/python3.12/site-packages (from deepspeed) (12.560.30)\n", "Requirement already satisfied: accelerate>=0.21.0 in /root/miniconda3/lib/python3.12/site-packages (from peft) (1.2.1)\n", "Collecting pyarrow>=15.0.0 (from datasets)\n", " Downloading http://mirrors.aliyun.com/pypi/packages/3a/2e/3b99f8a3d9e0ccae0e961978a0d0089b25fb46ebbcfb5ebae3cca179a5b3/pyarrow-18.1.0-cp312-cp312-manylinux_2_28_x86_64.whl (40.1 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m40.1/40.1 MB\u001b[0m \u001b[31m14.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hCollecting dill<0.3.9,>=0.3.0 (from datasets)\n", " Downloading http://mirrors.aliyun.com/pypi/packages/c9/7a/cef76fd8438a42f96db64ddaa85280485a9c395e7df3db8158cfec1eee34/dill-0.3.8-py3-none-any.whl (116 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m53.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting pandas (from datasets)\n", " Downloading http://mirrors.aliyun.com/pypi/packages/38/f8/d8fddee9ed0d0c0f4a2132c1dfcf0e3e53265055da8df952a53e7eaf178c/pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.7 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.7/12.7 MB\u001b[0m \u001b[31m13.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hCollecting requests (from transformers)\n", " Downloading http://mirrors.aliyun.com/pypi/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl (64 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m64.9/64.9 kB\u001b[0m \u001b[31m31.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting tqdm>=4.27 (from transformers)\n", " Downloading http://mirrors.aliyun.com/pypi/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl (78 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m78.5/78.5 kB\u001b[0m \u001b[31m35.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting xxhash (from datasets)\n", " Downloading http://mirrors.aliyun.com/pypi/packages/11/a7/81dba5010f7e733de88af9555725146fc133be97ce36533867f4c7e75066/xxhash-3.5.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.4/194.4 kB\u001b[0m \u001b[31m6.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting multiprocess<0.70.17 (from datasets)\n", " Downloading http://mirrors.aliyun.com/pypi/packages/0a/7d/a988f258104dcd2ccf1ed40fdc97e26c4ac351eeaf81d76e266c52d84e2f/multiprocess-0.70.16-py312-none-any.whl (146 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m146.7/146.7 kB\u001b[0m \u001b[31m4.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: fsspec<=2024.9.0,>=2023.1.0 in /root/miniconda3/lib/python3.12/site-packages (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets) (2024.5.0)\n", "Collecting aiohttp (from datasets)\n", " Downloading http://mirrors.aliyun.com/pypi/packages/40/7f/6de218084f9b653026bd7063cd8045123a7ba90c25176465f266976d8c82/aiohttp-3.11.11-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.7 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m16.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hCollecting aiohappyeyeballs>=2.3.0 (from aiohttp->datasets)\n", " Downloading http://mirrors.aliyun.com/pypi/packages/b9/74/fbb6559de3607b3300b9be3cc64e97548d55678e44623db17820dbd20002/aiohappyeyeballs-2.4.4-py3-none-any.whl (14 kB)\n", "Collecting aiosignal>=1.1.2 (from aiohttp->datasets)\n", " Downloading http://mirrors.aliyun.com/pypi/packages/ec/6a/bc7e17a3e87a2985d3e8f4da4cd0f481060eb78fb08596c42be62c90a4d9/aiosignal-1.3.2-py2.py3-none-any.whl (7.6 kB)\n", "Requirement already satisfied: attrs>=17.3.0 in /root/miniconda3/lib/python3.12/site-packages (from aiohttp->datasets) (23.2.0)\n", "Collecting frozenlist>=1.1.1 (from aiohttp->datasets)\n", " Downloading http://mirrors.aliyun.com/pypi/packages/af/f2/64b73a9bb86f5a89fb55450e97cd5c1f84a862d4ff90d9fd1a73ab0f64a5/frozenlist-1.5.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (283 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m283.6/283.6 kB\u001b[0m \u001b[31m41.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting multidict<7.0,>=4.5 (from aiohttp->datasets)\n", " Downloading http://mirrors.aliyun.com/pypi/packages/d3/c8/529101d7176fe7dfe1d99604e48d69c5dfdcadb4f06561f465c8ef12b4df/multidict-6.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (131 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m131.0/131.0 kB\u001b[0m \u001b[31m56.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting propcache>=0.2.0 (from aiohttp->datasets)\n", " Downloading http://mirrors.aliyun.com/pypi/packages/1c/07/ebe102777a830bca91bbb93e3479cd34c2ca5d0361b83be9dbd93104865e/propcache-0.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (243 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m243.6/243.6 kB\u001b[0m \u001b[31m41.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting yarl<2.0,>=1.17.0 (from aiohttp->datasets)\n", " Downloading http://mirrors.aliyun.com/pypi/packages/1a/e1/a097d5755d3ea8479a42856f51d97eeff7a3a7160593332d98f2709b3580/yarl-1.18.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (336 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m336.9/336.9 kB\u001b[0m \u001b[31m41.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: typing-extensions>=3.7.4.3 in /root/miniconda3/lib/python3.12/site-packages (from huggingface-hub<1.0,>=0.24.0->transformers) (4.12.2)\n", "Requirement already satisfied: annotated-types>=0.6.0 in /root/miniconda3/lib/python3.12/site-packages (from pydantic>=2.0.0->deepspeed) (0.7.0)\n", "Requirement already satisfied: pydantic-core==2.27.2 in /root/miniconda3/lib/python3.12/site-packages (from pydantic>=2.0.0->deepspeed) (2.27.2)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /root/miniconda3/lib/python3.12/site-packages (from requests->transformers) (2.0.4)\n", "Requirement already satisfied: idna<4,>=2.5 in /root/miniconda3/lib/python3.12/site-packages (from requests->transformers) (3.7)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /root/miniconda3/lib/python3.12/site-packages (from requests->transformers) (2.1.0)\n", "Requirement already satisfied: certifi>=2017.4.17 in /root/miniconda3/lib/python3.12/site-packages (from requests->transformers) (2024.2.2)\n", "Requirement already satisfied: sympy in /root/miniconda3/lib/python3.12/site-packages (from torch->deepspeed) (1.12.1)\n", "Requirement already satisfied: networkx in /root/miniconda3/lib/python3.12/site-packages (from torch->deepspeed) (3.3)\n", "Requirement already satisfied: jinja2 in /root/miniconda3/lib/python3.12/site-packages (from torch->deepspeed) (3.1.4)\n", "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /root/miniconda3/lib/python3.12/site-packages (from torch->deepspeed) (12.1.105)\n", "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /root/miniconda3/lib/python3.12/site-packages (from torch->deepspeed) (12.1.105)\n", "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /root/miniconda3/lib/python3.12/site-packages (from torch->deepspeed) (12.1.105)\n", "Requirement already satisfied: nvidia-cudnn-cu12==8.9.2.26 in /root/miniconda3/lib/python3.12/site-packages (from torch->deepspeed) (8.9.2.26)\n", "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /root/miniconda3/lib/python3.12/site-packages (from torch->deepspeed) (12.1.3.1)\n", "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /root/miniconda3/lib/python3.12/site-packages (from torch->deepspeed) (11.0.2.54)\n", "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /root/miniconda3/lib/python3.12/site-packages (from torch->deepspeed) (10.3.2.106)\n", "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /root/miniconda3/lib/python3.12/site-packages (from torch->deepspeed) (11.4.5.107)\n", "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /root/miniconda3/lib/python3.12/site-packages (from torch->deepspeed) (12.1.0.106)\n", "Requirement already satisfied: nvidia-nccl-cu12==2.20.5 in /root/miniconda3/lib/python3.12/site-packages (from torch->deepspeed) (2.20.5)\n", "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /root/miniconda3/lib/python3.12/site-packages (from torch->deepspeed) (12.1.105)\n", "Requirement already satisfied: nvidia-nvjitlink-cu12 in /root/miniconda3/lib/python3.12/site-packages (from nvidia-cusolver-cu12==11.4.5.107->torch->deepspeed) (12.5.40)\n", "Requirement already satisfied: soupsieve>1.2 in /root/miniconda3/lib/python3.12/site-packages (from beautifulsoup4->google) (2.5)\n", "Requirement already satisfied: python-dateutil>=2.8.2 in /root/miniconda3/lib/python3.12/site-packages (from pandas->datasets) (2.9.0.post0)\n", "Collecting pytz>=2020.1 (from pandas->datasets)\n", " Downloading http://mirrors.aliyun.com/pypi/packages/11/c3/005fcca25ce078d2cc29fd559379817424e94885510568bc1bc53d7d5846/pytz-2024.2-py2.py3-none-any.whl (508 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m508.0/508.0 kB\u001b[0m \u001b[31m38.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting tzdata>=2022.7 (from pandas->datasets)\n", " Downloading http://mirrors.aliyun.com/pypi/packages/a6/ab/7e5f53c3b9d14972843a647d8d7a853969a58aecc7559cb3267302c94774/tzdata-2024.2-py2.py3-none-any.whl (346 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m346.6/346.6 kB\u001b[0m \u001b[31m36.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: six>=1.5 in /root/miniconda3/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.16.0)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /root/miniconda3/lib/python3.12/site-packages (from jinja2->torch->deepspeed) (2.1.5)\n", "Requirement already satisfied: mpmath<1.4.0,>=1.1.0 in /root/miniconda3/lib/python3.12/site-packages (from sympy->torch->deepspeed) (1.3.0)\n", "Installing collected packages: pytz, xxhash, tzdata, tqdm, requests, pyarrow, propcache, multidict, frozenlist, dill, aiohappyeyeballs, yarl, pandas, multiprocess, aiosignal, aiohttp, datasets\n", " Attempting uninstall: tqdm\n", " Found existing installation: tqdm 4.66.2\n", " Uninstalling tqdm-4.66.2:\n", " Successfully uninstalled tqdm-4.66.2\n", " Attempting uninstall: requests\n", " Found existing installation: requests 2.31.0\n", " Uninstalling requests-2.31.0:\n", " Successfully uninstalled requests-2.31.0\n", "Successfully installed aiohappyeyeballs-2.4.4 aiohttp-3.11.11 aiosignal-1.3.2 datasets-3.2.0 dill-0.3.8 frozenlist-1.5.0 multidict-6.1.0 multiprocess-0.70.16 pandas-2.2.3 propcache-0.2.1 pyarrow-18.1.0 pytz-2024.2 requests-2.32.3 tqdm-4.67.1 tzdata-2024.2 xxhash-3.5.0 yarl-1.18.3\n", "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", "\u001b[0m" ] } ], "source": [ "!pip install transformers sentencepiece google protobuf deepspeed peft datasets " ] }, { "cell_type": "code", "execution_count": 9, "id": "4e906370-40c7-4f6b-a700-f183a9308c78", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "https://hf-mirror.com\n" ] } ], "source": [ "import os\n", "\n", "# 设置环境变量, autodl专区 其他idc\n", "os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'\n", "\n", "# 打印环境变量以确认设置成功\n", "print(os.environ.get('HF_ENDPOINT'))" ] }, { "cell_type": "code", "execution_count": 1, "id": "ecc98529-6581-41d2-a876-23ce5187cae7", "metadata": {}, "outputs": [], "source": [ "import subprocess\n", "import os\n", "# 设置环境变量, autodl一般区域\n", "result = subprocess.run('bash -c \"source /etc/network_turbo && env | grep proxy\"', shell=True, capture_output=True, text=True)\n", "output = result.stdout\n", "for line in output.splitlines():\n", " if '=' in line:\n", " var, value = line.split('=', 1)\n", " os.environ[var] = value" ] }, { "cell_type": "code", "execution_count": 2, "id": "b01fc372-33af-46e5-8c0e-8bccba7237ee", "metadata": {}, "outputs": [], "source": [ "from datasets import load_dataset\n", "# load ~11k samples from promoters prediction dataset\n", "dataset = load_dataset(\"dnagpt/dna_core_promoter\")['train'].train_test_split(test_size=0.1)" ] }, { "cell_type": "code", "execution_count": 3, "id": "136c38d4-bd0f-4ecd-9165-2fd5b5207c1d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "DatasetDict({\n", " train: Dataset({\n", " features: ['sequence', 'label'],\n", " num_rows: 53276\n", " })\n", " test: Dataset({\n", " features: ['sequence', 'label'],\n", " num_rows: 5920\n", " })\n", "})" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dataset" ] }, { "cell_type": "markdown", "id": "28acb64e-8d1e-4482-a515-344a2e0344c4", "metadata": {}, "source": [ "## lfs 支持\n", "apt-get update\n", "\n", "apt-get install git-lfs\n", "\n", "git lfs install" ] }, { "cell_type": "code", "execution_count": null, "id": "3d3cefb0-1eed-4f23-8591-1990f7113820", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.3" } }, "nbformat": 4, "nbformat_minor": 5 }