Varine committed on
Commit
ad05dff
·
1 Parent(s): c73aa4a

Add running python file

Browse files
Files changed (1) hide show
  1. translation.ipynb +183 -0
translation.ipynb ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 9,
6
+ "id": "0b15c2b0-69f1-4201-a937-36bd89b97d3d",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "name": "stdout",
11
+ "output_type": "stream",
12
+ "text": [
13
+ "Defaulting to user installation because normal site-packages is not writeable\n",
14
+ "Requirement already satisfied: transformers in /home/ubuntu/.local/lib/python3.10/site-packages (4.40.0)\n",
15
+ "Requirement already satisfied: datasets in /home/ubuntu/.local/lib/python3.10/site-packages (2.19.0)\n",
16
+ "Requirement already satisfied: sentencepiece in /home/ubuntu/.local/lib/python3.10/site-packages (0.2.0)\n",
17
+ "Requirement already satisfied: accelerate in /home/ubuntu/.local/lib/python3.10/site-packages (0.29.3)\n",
18
+ "Requirement already satisfied: sacrebleu in /home/ubuntu/.local/lib/python3.10/site-packages (2.4.2)\n",
19
+ "Requirement already satisfied: huggingface_hub in /home/ubuntu/.local/lib/python3.10/site-packages (0.22.2)\n",
20
+ "Requirement already satisfied: filelock in /usr/lib/python3/dist-packages (from transformers) (3.6.0)\n",
21
+ "Requirement already satisfied: numpy>=1.17 in /home/ubuntu/.local/lib/python3.10/site-packages (from transformers) (1.25.2)\n",
22
+ "Requirement already satisfied: packaging>=20.0 in /usr/lib/python3/dist-packages (from transformers) (21.3)\n",
23
+ "Requirement already satisfied: pyyaml>=5.1 in /usr/lib/python3/dist-packages (from transformers) (5.4.1)\n",
24
+ "Requirement already satisfied: regex!=2019.12.17 in /home/ubuntu/.local/lib/python3.10/site-packages (from transformers) (2024.4.16)\n",
25
+ "Requirement already satisfied: requests in /home/ubuntu/.local/lib/python3.10/site-packages (from transformers) (2.31.0)\n",
26
+ "Requirement already satisfied: tokenizers<0.20,>=0.19 in /home/ubuntu/.local/lib/python3.10/site-packages (from transformers) (0.19.1)\n",
27
+ "Requirement already satisfied: safetensors>=0.4.1 in /home/ubuntu/.local/lib/python3.10/site-packages (from transformers) (0.4.3)\n",
28
+ "Requirement already satisfied: tqdm>=4.27 in /home/ubuntu/.local/lib/python3.10/site-packages (from transformers) (4.66.1)\n",
29
+ "Requirement already satisfied: pyarrow>=12.0.0 in /home/ubuntu/.local/lib/python3.10/site-packages (from datasets) (16.0.0)\n",
30
+ "Requirement already satisfied: pyarrow-hotfix in /home/ubuntu/.local/lib/python3.10/site-packages (from datasets) (0.6)\n",
31
+ "Requirement already satisfied: dill<0.3.9,>=0.3.0 in /home/ubuntu/.local/lib/python3.10/site-packages (from datasets) (0.3.8)\n",
32
+ "Requirement already satisfied: pandas in /usr/lib/python3/dist-packages (from datasets) (1.3.5)\n",
33
+ "Requirement already satisfied: xxhash in /home/ubuntu/.local/lib/python3.10/site-packages (from datasets) (3.4.1)\n",
34
+ "Requirement already satisfied: multiprocess in /home/ubuntu/.local/lib/python3.10/site-packages (from datasets) (0.70.16)\n",
35
+ "Requirement already satisfied: fsspec<=2024.3.1,>=2023.1.0 in /home/ubuntu/.local/lib/python3.10/site-packages (from fsspec[http]<=2024.3.1,>=2023.1.0->datasets) (2024.3.1)\n",
36
+ "Requirement already satisfied: aiohttp in /home/ubuntu/.local/lib/python3.10/site-packages (from datasets) (3.9.5)\n",
37
+ "Requirement already satisfied: psutil in /usr/lib/python3/dist-packages (from accelerate) (5.9.0)\n",
38
+ "Requirement already satisfied: torch>=1.10.0 in /usr/lib/python3/dist-packages (from accelerate) (2.0.1)\n",
39
+ "Requirement already satisfied: portalocker in /home/ubuntu/.local/lib/python3.10/site-packages (from sacrebleu) (2.8.2)\n",
40
+ "Requirement already satisfied: tabulate>=0.8.9 in /home/ubuntu/.local/lib/python3.10/site-packages (from sacrebleu) (0.9.0)\n",
41
+ "Requirement already satisfied: colorama in /usr/lib/python3/dist-packages (from sacrebleu) (0.4.4)\n",
42
+ "Requirement already satisfied: lxml in /usr/lib/python3/dist-packages (from sacrebleu) (4.8.0)\n",
43
+ "Requirement already satisfied: typing-extensions>=3.7.4.3 in /home/ubuntu/.local/lib/python3.10/site-packages (from huggingface_hub) (4.8.0)\n",
44
+ "Requirement already satisfied: aiosignal>=1.1.2 in /home/ubuntu/.local/lib/python3.10/site-packages (from aiohttp->datasets) (1.3.1)\n",
45
+ "Requirement already satisfied: attrs>=17.3.0 in /home/ubuntu/.local/lib/python3.10/site-packages (from aiohttp->datasets) (23.1.0)\n",
46
+ "Requirement already satisfied: frozenlist>=1.1.1 in /home/ubuntu/.local/lib/python3.10/site-packages (from aiohttp->datasets) (1.4.1)\n",
47
+ "Requirement already satisfied: multidict<7.0,>=4.5 in /home/ubuntu/.local/lib/python3.10/site-packages (from aiohttp->datasets) (6.0.5)\n",
48
+ "Requirement already satisfied: yarl<2.0,>=1.0 in /home/ubuntu/.local/lib/python3.10/site-packages (from aiohttp->datasets) (1.9.4)\n",
49
+ "Requirement already satisfied: async-timeout<5.0,>=4.0 in /home/ubuntu/.local/lib/python3.10/site-packages (from aiohttp->datasets) (4.0.3)\n",
50
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /home/ubuntu/.local/lib/python3.10/site-packages (from requests->transformers) (3.3.2)\n",
51
+ "Requirement already satisfied: idna<4,>=2.5 in /usr/lib/python3/dist-packages (from requests->transformers) (3.3)\n",
52
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/lib/python3/dist-packages (from requests->transformers) (1.26.5)\n",
53
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/lib/python3/dist-packages (from requests->transformers) (2020.6.20)\n",
54
+ "\u001b[33mDEPRECATION: flatbuffers 1.12.1-git20200711.33e2d80-dfsg1-0.6 has a non-standard version number. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of flatbuffers or contact the author to suggest that they release a version with a conforming version number. Discussion can be found at https://github.com/pypa/pip/issues/12063\u001b[0m\u001b[33m\n",
55
+ "\u001b[0m\n",
56
+ "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.0\u001b[0m\n",
57
+ "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3 -m pip install --upgrade pip\u001b[0m\n"
58
+ ]
59
+ }
60
+ ],
61
+ "source": [
62
+ "! pip install transformers datasets sentencepiece accelerate sacrebleu huggingface_hub"
63
+ ]
64
+ },
65
+ {
66
+ "cell_type": "code",
67
+ "execution_count": 2,
68
+ "id": "5ce67d29-b717-48a1-9bb1-6436e2a700e2",
69
+ "metadata": {},
70
+ "outputs": [
71
+ {
72
+ "name": "stderr",
73
+ "output_type": "stream",
74
+ "text": [
75
+ "2024-04-23 05:47:46.579485: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
76
+ "2024-04-23 05:47:46.810151: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
77
+ "To enable the following instructions: AVX512F AVX512_VNNI, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
78
+ "/usr/lib/python3/dist-packages/scipy/__init__.py:146: UserWarning: A NumPy version >=1.17.3 and <1.25.0 is required for this version of SciPy (detected version 1.25.2\n",
79
+ " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n"
80
+ ]
81
+ }
82
+ ],
83
+ "source": [
84
+ "from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline\n",
85
+ "from datasets import load_dataset, load_metric\n",
86
+ "import numpy as np"
87
+ ]
88
+ },
89
+ {
90
+ "cell_type": "code",
91
+ "execution_count": 3,
92
+ "id": "249efb1d-4723-4544-98c1-4f75dea692a4",
93
+ "metadata": {},
94
+ "outputs": [
95
+ {
96
+ "name": "stderr",
97
+ "output_type": "stream",
98
+ "text": [
99
+ "/home/ubuntu/.local/lib/python3.10/site-packages/transformers/models/marian/tokenization_marian.py:175: UserWarning: Recommended: pip install sacremoses.\n",
100
+ " warnings.warn(\"Recommended: pip install sacremoses.\")\n"
101
+ ]
102
+ }
103
+ ],
104
+ "source": [
105
+ "model_path = 'opus-mt-zh-en-finetuned-zhen-checkpoints' # 模型保存路径\n",
106
+ "model = AutoModelForSeq2SeqLM.from_pretrained(model_path)\n",
107
+ "tokenizer = AutoTokenizer.from_pretrained(model_path)"
108
+ ]
109
+ },
110
+ {
111
+ "cell_type": "code",
112
+ "execution_count": 4,
113
+ "id": "747b0aff-1eb3-42cc-84df-0f6c11cd9fcb",
114
+ "metadata": {},
115
+ "outputs": [],
116
+ "source": [
117
+ "def translate(text, model, tokenizer):\n",
118
+ " encoded_text = tokenizer(text, return_tensors=\"pt\", padding=True, truncation=True)\n",
119
+ " translated = model.generate(**encoded_text)\n",
120
+ " translated_text = [tokenizer.decode(t, skip_special_tokens=True) for t in translated]\n",
121
+ " return translated_text"
122
+ ]
123
+ },
124
+ {
125
+ "cell_type": "code",
126
+ "execution_count": 7,
127
+ "id": "ba6141cf-ab92-407c-b7de-2da6f8ad7573",
128
+ "metadata": {},
129
+ "outputs": [],
130
+ "source": [
131
+ "input_text = \"Scipy 是 Python 的一个核心科学计算库,它基于 NumPy,并为我们提供了许多用户友好的数学、科学以及工程方面的功能。Scipy 的功能涵盖了多个子模块,如优化、信号处理、图像处理、统计等。\""
132
+ ]
133
+ },
134
+ {
135
+ "cell_type": "code",
136
+ "execution_count": 8,
137
+ "id": "748b9ae1-145d-47aa-86c8-7ea924906921",
138
+ "metadata": {},
139
+ "outputs": [
140
+ {
141
+ "name": "stdout",
142
+ "output_type": "stream",
143
+ "text": [
144
+ "Translated Text: ['Scippy, a core scientific computing base of Python, is based on NumPy and provides us with many user-friendly mathematics, science, engineering functions. It covers several sub-modules such as optimization, signal processing, image processing, statistics, etc.']\n"
145
+ ]
146
+ }
147
+ ],
148
+ "source": [
149
+ "output_text = translate(input_text, model, tokenizer)\n",
150
+ "print(\"Translated Text:\", output_text)"
151
+ ]
152
+ },
153
+ {
154
+ "cell_type": "code",
155
+ "execution_count": null,
156
+ "id": "120a56ac-f6d7-4bb6-893e-bfec6eadd0e8",
157
+ "metadata": {},
158
+ "outputs": [],
159
+ "source": []
160
+ }
161
+ ],
162
+ "metadata": {
163
+ "kernelspec": {
164
+ "display_name": "Python 3 (ipykernel)",
165
+ "language": "python",
166
+ "name": "python3"
167
+ },
168
+ "language_info": {
169
+ "codemirror_mode": {
170
+ "name": "ipython",
171
+ "version": 3
172
+ },
173
+ "file_extension": ".py",
174
+ "mimetype": "text/x-python",
175
+ "name": "python",
176
+ "nbconvert_exporter": "python",
177
+ "pygments_lexer": "ipython3",
178
+ "version": "3.10.12"
179
+ }
180
+ },
181
+ "nbformat": 4,
182
+ "nbformat_minor": 5
183
+ }