GlowCheese committed on
Commit
2268589
·
1 Parent(s): 9756d99
.gitignore ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
110
+ .pdm.toml
111
+ .pdm-python
112
+ .pdm-build/
113
+
114
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
115
+ __pypackages__/
116
+
117
+ # Celery stuff
118
+ celerybeat-schedule
119
+ celerybeat.pid
120
+
121
+ # SageMath parsed files
122
+ *.sage.py
123
+
124
+ # Environments
125
+ .env
126
+ .venv
127
+ env/
128
+ venv/
129
+ ENV/
130
+ env.bak/
131
+ venv.bak/
132
+
133
+ # Spyder project settings
134
+ .spyderproject
135
+ .spyproject
136
+
137
+ # Rope project settings
138
+ .ropeproject
139
+
140
+ # mkdocs documentation
141
+ /site
142
+
143
+ # mypy
144
+ .mypy_cache/
145
+ .dmypy.json
146
+ dmypy.json
147
+
148
+ # Pyre type checker
149
+ .pyre/
150
+
151
+ # pytype static type analyzer
152
+ .pytype/
153
+
154
+ # Cython debug symbols
155
+ cython_debug/
156
+
157
+ # PyCharm
158
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
159
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
160
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
161
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
162
+ #.idea/
163
+
164
+ zemo*.py
README.md CHANGED
@@ -1,3 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # CS 224N Default Final Project - Multitask BERT
2
 
3
  This is the default final project for the Stanford CS 224N class. Please refer to the project handout on the course website for detailed instructions and an overview of the codebase.
 
1
+ ---
2
+ license: apache-2.0
3
+ license_link: https://huggingface.co/GlowCheese/minBERT/blob/main/LICENSE
4
+ language:
5
+ - en
6
+ base_model:
7
+ - google-research/bert
8
+ pipeline_tag: sentiment-analysis
9
+ library_name: transformers
10
+ tags:
11
+ - bert
12
+ - minbert
13
+ - transformer
14
+ - sentiment
15
+ - tokenizer
16
+ - classification
17
+ ---
18
+
19
  # CS 224N Default Final Project - Multitask BERT
20
 
21
  This is the default final project for the Stanford CS 224N class. Please refer to the project handout on the course website for detailed instructions and an overview of the codebase.
__pycache__/base_bert.cpython-38.pyc DELETED
Binary file (7.19 kB)
 
__pycache__/bert.cpython-38.pyc DELETED
Binary file (6.3 kB)
 
__pycache__/config.cpython-38.pyc DELETED
Binary file (6.64 kB)
 
__pycache__/optimizer.cpython-38.pyc DELETED
Binary file (2.37 kB)
 
__pycache__/tokenizer.cpython-38.pyc DELETED
Binary file (76.3 kB)
 
__pycache__/utils.cpython-38.pyc DELETED
Binary file (9.09 kB)
 
zemo1.py DELETED
@@ -1,53 +0,0 @@
1
- import torch
2
- import torch.nn as nn
3
- from tqdm import tqdm
4
- import torch.optim as optim
5
-
6
- # Bước 1: Chuẩn bị dữ liệu mẫu
7
- # Dữ liệu giả: mỗi dòng là [giờ học, giờ giải trí, giờ ngủ], điểm trung bình
8
- data = [
9
- [2, 1, 7, 6.0],
10
- [3, 2, 6, 7.5],
11
- [1, 3, 8, 5.5],
12
- [4, 1, 6, 8.0],
13
- [5, 0, 5, 9.0],
14
- [6, 0, 6, 9.5]
15
- ]
16
-
17
- # Tách đặc trưng (features) và mục tiêu (target)
18
- X = torch.tensor([row[:3] for row in data], dtype=torch.float32) # Giờ học, giờ giải trí, giờ ngủ
19
- y = torch.tensor([[row[3]] for row in data], dtype=torch.float32) # Điểm trung bình
20
-
21
- # Bước 2: Xây dựng mô hình
22
- class StudentGradeModel(nn.Module):
23
- def __init__(self):
24
- super(StudentGradeModel, self).__init__()
25
- self.linear = nn.Linear(3, 1) # 3 đầu vào, 1 đầu ra
26
-
27
- def forward(self, x):
28
- return self.linear(x)
29
-
30
- model = StudentGradeModel()
31
-
32
- # Bước 3: Định nghĩa hàm mất mát và bộ tối ưu
33
- criterion = nn.MSELoss()
34
- optimizer = optim.SGD(model.parameters(), lr=0.01)
35
-
36
- # Bước 4: Huấn luyện mô hình
37
- for epoch in tqdm(range(10000), desc="Training Epochs"):
38
- optimizer.zero_grad() # Xóa gradient cũ
39
- output = model(X) # Truyền dữ liệu qua mô hình
40
- loss = criterion(output, y) # Tính mất mát
41
- loss.backward() # Tính gradient
42
- optimizer.step() # Cập nhật trọng số
43
-
44
- # In loss để theo dõi quá trình huấn luyện
45
- if (epoch + 1) % 1000 == 0:
46
- tqdm.write(f'Epoch [{epoch + 1}/10000], Loss: {loss.item():.4f}')
47
-
48
- # Bước 5: Dự đoán thử với một học sinh mới
49
- model.eval()
50
- with torch.no_grad():
51
- test_input = torch.tensor([[4, 1, 6]], dtype=torch.float32) # Ví dụ: 4 giờ học, 1 giờ giải trí, 6 giờ ngủ
52
- prediction = model(test_input)
53
- print("Dự đoán điểm trung bình:", prediction.item())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
zemo2.py DELETED
@@ -1,41 +0,0 @@
1
- import torch
2
- import torch.nn as nn
3
-
4
- # Xây dựng mô hình RNN
5
- class RNNModel(nn.Module):
6
- def __init__(self, input_size, hidden_size, output_size):
7
- super(RNNModel, self).__init__()
8
- self.rnn = nn.RNN(input_size, hidden_size, batch_first=True) # Định nghĩa RNN
9
- self.fc = nn.Linear(hidden_size, output_size) # Lớp fully connected để dự đoán output
10
-
11
- def forward(self, x):
12
- out, _ = self.rnn(x) # Lấy output từ RNN
13
- out = out[:, -1, :] # Lấy output của bước cuối cùng (nếu dữ liệu có nhiều bước thời gian)
14
- out = self.fc(out) # Dự đoán output
15
- return out
16
-
17
- # Khởi tạo mô hình
18
- input_size = 10 # Kích thước đầu vào
19
- hidden_size = 20 # Số lượng hidden units
20
- output_size = 1 # Đầu ra (ví dụ: hồi quy)
21
- model = RNNModel(input_size, hidden_size, output_size)
22
-
23
- # Khởi tạo dữ liệu giả
24
- X = torch.randn(32, 5, 10) # 32 samples, 5 bước thời gian, mỗi bước có 10 đặc trưng
25
- y = torch.randn(32, 1) # 32 samples, 1 giá trị đầu ra cho mỗi sample
26
-
27
- # Hàm mất mát và bộ tối ưu
28
- criterion = nn.MSELoss()
29
- optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
30
-
31
- # Huấn luyện mô hình
32
- for epoch in range(100):
33
- model.train()
34
- optimizer.zero_grad()
35
- output = model(X) # Truyền dữ liệu qua mô hình
36
- loss = criterion(output, y) # Tính mất mát
37
- loss.backward() # Tính gradient
38
- optimizer.step() # Cập nhật trọng số
39
-
40
- if (epoch + 1) % 10 == 0:
41
- print(f'Epoch [{epoch+1}/100], Loss: {loss.item():.4f}')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
zemo3.py DELETED
@@ -1,32 +0,0 @@
1
- import torch
2
- from tokenizer import BertTokenizer
3
- from torch import nn
4
- from bert import BertModel
5
-
6
- # Initialize the BERT tokenizer
7
- tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
8
-
9
- # Example sentences
10
- sentences = [
11
- "She loves reading novels in her free time",
12
- "An apple a day keeps the doctor away",
13
- "If you can't explain it simply, you don't understand it well enough."
14
- ]
15
-
16
- # Tokenize and encode the sentences as a single padded batch
17
- encoding = tokenizer.batch_encode_plus(
18
- sentences,
19
- max_length=512,
20
- padding='max_length',
21
- truncation=True,
22
- return_tensors='pt'
23
- )
24
-
25
- # Get the token IDs from the encoding
26
- input_ids = encoding['input_ids']
27
- attention_mask = encoding['attention_mask']
28
-
29
- model = BertModel.from_pretrained('bert-base-uncased')
30
-
31
- assert isinstance(model, BertModel)
32
- print(model.embed(input_ids).size())