Taizo Kaneko committed on
Commit
3d6c14c
1 Parent(s): 70a4362

commit files to HF hub

Browse files
Files changed (2) hide show
  1. README.md +1 -0
  2. fasttext_jp_embedding.py +8 -2
README.md CHANGED
@@ -40,4 +40,5 @@ Google Colaboratory Example
40
  from transformers import pipeline
41
 
42
  pipeline = pipeline("feature-extraction", model="paulhindemith/fasttext-jp-embedding", revision="2022.11.6", trust_remote_code=True)
 
43
  ```
 
40
  from transformers import pipeline
41
 
42
  pipeline = pipeline("feature-extraction", model="paulhindemith/fasttext-jp-embedding", revision="2022.11.6", trust_remote_code=True)
43
+ pipeline("海賊王におれはなる")
44
  ```
fasttext_jp_embedding.py CHANGED
@@ -3,6 +3,7 @@ from transformers import PretrainedConfig
3
  from transformers import PreTrainedModel
4
  from torch import nn
5
  import torch
 
6
 
7
 
8
  class FastTextJpConfig(PretrainedConfig):
@@ -32,8 +33,13 @@ class FastTextJpModel(PreTrainedModel):
32
  self.word_embeddings = nn.Embedding(config.vocab_size,
33
  config.hidden_size)
34
 
35
- def forward(self, input_ids, **kwargs):
36
- return self.word_embeddings(torch.tensor([0]))
 
 
 
 
 
37
 
38
 
39
  # AutoModelに登録が必要だが、いろいろやり方が変わっているようで定まっていない。(2022/11/6)
 
3
  from transformers import PreTrainedModel
4
  from torch import nn
5
  import torch
6
+ from torchtyping import TensorType
7
 
8
 
9
  class FastTextJpConfig(PretrainedConfig):
 
33
  self.word_embeddings = nn.Embedding(config.vocab_size,
34
  config.hidden_size)
35
 
36
+ def forward(self, **inputs) -> TensorType["batch", "word", "vectors"]:
37
+ """embeddingを行います。
38
+
39
+ Returns:
40
+ TensorType["batch", "word", "vectors"]: 単語ごとにベクトルを返します。
41
+ """
42
+ return self.word_embeddings(torch.Tensor(inputs["input_ids"]))
43
 
44
 
45
  # AutoModelに登録が必要だが、いろいろやり方が変わっているようで定まっていない。(2022/11/6)