Upload 5 files
Browse files- .gitattributes +17 -35
- README.md +38 -1
- config.json +26 -0
- pytorch_model.bin +3 -0
- vocab.txt +0 -0
.gitattributes
CHANGED
@@ -1,35 +1,17 @@
|
|
1 |
-
*.
|
2 |
-
*.
|
3 |
-
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
-
*.
|
5 |
-
*.
|
6 |
-
*.
|
7 |
-
*.
|
8 |
-
*.
|
9 |
-
*.
|
10 |
-
*.
|
11 |
-
*.
|
12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
-
*.
|
15 |
-
*.
|
16 |
-
*.
|
17 |
-
|
18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
1 |
+
*.bin.* filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.tar.gz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
README.md
CHANGED
@@ -1,3 +1,40 @@
|
|
1 |
---
|
2 |
-
|
|
|
|
|
|
|
|
|
|
|
3 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
+
language: "en"
|
3 |
+
tags:
|
4 |
+
- financial-sentiment-analysis
|
5 |
+
- sentiment-analysis
|
6 |
+
widget:
|
7 |
+
- text: "growth is strong and we have plenty of liquidity"
|
8 |
---
|
9 |
+
|
10 |
+
`FinBERT` is a BERT model pre-trained on financial communication text. Its purpose is to enhance financial NLP research and practice. It is trained on the following three financial communication corpora, which total 4.9B tokens:
|
11 |
+
- Corporate Reports 10-K & 10-Q: 2.5B tokens
|
12 |
+
- Earnings Call Transcripts: 1.3B tokens
|
13 |
+
- Analyst Reports: 1.1B tokens
|
14 |
+
|
15 |
+
This released version of `FinBERT` is fine-tuned on 10,000 manually annotated (positive, negative, neutral) sentences from analyst reports. It is periodically updated with fresh data and annotations as financial language changes. This version is based on `yiyanghkust/finbert-tone`.
|
16 |
+
|
17 |
+
It builds on the following academic work:
|
18 |
+
|
19 |
+
Huang, Allen H., Hui Wang, and Yi Yang. "FinBERT: A Large Language Model for Extracting Information from Financial Text." *Contemporary Accounting Research* (2022).
|
20 |
+
|
21 |
+
|
22 |
+
# How to use
|
23 |
+
You can use this model with the Transformers `pipeline` API for sentiment analysis:
|
24 |
+
```python
|
25 |
+
from transformers import BertTokenizer, BertForSequenceClassification
|
26 |
+
from transformers import pipeline
|
27 |
+
|
28 |
+
finbert = BertForSequenceClassification.from_pretrained('Hatman/finbert',num_labels=3)
|
29 |
+
tokenizer = BertTokenizer.from_pretrained('Hatman/finbert')
|
30 |
+
|
31 |
+
nlp = pipeline("sentiment-analysis", model=finbert, tokenizer=tokenizer)
|
32 |
+
|
33 |
+
sentences = ["there is a shortage of capital, and we need extra financing",
|
34 |
+
"growth is strong and we have plenty of liquidity",
|
35 |
+
"there are doubts about our finances",
|
36 |
+
"profits are flat"]
|
37 |
+
results = nlp(sentences)
|
38 |
+
print(results) #LABEL_0: neutral; LABEL_1: positive; LABEL_2: negative
|
39 |
+
|
40 |
+
```
|
config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"BertForSequenceClassification"
|
4 |
+
],
|
5 |
+
"id2label": {
|
6 |
+
"0": "Neutral",
|
7 |
+
"1": "Positive",
|
8 |
+
"2": "Negative"
|
9 |
+
},
|
10 |
+
"label2id": {
|
11 |
+
"Positive": 1,
|
12 |
+
"Negative": 2,
|
13 |
+
"Neutral": 0
|
14 |
+
},
|
15 |
+
"attention_probs_dropout_prob": 0.1,
|
16 |
+
"hidden_act": "gelu",
|
17 |
+
"hidden_dropout_prob": 0.1,
|
18 |
+
"hidden_size": 768,
|
19 |
+
"initializer_range": 0.02,
|
20 |
+
"intermediate_size": 3072,
|
21 |
+
"max_position_embeddings": 512,
|
22 |
+
"num_attention_heads": 12,
|
23 |
+
"num_hidden_layers": 12,
|
24 |
+
"type_vocab_size": 2,
|
25 |
+
"vocab_size": 30873
|
26 |
+
}
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f31c2036e91c9854bcc35141d16669dd07b9726adfe391d1011bff1de7ea4b32
|
3 |
+
size 439101405
|
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|