imryanxu committed on
Commit
b3c19c1
·
verified ·
1 Parent(s): 3853df6

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +15 -24
README.md CHANGED
@@ -15,34 +15,25 @@ To collect training samples, we use the **Qwen-72B** model to thoroughly annotat
15
  ## Quickstart
16
  Here is an example code snippet for generating financial relevance scores using this model.
17
  ```python
18
- import torch
19
- from datasets import load_dataset
20
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
21
 
22
- model_name = "fin-model-en-v0.1"
23
- dataset_file = "your_dataset.jsonl"
24
- text_column = "text"
25
- output_file = "your_output.jsonl"
26
 
27
- tokenizer = AutoTokenizer.from_pretrained(model_name)
28
- model = AutoModelForSequenceClassification.from_pretrained(model_name, torch_dtype=torch.bfloat16)
29
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
30
- model.to(device)
31
 
32
- dataset = load_dataset('json', data_files=dataset_file, cache_dir="cache/", split='train', num_proc=12)
 
 
33
 
 
 
 
 
 
 
34
 
35
- def compute_scores(batch):
36
- inputs = tokenizer(batch[text_column], return_tensors="pt", padding="longest", truncation=True).to(device)
37
- with torch.no_grad():
38
- outputs = model(**inputs)
39
- logits = outputs.logits.squeeze(-1).float().cpu().numpy()
40
-
41
- batch["fin_score"] = logits.tolist()
42
- batch["fin_int_score"] = [int(round(max(0, min(score, 5)))) for score in logits]
43
- return batch
44
-
45
-
46
- dataset = dataset.map(compute_scores, batched=True, batch_size=512)
47
- dataset.to_json(output_file)
48
  ```
 
15
  ## Quickstart
16
  Here is an example code snippet for generating financial relevance scores using this model.
17
  ```python
 
 
18
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
19
 
20
+ text = "You are a smart robot"
21
+ fin_model_name = "fin-model-en-v0.1"
 
 
22
 
23
+ fin_tokenizer = AutoTokenizer.from_pretrained(fin_model_name)
24
+ fin_model = AutoModelForSequenceClassification.from_pretrained(fin_model_name)
 
 
25
 
26
+ fin_inputs = fin_tokenizer(text, return_tensors="pt", padding="longest", truncation=True)
27
+ fin_outputs = fin_model(**fin_inputs)
28
+ fin_logits = fin_outputs.logits.squeeze(-1).float().detach().numpy()
29
 
30
+ fin_score = fin_logits.item()
31
+ result = {
32
+ "text": text,
33
+ "fin_score": fin_score,
34
+ "fin_int_score": int(round(max(0, min(fin_score, 5))))
35
+ }
36
 
37
+ print(result)
38
+ # {'text': 'You are a smart robot', 'fin_score': 0.3258197605609894, 'fin_int_score': 0}
 
 
 
 
 
 
 
 
 
 
 
39
  ```