Dmitry Chaplinsky committed on
Commit
c2e7c9e
·
1 Parent(s): 72f6b63

Yet another try

Browse files
Files changed (1) hide show
  1. pipeline.py +5 -10
pipeline.py CHANGED
@@ -1,13 +1,14 @@
1
  from typing import Dict, List, Any
2
  from nemo.collections.nlp.models import PunctuationCapitalizationModel
3
 
4
- class PreTrainedPipeline():
 
5
  def __init__(self, path=""):
6
  # IMPLEMENT_THIS
7
  # Preload all the elements you are going to need at inference.
8
  # For instance your model, processors, tokenizer that might be needed.
9
  # This function is only called once, so do all the heavy processing I/O here"""
10
- self.model = PunctuationCapitalizationModel.from_pretrained("dchaplinsky/punctuation_uk_bert")
11
 
12
  def __call__(self, inputs: str) -> List[Dict[str, Any]]:
13
  """
@@ -31,14 +32,8 @@ class PreTrainedPipeline():
31
  offset = 0
32
  for tok, lab in zip(tokens, labels):
33
  if lab != "OO":
34
- res.append({
35
- "entity_group": lab,
36
- "word": tok,
37
- "start": offset,
38
- "end": offset + len(tok),
39
- "score": 1
40
- })
41
 
42
  offset += len(tok) + 1
43
 
44
- return res
 
1
  from typing import Dict, List, Any
2
  from nemo.collections.nlp.models import PunctuationCapitalizationModel
3
 
4
+
5
+ class PreTrainedPipeline:
6
  def __init__(self, path=""):
7
  # IMPLEMENT_THIS
8
  # Preload all the elements you are going to need at inference.
9
  # For instance your model, processors, tokenizer that might be needed.
10
  # This function is only called once, so do all the heavy processing I/O here"""
11
+ self.model = PunctuationCapitalizationModel.restore_from("punctuation_uk_bert.nemo")
12
 
13
  def __call__(self, inputs: str) -> List[Dict[str, Any]]:
14
  """
 
32
  offset = 0
33
  for tok, lab in zip(tokens, labels):
34
  if lab != "OO":
35
+ res.append({"entity_group": lab, "word": tok, "start": offset, "end": offset + len(tok), "score": 0.99})
 
 
 
 
 
 
36
 
37
  offset += len(tok) + 1
38
 
39
+ return res