rbawden committed on
Commit
49e0b5e
1 Parent(s): eac4b97

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +3 -2
README.md CHANGED
@@ -23,15 +23,16 @@ The model is to be used with the custom pipeline available in the original re
23
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
24
  from pipeline import NormalisationPipeline # N.B. local file
25
 
 
26
  tokeniser = AutoTokenizer.from_pretrained("rbawden/modern_french_normalisation")
27
  model = AutoModelForSeq2SeqLM.from_pretrained("rbawden/modern_french_normalisation")
28
- norm_pipeline = NormalisationPipeline(model=model, tokenizer=tokeniser, batch_size=batch_size,beam_size=beam_size)
29
 
30
  list_inputs = ["Elle haïſſoit particulierement le Cardinal de Lorraine;", "Adieu, i'iray chez vous tantoſt vous rendre grace."]
31
  list_outputs = norm_pipeline(list_inputs)
32
  print(list_outputs)
33
 
34
- >> ["Elle haïssait particulièrement le Cardinal de Lorraine;", "Adieu, j'irai chez vous tantôt vous rendre grâce."]
35
  ```
36
 
37
  ### Limitations and bias
 
23
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
24
  from pipeline import NormalisationPipeline # N.B. local file
25
 
26
+ cache_lexicon_path="~/.normalisation_lex.pickle" # optionally set a path to store the processed lexicon (speeds up loading)
27
  tokeniser = AutoTokenizer.from_pretrained("rbawden/modern_french_normalisation")
28
  model = AutoModelForSeq2SeqLM.from_pretrained("rbawden/modern_french_normalisation")
29
+ norm_pipeline = NormalisationPipeline(model=model, tokenizer=tokeniser, batch_size=32, beam_size=5, cache_file=cache_lexicon_path)
30
 
31
  list_inputs = ["Elle haïſſoit particulierement le Cardinal de Lorraine;", "Adieu, i'iray chez vous tantoſt vous rendre grace."]
32
  list_outputs = norm_pipeline(list_inputs)
33
  print(list_outputs)
34
 
35
+ >> [{'text': 'Elle haïssait particulièrement le Cardinal de Lorraine; ', 'alignment': [([0, 3], [0, 3]), ([5, 12], [5, 12]), ([14, 29], [14, 29]), ([31, 32], [31, 32]), ([34, 41], [34, 41]), ([43, 44], [43, 44]), ([46, 53], [46, 53]), ([54, 54], [54, 54])]}, {'text': "Adieu, j'irai chez vous tantôt vous rendre grâce. ", 'alignment': [([0, 4], [0, 4]), ([5, 5], [5, 5]), ([7, 8], [7, 8]), ([9, 12], [9, 12]), ([14, 17], [14, 17]), ([19, 22], [19, 22]), ([24, 30], [24, 29]), ([32, 35], [31, 34]), ([37, 42], [36, 41]), ([44, 48], [43, 47]), ([49, 49], [48, 48])]}]
36
  ```
37
 
38
  ### Limitations and bias