File size: 2,209 Bytes
158b61b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#
# Import all of the components to be composed
#
import wrappers.tokenizer.tokenizer as tokeniser

#
# Component definition
#
#                 +---------+                +---------+                          +---------+                          +---------+
# src_filename -->+         +--> filename -->+-- src --+--> tokenised_filename -->+---------+--> tokenised_filename -->+         +--> tokenised_src_filename
#                 |         |                |         |                          |         |                          |         |
# trg_filename -->+         +--> filename -->+---------+-------> filename ------->+-- trg --+--> tokenised_filename -->+         +--> tokenised_trg_filename
#                 +---------+                +---------+                          +---------+                          +---------+
#                                  Config: {corpus.language::String,       Config: {corpus.language::String,
#                                           working.directory.root::String,         working.directory.root::String,
#                                           moses.installation::String}             moses.installation::String}
#
component src_trg_tokeniser
  # A pair of filenames comes in (source side, target side) and a pair of
  # tokenised filenames goes out, in the same order.
  inputs (src_filename), (trg_filename)
  outputs (tokenised_src_filename), (tokenised_trg_filename)
  # Settings supplied by the enclosing pipeline: a language and a tokenisation
  # directory per side, plus one Moses installation setting used by both sides.
  configuration tokeniser.src.language, tokeniser.src.tokenisation_dir,
                tokeniser.trg.language, tokeniser.trg.tokenisation_dir,
                tokeniser.moses.installation
  declare
    # Two separate instances of the imported tokeniser component. Each `with`
    # clause maps this component's configuration keys onto the keys handed to
    # that instance; only the Moses installation key is common to both.
    source_side := new tokeniser with
      tokeniser.src.language -> corpus.language,
      tokeniser.src.tokenisation_dir -> working.directory.root,
      tokeniser.moses.installation -> moses.installation
    target_side := new tokeniser with
      tokeniser.trg.language -> corpus.language,
      tokeniser.trg.tokenisation_dir -> working.directory.root,
      tokeniser.moses.installation -> moses.installation
  as
    # Step 1: rename both incoming filenames to corpus.filename.
    wire (src_filename -> corpus.filename), (trg_filename -> corpus.filename) >>>
    # Step 2: apply one tokeniser instance to each half of the pair.
    (source_side *** target_side) >>>
    # Step 3: rename each corpus.tokenised.filename to this component's
    # declared output for that side.
    wire (corpus.tokenised.filename -> tokenised_src_filename),
         (corpus.tokenised.filename -> tokenised_trg_filename)