Uploading all files
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +160 -0
- README.md +3 -0
- Student_en_hi/checkpoint_best.pt +3 -0
- Student_en_hi/checkpoint_use.pt +3 -0
- Student_en_hi/out_distill/tokenized.en-hi/dict.en.txt +0 -0
- Student_en_hi/out_distill/tokenized.en-hi/dict.hi.txt +0 -0
- Student_en_hi/out_distill/tokenized.en-hi/preprocess.log +14 -0
- Student_en_hi/out_distill/tokenized.en-hi/test.en-hi.en.bin +3 -0
- Student_en_hi/out_distill/tokenized.en-hi/test.en-hi.en.idx +0 -0
- Student_en_hi/out_distill/tokenized.en-hi/test.en-hi.hi.bin +3 -0
- Student_en_hi/out_distill/tokenized.en-hi/test.en-hi.hi.idx +0 -0
- Student_en_hi/out_distill/tokenized.en-hi/train.en-hi.en.bin +3 -0
- Student_en_hi/out_distill/tokenized.en-hi/train.en-hi.en.idx +3 -0
- Student_en_hi/out_distill/tokenized.en-hi/train.en-hi.hi.bin +3 -0
- Student_en_hi/out_distill/tokenized.en-hi/train.en-hi.hi.idx +3 -0
- Student_en_hi/out_distill/tokenized.en-hi/valid.en-hi.en.bin +3 -0
- Student_en_hi/out_distill/tokenized.en-hi/valid.en-hi.en.idx +0 -0
- Student_en_hi/out_distill/tokenized.en-hi/valid.en-hi.hi.bin +3 -0
- Student_en_hi/out_distill/tokenized.en-hi/valid.en-hi.hi.idx +0 -0
- app.py +35 -0
- combined_proj.py +68 -0
- condaenv.rqs8eco6.requirements (copy).txt +64 -0
- condaenv.rqs8eco6.requirements.txt +64 -0
- condaenv.xig3xxzi.requirements.txt +64 -0
- law/checkpoint_best.pt +3 -0
- law/out/tokenized.en-hi/dict.en.txt +0 -0
- law/out/tokenized.en-hi/dict.hi.txt +0 -0
- law/out/tokenized.en-hi/preprocess.log +14 -0
- law/out/tokenized.en-hi/test.en-hi.en.bin +3 -0
- law/out/tokenized.en-hi/test.en-hi.en.idx +0 -0
- law/out/tokenized.en-hi/test.en-hi.hi.bin +3 -0
- law/out/tokenized.en-hi/test.en-hi.hi.idx +0 -0
- law/out/tokenized.en-hi/train.en-hi.en.bin +3 -0
- law/out/tokenized.en-hi/train.en-hi.en.idx +3 -0
- law/out/tokenized.en-hi/train.en-hi.hi.bin +3 -0
- law/out/tokenized.en-hi/train.en-hi.hi.idx +3 -0
- law/out/tokenized.en-hi/valid.en-hi.en.bin +3 -0
- law/out/tokenized.en-hi/valid.en-hi.en.idx +0 -0
- law/out/tokenized.en-hi/valid.en-hi.hi.bin +3 -0
- law/out/tokenized.en-hi/valid.en-hi.hi.idx +0 -0
- multi/checkpoint_best.pt +3 -0
- multi/out/tokenized.en-hi/dict.en.txt +0 -0
- multi/out/tokenized.en-hi/dict.hi.txt +0 -0
- multi/out/tokenized.en-hi/preprocess.log +14 -0
- multi/out/tokenized.en-hi/test.en-hi.en.bin +3 -0
- multi/out/tokenized.en-hi/test.en-hi.en.idx +0 -0
- multi/out/tokenized.en-hi/test.en-hi.hi.bin +3 -0
- multi/out/tokenized.en-hi/test.en-hi.hi.idx +0 -0
- multi/out/tokenized.en-hi/train.en-hi.en.bin +3 -0
- multi/out/tokenized.en-hi/train.en-hi.en.idx +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,163 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
law/out/tokenized.en-hi/train.en-hi.en.idx filter=lfs diff=lfs merge=lfs -text
|
37 |
+
law/out/tokenized.en-hi/train.en-hi.hi.idx filter=lfs diff=lfs merge=lfs -text
|
38 |
+
multi/out/tokenized.en-hi/train.en-hi.en.idx filter=lfs diff=lfs merge=lfs -text
|
39 |
+
multi/out/tokenized.en-hi/train.en-hi.hi.idx filter=lfs diff=lfs merge=lfs -text
|
40 |
+
sports/out/tokenized.en-hi/train.en-hi.en.idx filter=lfs diff=lfs merge=lfs -text
|
41 |
+
sports/out/tokenized.en-hi/train.en-hi.hi.idx filter=lfs diff=lfs merge=lfs -text
|
42 |
+
Student_en_hi/out_distill/tokenized.en-hi/train.en-hi.en.idx filter=lfs diff=lfs merge=lfs -text
|
43 |
+
Student_en_hi/out_distill/tokenized.en-hi/train.en-hi.hi.idx filter=lfs diff=lfs merge=lfs -text
|
44 |
+
translation/tools/mosesdecoder/bin/build_binary filter=lfs diff=lfs merge=lfs -text
|
45 |
+
translation/tools/mosesdecoder/bin/consolidate filter=lfs diff=lfs merge=lfs -text
|
46 |
+
translation/tools/mosesdecoder/bin/consolidate-direct filter=lfs diff=lfs merge=lfs -text
|
47 |
+
translation/tools/mosesdecoder/bin/consolidate-reverse filter=lfs diff=lfs merge=lfs -text
|
48 |
+
translation/tools/mosesdecoder/bin/CreateOnDiskPt filter=lfs diff=lfs merge=lfs -text
|
49 |
+
translation/tools/mosesdecoder/bin/CreateProbingPT filter=lfs diff=lfs merge=lfs -text
|
50 |
+
translation/tools/mosesdecoder/bin/CreateProbingPT2 filter=lfs diff=lfs merge=lfs -text
|
51 |
+
translation/tools/mosesdecoder/bin/dump_counts filter=lfs diff=lfs merge=lfs -text
|
52 |
+
translation/tools/mosesdecoder/bin/evaluator filter=lfs diff=lfs merge=lfs -text
|
53 |
+
translation/tools/mosesdecoder/bin/extract filter=lfs diff=lfs merge=lfs -text
|
54 |
+
translation/tools/mosesdecoder/bin/extract-ghkm filter=lfs diff=lfs merge=lfs -text
|
55 |
+
translation/tools/mosesdecoder/bin/extract-lex filter=lfs diff=lfs merge=lfs -text
|
56 |
+
translation/tools/mosesdecoder/bin/extract-mixed-syntax filter=lfs diff=lfs merge=lfs -text
|
57 |
+
translation/tools/mosesdecoder/bin/extract-rules filter=lfs diff=lfs merge=lfs -text
|
58 |
+
translation/tools/mosesdecoder/bin/extractor filter=lfs diff=lfs merge=lfs -text
|
59 |
+
translation/tools/mosesdecoder/bin/filter filter=lfs diff=lfs merge=lfs -text
|
60 |
+
translation/tools/mosesdecoder/bin/filter-rule-table filter=lfs diff=lfs merge=lfs -text
|
61 |
+
translation/tools/mosesdecoder/bin/fragment filter=lfs diff=lfs merge=lfs -text
|
62 |
+
translation/tools/mosesdecoder/bin/gcc-7/debug/empty_test_static filter=lfs diff=lfs merge=lfs -text
|
63 |
+
translation/tools/mosesdecoder/bin/hgdecode filter=lfs diff=lfs merge=lfs -text
|
64 |
+
translation/tools/mosesdecoder/bin/kbmira filter=lfs diff=lfs merge=lfs -text
|
65 |
+
translation/tools/mosesdecoder/bin/kenlm_benchmark filter=lfs diff=lfs merge=lfs -text
|
66 |
+
translation/tools/mosesdecoder/bin/lexical-reordering-score filter=lfs diff=lfs merge=lfs -text
|
67 |
+
translation/tools/mosesdecoder/bin/lmbrgrid filter=lfs diff=lfs merge=lfs -text
|
68 |
+
translation/tools/mosesdecoder/bin/lmplz filter=lfs diff=lfs merge=lfs -text
|
69 |
+
translation/tools/mosesdecoder/bin/merge-sorted filter=lfs diff=lfs merge=lfs -text
|
70 |
+
translation/tools/mosesdecoder/bin/mert filter=lfs diff=lfs merge=lfs -text
|
71 |
+
translation/tools/mosesdecoder/bin/moses filter=lfs diff=lfs merge=lfs -text
|
72 |
+
translation/tools/mosesdecoder/bin/moses_chart filter=lfs diff=lfs merge=lfs -text
|
73 |
+
translation/tools/mosesdecoder/bin/pcfg-extract filter=lfs diff=lfs merge=lfs -text
|
74 |
+
translation/tools/mosesdecoder/bin/pcfg-score filter=lfs diff=lfs merge=lfs -text
|
75 |
+
translation/tools/mosesdecoder/bin/postprocess-egret-forests filter=lfs diff=lfs merge=lfs -text
|
76 |
+
translation/tools/mosesdecoder/bin/prepare-expected-bleu-training filter=lfs diff=lfs merge=lfs -text
|
77 |
+
translation/tools/mosesdecoder/bin/pro filter=lfs diff=lfs merge=lfs -text
|
78 |
+
translation/tools/mosesdecoder/bin/processLexicalTable filter=lfs diff=lfs merge=lfs -text
|
79 |
+
translation/tools/mosesdecoder/bin/prunePhraseTable filter=lfs diff=lfs merge=lfs -text
|
80 |
+
translation/tools/mosesdecoder/bin/query filter=lfs diff=lfs merge=lfs -text
|
81 |
+
translation/tools/mosesdecoder/bin/queryLexicalTable filter=lfs diff=lfs merge=lfs -text
|
82 |
+
translation/tools/mosesdecoder/bin/queryOnDiskPt filter=lfs diff=lfs merge=lfs -text
|
83 |
+
translation/tools/mosesdecoder/bin/relax-parse filter=lfs diff=lfs merge=lfs -text
|
84 |
+
translation/tools/mosesdecoder/bin/score filter=lfs diff=lfs merge=lfs -text
|
85 |
+
translation/tools/mosesdecoder/bin/score-stsg filter=lfs diff=lfs merge=lfs -text
|
86 |
+
translation/tools/mosesdecoder/bin/sentence-bleu filter=lfs diff=lfs merge=lfs -text
|
87 |
+
translation/tools/mosesdecoder/bin/sentence-bleu-nbest filter=lfs diff=lfs merge=lfs -text
|
88 |
+
translation/tools/mosesdecoder/bin/statistics filter=lfs diff=lfs merge=lfs -text
|
89 |
+
translation/tools/mosesdecoder/bin/train-expected-bleu filter=lfs diff=lfs merge=lfs -text
|
90 |
+
translation/tools/mosesdecoder/bin/vwtrainer filter=lfs diff=lfs merge=lfs -text
|
91 |
+
translation/tools/mosesdecoder/contrib/expected-bleu-training/bin/gcc-7/release/link-static/threading-multi/prepare-expected-bleu-training filter=lfs diff=lfs merge=lfs -text
|
92 |
+
translation/tools/mosesdecoder/contrib/expected-bleu-training/bin/gcc-7/release/link-static/threading-multi/train-expected-bleu filter=lfs diff=lfs merge=lfs -text
|
93 |
+
translation/tools/mosesdecoder/contrib/promix/test_data/esen.ep.model.filtered/phrase-table.0-0.1.1 filter=lfs diff=lfs merge=lfs -text
|
94 |
+
translation/tools/mosesdecoder/contrib/promix/test_data/esen.ep.model.filtered/phrase-table.0-0.1.1.binphr.tgtdata.wa filter=lfs diff=lfs merge=lfs -text
|
95 |
+
translation/tools/mosesdecoder/lib/libmert_lib.a filter=lfs diff=lfs merge=lfs -text
|
96 |
+
translation/tools/mosesdecoder/lib/libmoses.a filter=lfs diff=lfs merge=lfs -text
|
97 |
+
translation/tools/mosesdecoder/lib/libprobingpt.a filter=lfs diff=lfs merge=lfs -text
|
98 |
+
translation/tools/mosesdecoder/lib/libsyntax_common.a filter=lfs diff=lfs merge=lfs -text
|
99 |
+
translation/tools/mosesdecoder/lm/bin/gcc-7/release/link-static/threading-multi/build_binary filter=lfs diff=lfs merge=lfs -text
|
100 |
+
translation/tools/mosesdecoder/lm/bin/gcc-7/release/link-static/threading-multi/fragment filter=lfs diff=lfs merge=lfs -text
|
101 |
+
translation/tools/mosesdecoder/lm/bin/gcc-7/release/link-static/threading-multi/kenlm_benchmark filter=lfs diff=lfs merge=lfs -text
|
102 |
+
translation/tools/mosesdecoder/lm/bin/gcc-7/release/link-static/threading-multi/query filter=lfs diff=lfs merge=lfs -text
|
103 |
+
translation/tools/mosesdecoder/lm/bin/left_test.test/gcc-7/release/link-static/threading-multi/left_test filter=lfs diff=lfs merge=lfs -text
|
104 |
+
translation/tools/mosesdecoder/lm/bin/model_test.test/gcc-7/release/link-static/threading-multi/model_test filter=lfs diff=lfs merge=lfs -text
|
105 |
+
translation/tools/mosesdecoder/lm/bin/model_test.test/gcc-7/release/link-static/threading-multi/model_test.o filter=lfs diff=lfs merge=lfs -text
|
106 |
+
translation/tools/mosesdecoder/lm/bin/partial_test.test/gcc-7/release/link-static/threading-multi/partial_test filter=lfs diff=lfs merge=lfs -text
|
107 |
+
translation/tools/mosesdecoder/lm/builder/bin/gcc-7/release/link-static/threading-multi/dump_counts filter=lfs diff=lfs merge=lfs -text
|
108 |
+
translation/tools/mosesdecoder/lm/builder/bin/gcc-7/release/link-static/threading-multi/lmplz filter=lfs diff=lfs merge=lfs -text
|
109 |
+
translation/tools/mosesdecoder/lm/filter/bin/gcc-7/release/link-static/threading-multi/filter filter=lfs diff=lfs merge=lfs -text
|
110 |
+
translation/tools/mosesdecoder/mert/bin/gcc-7/release/link-static/threading-multi/bleu_scorer_test filter=lfs diff=lfs merge=lfs -text
|
111 |
+
translation/tools/mosesdecoder/mert/bin/gcc-7/release/link-static/threading-multi/data_test filter=lfs diff=lfs merge=lfs -text
|
112 |
+
translation/tools/mosesdecoder/mert/bin/gcc-7/release/link-static/threading-multi/evaluator filter=lfs diff=lfs merge=lfs -text
|
113 |
+
translation/tools/mosesdecoder/mert/bin/gcc-7/release/link-static/threading-multi/extractor filter=lfs diff=lfs merge=lfs -text
|
114 |
+
translation/tools/mosesdecoder/mert/bin/gcc-7/release/link-static/threading-multi/feature_data_test filter=lfs diff=lfs merge=lfs -text
|
115 |
+
translation/tools/mosesdecoder/mert/bin/gcc-7/release/link-static/threading-multi/forest_rescore_test filter=lfs diff=lfs merge=lfs -text
|
116 |
+
translation/tools/mosesdecoder/mert/bin/gcc-7/release/link-static/threading-multi/hgdecode filter=lfs diff=lfs merge=lfs -text
|
117 |
+
translation/tools/mosesdecoder/mert/bin/gcc-7/release/link-static/threading-multi/hypergraph_test filter=lfs diff=lfs merge=lfs -text
|
118 |
+
translation/tools/mosesdecoder/mert/bin/gcc-7/release/link-static/threading-multi/kbmira filter=lfs diff=lfs merge=lfs -text
|
119 |
+
translation/tools/mosesdecoder/mert/bin/gcc-7/release/link-static/threading-multi/libmert_lib.a filter=lfs diff=lfs merge=lfs -text
|
120 |
+
translation/tools/mosesdecoder/mert/bin/gcc-7/release/link-static/threading-multi/mert filter=lfs diff=lfs merge=lfs -text
|
121 |
+
translation/tools/mosesdecoder/mert/bin/gcc-7/release/link-static/threading-multi/mira_feature_vector_test filter=lfs diff=lfs merge=lfs -text
|
122 |
+
translation/tools/mosesdecoder/mert/bin/gcc-7/release/link-static/threading-multi/ngram_test filter=lfs diff=lfs merge=lfs -text
|
123 |
+
translation/tools/mosesdecoder/mert/bin/gcc-7/release/link-static/threading-multi/optimizer_factory_test filter=lfs diff=lfs merge=lfs -text
|
124 |
+
translation/tools/mosesdecoder/mert/bin/gcc-7/release/link-static/threading-multi/point_test filter=lfs diff=lfs merge=lfs -text
|
125 |
+
translation/tools/mosesdecoder/mert/bin/gcc-7/release/link-static/threading-multi/pro filter=lfs diff=lfs merge=lfs -text
|
126 |
+
translation/tools/mosesdecoder/mert/bin/gcc-7/release/link-static/threading-multi/reference_test filter=lfs diff=lfs merge=lfs -text
|
127 |
+
translation/tools/mosesdecoder/mert/bin/gcc-7/release/link-static/threading-multi/sentence-bleu filter=lfs diff=lfs merge=lfs -text
|
128 |
+
translation/tools/mosesdecoder/mert/bin/gcc-7/release/link-static/threading-multi/sentence-bleu-nbest filter=lfs diff=lfs merge=lfs -text
|
129 |
+
translation/tools/mosesdecoder/mert/bin/gcc-7/release/link-static/threading-multi/TER/tools.o filter=lfs diff=lfs merge=lfs -text
|
130 |
+
translation/tools/mosesdecoder/mert/bin/gcc-7/release/link-static/threading-multi/timer_test filter=lfs diff=lfs merge=lfs -text
|
131 |
+
translation/tools/mosesdecoder/mert/bin/gcc-7/release/link-static/threading-multi/util_test filter=lfs diff=lfs merge=lfs -text
|
132 |
+
translation/tools/mosesdecoder/mert/bin/gcc-7/release/link-static/threading-multi/vocabulary_test filter=lfs diff=lfs merge=lfs -text
|
133 |
+
translation/tools/mosesdecoder/mert/evaluator filter=lfs diff=lfs merge=lfs -text
|
134 |
+
translation/tools/mosesdecoder/mert/extractor filter=lfs diff=lfs merge=lfs -text
|
135 |
+
translation/tools/mosesdecoder/mert/hgdecode filter=lfs diff=lfs merge=lfs -text
|
136 |
+
translation/tools/mosesdecoder/mert/kbmira filter=lfs diff=lfs merge=lfs -text
|
137 |
+
translation/tools/mosesdecoder/mert/mert filter=lfs diff=lfs merge=lfs -text
|
138 |
+
translation/tools/mosesdecoder/mert/pro filter=lfs diff=lfs merge=lfs -text
|
139 |
+
translation/tools/mosesdecoder/mert/sentence-bleu filter=lfs diff=lfs merge=lfs -text
|
140 |
+
translation/tools/mosesdecoder/mert/sentence-bleu-nbest filter=lfs diff=lfs merge=lfs -text
|
141 |
+
translation/tools/mosesdecoder/misc/bin/gcc-7/release/link-static/threading-multi/merge-sorted filter=lfs diff=lfs merge=lfs -text
|
142 |
+
translation/tools/mosesdecoder/misc/bin/gcc-7/release/link-static/threading-multi/processLexicalTable filter=lfs diff=lfs merge=lfs -text
|
143 |
+
translation/tools/mosesdecoder/misc/bin/gcc-7/release/link-static/threading-multi/prunePhraseTable filter=lfs diff=lfs merge=lfs -text
|
144 |
+
translation/tools/mosesdecoder/misc/bin/gcc-7/release/link-static/threading-multi/queryLexicalTable filter=lfs diff=lfs merge=lfs -text
|
145 |
+
translation/tools/mosesdecoder/moses-cmd/bin/gcc-7/release/link-static/threading-multi/lmbrgrid filter=lfs diff=lfs merge=lfs -text
|
146 |
+
translation/tools/mosesdecoder/moses-cmd/bin/gcc-7/release/link-static/threading-multi/moses filter=lfs diff=lfs merge=lfs -text
|
147 |
+
translation/tools/mosesdecoder/moses-cmd/bin/gcc-7/release/link-static/threading-multi/vwtrainer filter=lfs diff=lfs merge=lfs -text
|
148 |
+
translation/tools/mosesdecoder/moses/bin/gcc-7/release/link-static/threading-multi/libmoses.a filter=lfs diff=lfs merge=lfs -text
|
149 |
+
translation/tools/mosesdecoder/moses/bin/gcc-7/release/link-static/threading-multi/moses_test filter=lfs diff=lfs merge=lfs -text
|
150 |
+
translation/tools/mosesdecoder/moses/LM/bin/BackwardTest.test/gcc-7/release/link-static/threading-multi/BackwardTest filter=lfs diff=lfs merge=lfs -text
|
151 |
+
translation/tools/mosesdecoder/moses/TranslationModel/UG/util/ibm1-align filter=lfs diff=lfs merge=lfs -text
|
152 |
+
translation/tools/mosesdecoder/OnDiskPt/bin/gcc-7/release/link-static/threading-multi/CreateOnDiskPt filter=lfs diff=lfs merge=lfs -text
|
153 |
+
translation/tools/mosesdecoder/OnDiskPt/bin/gcc-7/release/link-static/threading-multi/queryOnDiskPt filter=lfs diff=lfs merge=lfs -text
|
154 |
+
translation/tools/mosesdecoder/phrase-extract/bin/gcc-7/release/link-static/threading-multi/consolidate filter=lfs diff=lfs merge=lfs -text
|
155 |
+
translation/tools/mosesdecoder/phrase-extract/bin/gcc-7/release/link-static/threading-multi/consolidate-direct filter=lfs diff=lfs merge=lfs -text
|
156 |
+
translation/tools/mosesdecoder/phrase-extract/bin/gcc-7/release/link-static/threading-multi/consolidate-reverse filter=lfs diff=lfs merge=lfs -text
|
157 |
+
translation/tools/mosesdecoder/phrase-extract/bin/gcc-7/release/link-static/threading-multi/extract filter=lfs diff=lfs merge=lfs -text
|
158 |
+
translation/tools/mosesdecoder/phrase-extract/bin/gcc-7/release/link-static/threading-multi/extract-lex filter=lfs diff=lfs merge=lfs -text
|
159 |
+
translation/tools/mosesdecoder/phrase-extract/bin/gcc-7/release/link-static/threading-multi/extract-rules filter=lfs diff=lfs merge=lfs -text
|
160 |
+
translation/tools/mosesdecoder/phrase-extract/bin/gcc-7/release/link-static/threading-multi/relax-parse filter=lfs diff=lfs merge=lfs -text
|
161 |
+
translation/tools/mosesdecoder/phrase-extract/bin/gcc-7/release/link-static/threading-multi/score filter=lfs diff=lfs merge=lfs -text
|
162 |
+
translation/tools/mosesdecoder/phrase-extract/bin/gcc-7/release/link-static/threading-multi/statistics filter=lfs diff=lfs merge=lfs -text
|
163 |
+
translation/tools/mosesdecoder/phrase-extract/bin/ScoreFeatureTest.test/gcc-7/release/link-static/threading-multi/ScoreFeatureTest filter=lfs diff=lfs merge=lfs -text
|
164 |
+
translation/tools/mosesdecoder/phrase-extract/extract-ghkm/bin/gcc-7/release/link-static/threading-multi/extract-ghkm filter=lfs diff=lfs merge=lfs -text
|
165 |
+
translation/tools/mosesdecoder/phrase-extract/extract-mixed-syntax/bin/gcc-7/release/link-static/threading-multi/extract-mixed-syntax filter=lfs diff=lfs merge=lfs -text
|
166 |
+
translation/tools/mosesdecoder/phrase-extract/filter-rule-table/bin/gcc-7/release/link-static/threading-multi/filter-rule-table filter=lfs diff=lfs merge=lfs -text
|
167 |
+
translation/tools/mosesdecoder/phrase-extract/lexical-reordering/bin/gcc-7/release/link-static/threading-multi/lexical-reordering-score filter=lfs diff=lfs merge=lfs -text
|
168 |
+
translation/tools/mosesdecoder/phrase-extract/pcfg-extract/bin/gcc-7/release/link-static/threading-multi/pcfg-extract filter=lfs diff=lfs merge=lfs -text
|
169 |
+
translation/tools/mosesdecoder/phrase-extract/pcfg-score/bin/gcc-7/release/link-static/threading-multi/pcfg-score filter=lfs diff=lfs merge=lfs -text
|
170 |
+
translation/tools/mosesdecoder/phrase-extract/postprocess-egret-forests/bin/gcc-7/release/link-static/threading-multi/postprocess-egret-forests filter=lfs diff=lfs merge=lfs -text
|
171 |
+
translation/tools/mosesdecoder/phrase-extract/score-stsg/bin/gcc-7/release/link-static/threading-multi/score-stsg filter=lfs diff=lfs merge=lfs -text
|
172 |
+
translation/tools/mosesdecoder/phrase-extract/syntax-common/bin/gcc-7/release/link-static/threading-multi/libsyntax_common.a filter=lfs diff=lfs merge=lfs -text
|
173 |
+
translation/tools/mosesdecoder/phrase-extract/syntax-common/bin/gcc-7/release/link-static/threading-multi/tree_fragment_tokenizer_test filter=lfs diff=lfs merge=lfs -text
|
174 |
+
translation/tools/mosesdecoder/phrase-extract/syntax-common/bin/gcc-7/release/link-static/threading-multi/tree_test filter=lfs diff=lfs merge=lfs -text
|
175 |
+
translation/tools/mosesdecoder/probingpt/bin/gcc-7/release/link-static/threading-multi/CreateProbingPT filter=lfs diff=lfs merge=lfs -text
|
176 |
+
translation/tools/mosesdecoder/probingpt/bin/gcc-7/release/link-static/threading-multi/libprobingpt.a filter=lfs diff=lfs merge=lfs -text
|
177 |
+
translation/tools/mosesdecoder/tools/GIZA++ filter=lfs diff=lfs merge=lfs -text
|
178 |
+
translation/tools/mosesdecoder/util/bin/file_piece_test.test/gcc-7/release/link-static/threading-multi/file_piece_test filter=lfs diff=lfs merge=lfs -text
|
179 |
+
translation/tools/mosesdecoder/util/bin/gcc-7/release/link-static/threading-multi/bit_packing_test filter=lfs diff=lfs merge=lfs -text
|
180 |
+
translation/tools/mosesdecoder/util/bin/gcc-7/release/link-static/threading-multi/integer_to_string_test filter=lfs diff=lfs merge=lfs -text
|
181 |
+
translation/tools/mosesdecoder/util/bin/gcc-7/release/link-static/threading-multi/joint_sort_test filter=lfs diff=lfs merge=lfs -text
|
182 |
+
translation/tools/mosesdecoder/util/bin/gcc-7/release/link-static/threading-multi/multi_intersection_test filter=lfs diff=lfs merge=lfs -text
|
183 |
+
translation/tools/mosesdecoder/util/bin/gcc-7/release/link-static/threading-multi/pcqueue_test filter=lfs diff=lfs merge=lfs -text
|
184 |
+
translation/tools/mosesdecoder/util/bin/gcc-7/release/link-static/threading-multi/probing_hash_table_test filter=lfs diff=lfs merge=lfs -text
|
185 |
+
translation/tools/mosesdecoder/util/bin/gcc-7/release/link-static/threading-multi/random_test filter=lfs diff=lfs merge=lfs -text
|
186 |
+
translation/tools/mosesdecoder/util/bin/gcc-7/release/link-static/threading-multi/sized_iterator_test filter=lfs diff=lfs merge=lfs -text
|
187 |
+
translation/tools/mosesdecoder/util/bin/gcc-7/release/link-static/threading-multi/sorted_uniform_test filter=lfs diff=lfs merge=lfs -text
|
188 |
+
translation/tools/mosesdecoder/util/bin/gcc-7/release/link-static/threading-multi/string_stream_test filter=lfs diff=lfs merge=lfs -text
|
189 |
+
translation/tools/mosesdecoder/util/bin/gcc-7/release/link-static/threading-multi/tempfile_test filter=lfs diff=lfs merge=lfs -text
|
190 |
+
translation/tools/mosesdecoder/util/bin/gcc-7/release/link-static/threading-multi/tokenize_piece_test filter=lfs diff=lfs merge=lfs -text
|
191 |
+
translation/tools/mosesdecoder/util/bin/gcc-7/release/link-static/threading-multi/tokenize_test filter=lfs diff=lfs merge=lfs -text
|
192 |
+
translation/tools/mosesdecoder/util/stream/bin/gcc-7/release/link-static/threading-multi/io_test filter=lfs diff=lfs merge=lfs -text
|
193 |
+
translation/tools/mosesdecoder/util/stream/bin/gcc-7/release/link-static/threading-multi/rewindable_stream_test filter=lfs diff=lfs merge=lfs -text
|
194 |
+
translation/tools/mosesdecoder/util/stream/bin/gcc-7/release/link-static/threading-multi/sort_test filter=lfs diff=lfs merge=lfs -text
|
195 |
+
translation/tools/mosesdecoder/util/stream/bin/gcc-7/release/link-static/threading-multi/stream_test filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
# Knowledge_Distillation
|
2 |
+
# Knowledge_Distillation
|
3 |
+
# Knowledge_Distillation
|
Student_en_hi/checkpoint_best.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5c8911f41f6540861aa40fcfb9d5981dc752421d776d04da5396f238aaa50217
|
3 |
+
size 542239631
|
Student_en_hi/checkpoint_use.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9be3052c4a080e8c4b198d674f4c6bca4540b3435b59bd3c8a5c06bc9cce383c
|
3 |
+
size 542239631
|
Student_en_hi/out_distill/tokenized.en-hi/dict.en.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Student_en_hi/out_distill/tokenized.en-hi/dict.hi.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Student_en_hi/out_distill/tokenized.en-hi/preprocess.log
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Namespace(aim_repo=None, aim_run_hash=None, align_suffix=None, alignfile=None, all_gather_list_size=16384, amp=False, amp_batch_retries=2, amp_init_scale=128, amp_scale_window=None, azureml_logging=False, bf16=False, bpe=None, cpu=False, criterion='cross_entropy', dataset_impl='mmap', destdir='data/out_distill/tokenized.en-hi', dict_only=False, empty_cache_freq=0, fp16=False, fp16_init_scale=128, fp16_no_flatten_grads=False, fp16_scale_tolerance=0.0, fp16_scale_window=None, joined_dictionary=False, log_file=None, log_format=None, log_interval=100, lr_scheduler='fixed', memory_efficient_bf16=False, memory_efficient_fp16=False, min_loss_scale=0.0001, model_parallel_size=1, no_progress_bar=False, nwordssrc=-1, nwordstgt=-1, on_cpu_convert_precision=False, only_source=False, optimizer=None, padding_factor=8, plasma_path='/tmp/plasma', profile=False, quantization_config_path=None, reset_logging=False, scoring='bleu', seed=1, simul_type=None, source_lang='en', srcdict=None, suppress_crashes=False, target_lang='hi', task='translation', tensorboard_logdir=None, testpref='data/test-tok', tgtdict=None, threshold_loss_scale=None, thresholdsrc=2, thresholdtgt=2, tokenizer=None, tpu=False, trainpref='data/300k/train-distill-tok', use_plasma_view=False, user_dir=None, validpref='data/val-tok', wandb_project=None, workers=1)
|
2 |
+
[en] Dictionary: 58936 types
|
3 |
+
[en] data/300k/train-distill-tok.en: 300000 sents, 5789433 tokens, 1.15% replaced (by <unk>)
|
4 |
+
[en] Dictionary: 58936 types
|
5 |
+
[en] data/val-tok.en: 500 sents, 10356 tokens, 11.4% replaced (by <unk>)
|
6 |
+
[en] Dictionary: 58936 types
|
7 |
+
[en] data/test-tok.en: 512 sents, 10618 tokens, 10.3% replaced (by <unk>)
|
8 |
+
[hi] Dictionary: 50016 types
|
9 |
+
[hi] data/300k/train-distill-tok.hi: 300000 sents, 5911137 tokens, 0.805% replaced (by <unk>)
|
10 |
+
[hi] Dictionary: 50016 types
|
11 |
+
[hi] data/val-tok.hi: 500 sents, 12623 tokens, 11.7% replaced (by <unk>)
|
12 |
+
[hi] Dictionary: 50016 types
|
13 |
+
[hi] data/test-tok.hi: 512 sents, 12873 tokens, 11.9% replaced (by <unk>)
|
14 |
+
Wrote preprocessed data to data/out_distill/tokenized.en-hi
|
Student_en_hi/out_distill/tokenized.en-hi/test.en-hi.en.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d685ac6f981d12de73d411ae6073d0fee80f41ae4d7d58f08dba05ab5ad02e5f
|
3 |
+
size 21236
|
Student_en_hi/out_distill/tokenized.en-hi/test.en-hi.en.idx
ADDED
Binary file (6.17 kB). View file
|
|
Student_en_hi/out_distill/tokenized.en-hi/test.en-hi.hi.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0e2895393e04355588e8d6598828e5e36204836f36ed44ee8ca6240c5f6dfd31
|
3 |
+
size 25746
|
Student_en_hi/out_distill/tokenized.en-hi/test.en-hi.hi.idx
ADDED
Binary file (6.17 kB). View file
|
|
Student_en_hi/out_distill/tokenized.en-hi/train.en-hi.en.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1167edf171a77ebce492207f26204094d276534193955ab0dd195269fb52edaf
|
3 |
+
size 11578866
|
Student_en_hi/out_distill/tokenized.en-hi/train.en-hi.en.idx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:334d38e43994c107cfd85b2cd898ad97e7465c5e276db4cc695f53ea9473f1d0
|
3 |
+
size 3600026
|
Student_en_hi/out_distill/tokenized.en-hi/train.en-hi.hi.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5225e8c8a8aa49899c35133429b0a815e3c3a7d70e93b90424e1d353ad26f4e1
|
3 |
+
size 11822274
|
Student_en_hi/out_distill/tokenized.en-hi/train.en-hi.hi.idx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e42346dab63cac05dadb348209b2e8aad638b201f1d358617997b9227f13348b
|
3 |
+
size 3600026
|
Student_en_hi/out_distill/tokenized.en-hi/valid.en-hi.en.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d84bd3627c54e815fba863e344c6494e2ac45fc9fc6d8d4f0491796981920e94
|
3 |
+
size 20712
|
Student_en_hi/out_distill/tokenized.en-hi/valid.en-hi.en.idx
ADDED
Binary file (6.03 kB). View file
|
|
Student_en_hi/out_distill/tokenized.en-hi/valid.en-hi.hi.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0a4602e6401451f0867a7892ebc9d98953389c661d027c591f3265d92d01c071
|
3 |
+
size 25246
|
Student_en_hi/out_distill/tokenized.en-hi/valid.en-hi.hi.idx
ADDED
Binary file (6.03 kB). View file
|
|
app.py
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
import re
|
5 |
+
from datetime import datetime
|
6 |
+
import subprocess
|
7 |
+
from fairseq.models.transformer import TransformerModel
|
8 |
+
time_interval=0
|
9 |
+
st.title('Knowledge Distillation in Neural Machine Translation')
|
10 |
+
title = st.text_input('English Text', 'I welcome you to the demonstration.')
|
11 |
+
|
12 |
+
if st.button('En-Hi Teacher'):
|
13 |
+
time_1 = datetime.now()
|
14 |
+
#subprocess.run('source ~/miniconda3/etc/profile.d/conda.sh && conda init bash')
|
15 |
+
file1 = open("translation/input-files/flores/eng.devtest","w")
|
16 |
+
file1.write(title)
|
17 |
+
file1.close()
|
18 |
+
subprocess.run('cd translation && bash -i translate-en-hi.sh && cd ..', shell=True)
|
19 |
+
time_2 = datetime.now()
|
20 |
+
time_interval = time_2 - time_1
|
21 |
+
file1 = open("translation/output-translation/flores/test-flores.hi","r")
|
22 |
+
st.write('Hindi Translation: ',file1.read())
|
23 |
+
|
24 |
+
file1.close()
|
25 |
+
st.write('Inference Time: ',time_interval)
|
26 |
+
|
27 |
+
if st.button('En-Hi Student'):
|
28 |
+
#title = re.sub('([.,!?()])', r' \1 ', title)
|
29 |
+
#title = re.sub('\s{2,}', ' ', title)
|
30 |
+
time_1 = datetime.now()
|
31 |
+
zh2en = TransformerModel.from_pretrained('Student_en_hi/out_distill/tokenized.en-hi/', checkpoint_file='../../checkpoint_use.pt',bpe='subword_nmt', bpe_codes='/home/sakharam/RnD/translation/en-hi/bpe-codes/codes.en',tokenizer='moses')
|
32 |
+
time_2 = datetime.now()
|
33 |
+
time_interval = time_2 - time_1
|
34 |
+
st.write('Hindi Translation: ',zh2en.translate([title.lower()])[0])
|
35 |
+
st.write('Inference Time: ',time_interval)
|
combined_proj.py
ADDED
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
import pandas as pd
import numpy as np
import re
from datetime import datetime
import subprocess
from fairseq.models.transformer import TransformerModel

# BPE codes shared by every fairseq checkpoint below (all models were
# trained with the same subword-nmt codes).
BPE_CODES = '/home/sakharam/RnD/translation/en-hi/bpe-codes/codes.en'


def _translate_with_model(model_dir, checkpoint_file, text):
    """Load a fairseq Transformer and translate one English sentence to Hindi.

    Parameters
    ----------
    model_dir : str
        Directory holding the tokenized data / dictionaries for the model.
    checkpoint_file : str
        Checkpoint path, relative to ``model_dir`` (the repo stores the .pt
        files two levels above the tokenized data, hence ``../../``).
    text : str
        Raw English input from the UI.

    Returns
    -------
    tuple[str, datetime.timedelta]
        The Hindi translation and the elapsed time covering BOTH model
        loading and the translate call (the original code stopped the clock
        before translating, so the reported "Inference Time" excluded the
        actual inference).
    """
    start = datetime.now()
    model = TransformerModel.from_pretrained(
        model_dir,
        checkpoint_file=checkpoint_file,
        bpe='subword_nmt',
        bpe_codes=BPE_CODES,
        tokenizer='moses',
    )
    # Models were trained on lower-cased English, hence .lower().
    translation = model.translate([text.lower()])[0]
    return translation, datetime.now() - start


def _report(translation, elapsed):
    """Render one translation result in the Streamlit page."""
    st.write('Hindi Translation: ', translation)
    st.write('Inference Time: ', elapsed)


st.title('Knowledge Distillation in Neural Machine Translation')
title = st.text_input('English Text', 'I welcome you to the demonstration.')

if st.button('En-Hi Teacher'):
    # The teacher runs through an external shell pipeline: write the input
    # sentence where the script expects it, run the script, read the output.
    time_1 = datetime.now()
    with open("translation/input-files/flores/eng.devtest", "w") as fh:
        fh.write(title)
    # NOTE(review): 'bash -i' launches an interactive shell so that the
    # script's conda activation works; confirm this is still required.
    subprocess.run('cd translation && bash -i translate-en-hi.sh && cd ..', shell=True)
    time_2 = datetime.now()
    with open("translation/output-translation/flores/test-flores.hi", "r") as fh:
        _report(fh.read(), time_2 - time_1)

if st.button('En-Hi Student'):
    translation, elapsed = _translate_with_model(
        'Student_en_hi/out_distill/tokenized.en-hi/', '../../checkpoint_use.pt', title)
    _report(translation, elapsed)

if st.button('Law En-Hi Teacher'):
    translation, elapsed = _translate_with_model(
        'law/out/tokenized.en-hi/', '../../checkpoint_best.pt', title)
    _report(translation, elapsed)

if st.button('Sports En-Hi Teacher'):
    translation, elapsed = _translate_with_model(
        'sports/out/tokenized.en-hi/', '../../checkpoint_best.pt', title)
    _report(translation, elapsed)

if st.button('Tourism En-Hi Teacher'):
    translation, elapsed = _translate_with_model(
        'tourism/out/tokenized.en-hi/', '../../checkpoint_best.pt', title)
    _report(translation, elapsed)

if st.button('Multi-Domain En-Hi Student'):
    translation, elapsed = _translate_with_model(
        'multi/out/tokenized.en-hi/', '../../checkpoint_best.pt', title)
    _report(translation, elapsed)
|
condaenv.rqs8eco6.requirements (copy).txt
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
absl-py==0.9.0
|
2 |
+
alabaster==0.7.12
|
3 |
+
antlr4-python3-runtime==4.8
|
4 |
+
babel==2.8.0
|
5 |
+
ctranslate2==2.1.0
|
6 |
+
cython==0.29.23
|
7 |
+
docutils==0.16
|
8 |
+
et-xmlfile==1.1.0
|
9 |
+
filelock==3.0.12
|
10 |
+
flask==1.1.2
|
11 |
+
future==0.18.2
|
12 |
+
google-auth==1.19.2
|
13 |
+
google-auth-oauthlib==0.4.1
|
14 |
+
grpcio==1.30.0
|
15 |
+
huggingface-hub==0.0.8
|
16 |
+
hydra-core==1.0.6
|
17 |
+
imagesize==1.2.0
|
18 |
+
importlib-resources==5.1.2
|
19 |
+
indic-nlp-library==0.6
|
20 |
+
indicnlp==0.0.1
|
21 |
+
itsdangerous==1.1.0
|
22 |
+
jinja2==2.11.2
|
23 |
+
markdown==3.2.2
|
24 |
+
markupsafe==1.1.1
|
25 |
+
morfessor==2.0.6
|
26 |
+
oauthlib==3.1.0
|
27 |
+
omegaconf==2.0.6
|
28 |
+
openpyxl==3.0.9
|
29 |
+
packaging==20.4
|
30 |
+
pandas==1.0.5
|
31 |
+
polyglot==16.7.4
|
32 |
+
portalocker==2.0.0
|
33 |
+
protobuf==3.12.2
|
34 |
+
pyasn1-modules==0.2.8
|
35 |
+
pycld2==0.41
|
36 |
+
pygments==2.6.1
|
37 |
+
pyicu==2.7.4
|
38 |
+
pyonmttok==1.31.0
|
39 |
+
pyyaml==5.3.1
|
40 |
+
requests-oauthlib==1.3.0
|
41 |
+
sacrebleu==1.5.1
|
42 |
+
sacremoses==0.0.45
|
43 |
+
sentencepiece==0.1.95
|
44 |
+
snowballstemmer==2.0.0
|
45 |
+
sphinx==3.1.2
|
46 |
+
sphinx-argparse==0.2.5
|
47 |
+
sphinx-rtd-theme==0.5.0
|
48 |
+
sphinxcontrib-applehelp==1.0.2
|
49 |
+
sphinxcontrib-devhelp==1.0.2
|
50 |
+
sphinxcontrib-htmlhelp==1.0.3
|
51 |
+
sphinxcontrib-jsmath==1.0.1
|
52 |
+
sphinxcontrib-qthelp==1.0.3
|
53 |
+
sphinxcontrib-serializinghtml==1.1.4
|
54 |
+
subword-nmt==0.3.7
|
55 |
+
tensorboard==2.3.0
|
56 |
+
tensorboard-plugin-wit==1.7.0
|
57 |
+
tokenizers==0.10.2
|
58 |
+
torch==1.11.0
|
59 |
+
torchtext==0.5.0
|
60 |
+
tqdm==4.30.0
|
61 |
+
transformers==4.6.0
|
62 |
+
waitress==1.4.4
|
63 |
+
werkzeug==1.0.1
|
64 |
+
xlrd==2.0.1
|
condaenv.rqs8eco6.requirements.txt
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
absl-py==0.9.0
|
2 |
+
alabaster==0.7.12
|
3 |
+
antlr4-python3-runtime==4.8
|
4 |
+
babel==2.8.0
|
5 |
+
ctranslate2==2.1.0
|
6 |
+
cython==0.29.23
|
7 |
+
docutils==0.16
|
8 |
+
et-xmlfile==1.1.0
|
9 |
+
filelock==3.0.12
|
10 |
+
flask==1.1.2
|
11 |
+
future==0.18.2
|
12 |
+
google-auth==1.19.2
|
13 |
+
google-auth-oauthlib==0.4.1
|
14 |
+
grpcio==1.30.0
|
15 |
+
huggingface-hub==0.0.8
|
16 |
+
hydra-core==1.0.6
|
17 |
+
imagesize==1.2.0
|
18 |
+
importlib-resources==5.1.2
|
19 |
+
indic-nlp-library==0.6
|
20 |
+
indicnlp==0.0.1
|
21 |
+
itsdangerous==1.1.0
|
22 |
+
jinja2==2.11.2
|
23 |
+
markdown==3.2.2
|
24 |
+
markupsafe==1.1.1
|
25 |
+
morfessor==2.0.6
|
26 |
+
oauthlib==3.1.0
|
27 |
+
omegaconf==2.0.6
|
28 |
+
openpyxl==3.0.9
|
29 |
+
packaging==20.4
|
30 |
+
pandas==1.0.5
|
31 |
+
polyglot==16.7.4
|
32 |
+
portalocker==2.0.0
|
33 |
+
protobuf==3.12.2
|
34 |
+
pyasn1-modules==0.2.8
|
35 |
+
pycld2==0.41
|
36 |
+
pygments==2.6.1
|
37 |
+
pyicu==2.7.4
|
38 |
+
pyonmttok==1.31.0
|
39 |
+
pyyaml==5.3.1
|
40 |
+
requests-oauthlib==1.3.0
|
41 |
+
sacrebleu==1.5.1
|
42 |
+
sacremoses==0.0.45
|
43 |
+
sentencepiece==0.1.95
|
44 |
+
snowballstemmer==2.0.0
|
45 |
+
sphinx==3.1.2
|
46 |
+
sphinx-argparse==0.2.5
|
47 |
+
sphinx-rtd-theme==0.5.0
|
48 |
+
sphinxcontrib-applehelp==1.0.2
|
49 |
+
sphinxcontrib-devhelp==1.0.2
|
50 |
+
sphinxcontrib-htmlhelp==1.0.3
|
51 |
+
sphinxcontrib-jsmath==1.0.1
|
52 |
+
sphinxcontrib-qthelp==1.0.3
|
53 |
+
sphinxcontrib-serializinghtml==1.1.4
|
54 |
+
subword-nmt==0.3.7
|
55 |
+
tensorboard==2.3.0
|
56 |
+
tensorboard-plugin-wit==1.7.0
|
57 |
+
tokenizers==0.10.2
|
58 |
+
torch==1.11.0
|
59 |
+
torchtext==0.5.0
|
60 |
+
tqdm==4.30.0
|
61 |
+
transformers==4.6.0
|
62 |
+
waitress==1.4.4
|
63 |
+
werkzeug==1.0.1
|
64 |
+
xlrd==2.0.1
|
condaenv.xig3xxzi.requirements.txt
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
absl-py==0.9.0
|
2 |
+
alabaster==0.7.12
|
3 |
+
antlr4-python3-runtime==4.8
|
4 |
+
babel==2.8.0
|
5 |
+
ctranslate2==2.1.0
|
6 |
+
cython==0.29.23
|
7 |
+
docutils==0.16
|
8 |
+
et-xmlfile==1.1.0
|
9 |
+
filelock==3.0.12
|
10 |
+
flask==1.1.2
|
11 |
+
future==0.18.2
|
12 |
+
google-auth==1.19.2
|
13 |
+
google-auth-oauthlib==0.4.1
|
14 |
+
grpcio==1.30.0
|
15 |
+
huggingface-hub==0.0.8
|
16 |
+
hydra-core==1.0.6
|
17 |
+
imagesize==1.2.0
|
18 |
+
importlib-resources==5.1.2
|
19 |
+
indic-nlp-library==0.6
|
20 |
+
indicnlp==0.0.1
|
21 |
+
itsdangerous==1.1.0
|
22 |
+
jinja2==2.11.2
|
23 |
+
markdown==3.2.2
|
24 |
+
markupsafe==1.1.1
|
25 |
+
morfessor==2.0.6
|
26 |
+
oauthlib==3.1.0
|
27 |
+
omegaconf==2.0.6
|
28 |
+
openpyxl==3.0.9
|
29 |
+
packaging==20.4
|
30 |
+
pandas==1.0.5
|
31 |
+
polyglot==16.7.4
|
32 |
+
portalocker==2.0.0
|
33 |
+
protobuf==3.12.2
|
34 |
+
pyasn1-modules==0.2.8
|
35 |
+
pycld2==0.41
|
36 |
+
pygments==2.6.1
|
37 |
+
pyicu==2.7.4
|
38 |
+
pyonmttok==1.31.0
|
39 |
+
pyyaml==5.3.1
|
40 |
+
requests-oauthlib==1.3.0
|
41 |
+
sacrebleu==1.5.1
|
42 |
+
sacremoses==0.0.45
|
43 |
+
sentencepiece==0.1.95
|
44 |
+
snowballstemmer==2.0.0
|
45 |
+
sphinx==3.1.2
|
46 |
+
sphinx-argparse==0.2.5
|
47 |
+
sphinx-rtd-theme==0.5.0
|
48 |
+
sphinxcontrib-applehelp==1.0.2
|
49 |
+
sphinxcontrib-devhelp==1.0.2
|
50 |
+
sphinxcontrib-htmlhelp==1.0.3
|
51 |
+
sphinxcontrib-jsmath==1.0.1
|
52 |
+
sphinxcontrib-qthelp==1.0.3
|
53 |
+
sphinxcontrib-serializinghtml==1.1.4
|
54 |
+
subword-nmt==0.3.7
|
55 |
+
tensorboard==2.3.0
|
56 |
+
tensorboard-plugin-wit==1.7.0
|
57 |
+
tokenizers==0.10.2
|
58 |
+
torch==1.11.0
|
59 |
+
torchtext==0.5.0
|
60 |
+
tqdm==4.30.0
|
61 |
+
transformers==4.6.0
|
62 |
+
waitress==1.4.4
|
63 |
+
werkzeug==1.0.1
|
64 |
+
xlrd==2.0.1
|
law/checkpoint_best.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:735aa82f3b7745db14a4308a27ff07ef2bf202df418725abe457dbe25022b448
|
3 |
+
size 2710751487
|
law/out/tokenized.en-hi/dict.en.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
law/out/tokenized.en-hi/dict.hi.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
law/out/tokenized.en-hi/preprocess.log
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Namespace(aim_repo=None, aim_run_hash=None, align_suffix=None, alignfile=None, all_gather_list_size=16384, amp=False, amp_batch_retries=2, amp_init_scale=128, amp_scale_window=None, azureml_logging=False, bf16=False, bpe=None, cpu=False, criterion='cross_entropy', dataset_impl='mmap', destdir='data/law/en-hi/out/tokenized.en-hi', dict_only=False, empty_cache_freq=0, fp16=False, fp16_init_scale=128, fp16_no_flatten_grads=False, fp16_scale_tolerance=0.0, fp16_scale_window=None, joined_dictionary=False, log_file=None, log_format=None, log_interval=100, lr_scheduler='fixed', memory_efficient_bf16=False, memory_efficient_fp16=False, min_loss_scale=0.0001, model_parallel_size=1, no_progress_bar=False, nwordssrc=-1, nwordstgt=-1, on_cpu_convert_precision=False, only_source=False, optimizer=None, padding_factor=8, plasma_path='/tmp/plasma', profile=False, quantization_config_path=None, reset_logging=False, scoring='bleu', seed=1, simul_type=None, source_lang='en', srcdict=None, suppress_crashes=False, target_lang='hi', task='translation', tensorboard_logdir=None, testpref='data/law/en-hi/test-tok', tgtdict=None, threshold_loss_scale=None, thresholdsrc=2, thresholdtgt=2, tokenizer=None, tpu=False, trainpref='data/law/en-hi/train-tok', use_plasma_view=False, user_dir=None, validpref='data/law/en-hi/dev-tok', wandb_project=None, workers=1)
|
2 |
+
[en] Dictionary: 118024 types
|
3 |
+
[en] data/law/en-hi/train-tok.en: 1025032 sents, 30011217 tokens, 0.333% replaced (by <unk>)
|
4 |
+
[en] Dictionary: 118024 types
|
5 |
+
[en] data/law/en-hi/dev-tok.en: 1000 sents, 33974 tokens, 0.992% replaced (by <unk>)
|
6 |
+
[en] Dictionary: 118024 types
|
7 |
+
[en] data/law/en-hi/test-tok.en: 1000 sents, 35659 tokens, 0.623% replaced (by <unk>)
|
8 |
+
[hi] Dictionary: 118264 types
|
9 |
+
[hi] data/law/en-hi/train-tok.hi: 1025032 sents, 31969543 tokens, 0.461% replaced (by <unk>)
|
10 |
+
[hi] Dictionary: 118264 types
|
11 |
+
[hi] data/law/en-hi/dev-tok.hi: 1000 sents, 36540 tokens, 0.868% replaced (by <unk>)
|
12 |
+
[hi] Dictionary: 118264 types
|
13 |
+
[hi] data/law/en-hi/test-tok.hi: 1000 sents, 37993 tokens, 0.511% replaced (by <unk>)
|
14 |
+
Wrote preprocessed data to data/law/en-hi/out/tokenized.en-hi
|
law/out/tokenized.en-hi/test.en-hi.en.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:64e6381139617bb5bb506789e3a550e13cba134e67670159c8e990f500ee45d2
|
3 |
+
size 142636
|
law/out/tokenized.en-hi/test.en-hi.en.idx
ADDED
Binary file (12 kB). View file
|
|
law/out/tokenized.en-hi/test.en-hi.hi.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2a797fa7a5981a20d5cbf82c0c2b989565f5f00b0b1628b5c75e06da66d904f3
|
3 |
+
size 151972
|
law/out/tokenized.en-hi/test.en-hi.hi.idx
ADDED
Binary file (12 kB). View file
|
|
law/out/tokenized.en-hi/train.en-hi.en.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9574d08bfed5ae631900d2a5ae77572bcc03c500f3923a9ba986efade9f2d968
|
3 |
+
size 120044868
|
law/out/tokenized.en-hi/train.en-hi.en.idx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:71af1ef7a84225c6245ee1abb557a9c26e7490b1127122f3e2e945dff67c4044
|
3 |
+
size 12300410
|
law/out/tokenized.en-hi/train.en-hi.hi.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6ea2dbb73d23b49a01c504141cf3434797f8fbde8c688ab165ac75ce016c8afe
|
3 |
+
size 127878172
|
law/out/tokenized.en-hi/train.en-hi.hi.idx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a651cf04fca2caa22c94f36734311f03dd34761ba1f7625e04495e281dc06ebf
|
3 |
+
size 12300410
|
law/out/tokenized.en-hi/valid.en-hi.en.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fc769efb358e20cf3672fdfae395de226726c31ebfd311a5dd1e2d3f3ff3ef76
|
3 |
+
size 135896
|
law/out/tokenized.en-hi/valid.en-hi.en.idx
ADDED
Binary file (12 kB). View file
|
|
law/out/tokenized.en-hi/valid.en-hi.hi.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:51b7c5bd10a2c3dc356919f9f8a2a31b795f3e0b74d96bdce71d7c2abc04c6a9
|
3 |
+
size 146160
|
law/out/tokenized.en-hi/valid.en-hi.hi.idx
ADDED
Binary file (12 kB). View file
|
|
multi/checkpoint_best.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:970dfdca6e5d70e30b1f39e6671eb2407ebfdef074e2978a242b834af761b573
|
3 |
+
size 568009339
|
multi/out/tokenized.en-hi/dict.en.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
multi/out/tokenized.en-hi/dict.hi.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
multi/out/tokenized.en-hi/preprocess.log
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Namespace(aim_repo=None, aim_run_hash=None, align_suffix=None, alignfile=None, all_gather_list_size=16384, amp=False, amp_batch_retries=2, amp_init_scale=128, amp_scale_window=None, azureml_logging=False, bf16=False, bpe=None, cpu=False, criterion='cross_entropy', dataset_impl='mmap', destdir='data/multi_domain/en-hi/out/tokenized.en-hi', dict_only=False, empty_cache_freq=0, fp16=False, fp16_init_scale=128, fp16_no_flatten_grads=False, fp16_scale_tolerance=0.0, fp16_scale_window=None, joined_dictionary=False, log_file=None, log_format=None, log_interval=100, lr_scheduler='fixed', memory_efficient_bf16=False, memory_efficient_fp16=False, min_loss_scale=0.0001, model_parallel_size=1, no_progress_bar=False, nwordssrc=-1, nwordstgt=-1, on_cpu_convert_precision=False, only_source=False, optimizer=None, padding_factor=8, plasma_path='/tmp/plasma', profile=False, quantization_config_path=None, reset_logging=False, scoring='bleu', seed=1, simul_type=None, source_lang='en', srcdict=None, suppress_crashes=False, target_lang='hi', task='translation', tensorboard_logdir=None, testpref='data/multi_domain/en-hi/test-tok', tgtdict=None, threshold_loss_scale=None, thresholdsrc=2, thresholdtgt=2, tokenizer=None, tpu=False, trainpref='data/multi_domain/en-hi/train-tok', use_plasma_view=False, user_dir=None, validpref='data/multi_domain/en-hi/dev-tok', wandb_project=None, workers=1)
|
2 |
+
[en] Dictionary: 132096 types
|
3 |
+
[en] data/multi_domain/en-hi/train-tok.en: 1216645 sents, 34463387 tokens, 0.0251% replaced (by <unk>)
|
4 |
+
[en] Dictionary: 132096 types
|
5 |
+
[en] data/multi_domain/en-hi/dev-tok.en: 3000 sents, 75857 tokens, 1.66% replaced (by <unk>)
|
6 |
+
[en] Dictionary: 132096 types
|
7 |
+
[en] data/multi_domain/en-hi/test-tok.en: 3000 sents, 76475 tokens, 1.45% replaced (by <unk>)
|
8 |
+
[hi] Dictionary: 18456 types
|
9 |
+
[hi] data/multi_domain/en-hi/train-tok.hi: 1216645 sents, 36390060 tokens, 0.0059% replaced (by <unk>)
|
10 |
+
[hi] Dictionary: 18456 types
|
11 |
+
[hi] data/multi_domain/en-hi/dev-tok.hi: 3000 sents, 78788 tokens, 5.57% replaced (by <unk>)
|
12 |
+
[hi] Dictionary: 18456 types
|
13 |
+
[hi] data/multi_domain/en-hi/test-tok.hi: 3000 sents, 79441 tokens, 4.37% replaced (by <unk>)
|
14 |
+
Wrote preprocessed data to data/multi_domain/en-hi/out/tokenized.en-hi
|
multi/out/tokenized.en-hi/test.en-hi.en.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6b70fa2fc3fd8a6281e3ef7c27dbde1872fa27c1365ed9971074a572e2c49682
|
3 |
+
size 305900
|
multi/out/tokenized.en-hi/test.en-hi.en.idx
ADDED
Binary file (36 kB). View file
|
|
multi/out/tokenized.en-hi/test.en-hi.hi.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:62db206e87427d277b4c1b78e9714e9dba3af4c4d56e068518c14bd2fd6c3de4
|
3 |
+
size 158882
|
multi/out/tokenized.en-hi/test.en-hi.hi.idx
ADDED
Binary file (36 kB). View file
|
|
multi/out/tokenized.en-hi/train.en-hi.en.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ac8a6bf85fb71cd62f1cee156df7800e59f50afb21be184927da6516724648b3
|
3 |
+
size 137853548
|
multi/out/tokenized.en-hi/train.en-hi.en.idx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ede07c149a5d012f9e28f416dfe9016cffce058d09ca86e193b0d172d9c8b65f
|
3 |
+
size 14599766
|