Update README.md
Browse files
README.md
CHANGED
@@ -52,7 +52,7 @@ for root, dirs, files in os.walk(test/):
|
|
52 |
test_dataset= load_dataset("json", data_files=[os.path.join(root,i) for i in files],split="train")
|
53 |
|
54 |
# Remove unnecessary characters
|
55 |
-
chars_to_ignore_regex = '[
|
56 |
def remove_special_characters(batch):
|
57 |
batch["sentence"] = re.sub(chars_to_ignore_regex, '', batch["sentence"]).lower() + " "
|
58 |
return batch
|
@@ -99,7 +99,7 @@ import re
|
|
99 |
for root, dirs, files in os.walk(test/):
|
100 |
test_dataset = load_dataset("json", data_files=[os.path.join(root,i) for i in files],split="train")
|
101 |
|
102 |
-
chars_to_ignore_regex = '[
|
103 |
batch["sentence"] = re.sub(chars_to_ignore_regex, '', batch["sentence"]).lower() + " "
|
104 |
return batch
|
105 |
|
@@ -149,7 +149,7 @@ The script used for training can be found [here](https://colab.research.google.c
|
|
149 |
|
150 |
# Collaborators on this project
|
151 |
|
152 |
-
- Chris C. Emezue ([Twitter](https://twitter.com/ChrisEmezue))([email protected])
|
153 |
-
- Bonaventure F.P. Dossou ([Twitter](https://twitter.com/bonadossou))([email protected])
|
154 |
|
155 |
## This is a joint project continuing our research on [OkwuGbé: End-to-End Speech Recognition for Fon and Igbo](https://arxiv.org/abs/2103.07762)
|
|
|
52 |
test_dataset= load_dataset("json", data_files=[os.path.join(root,i) for i in files],split="train")
|
53 |
|
54 |
# Remove unnecessary characters
|
55 |
+
chars_to_ignore_regex = '[\\,\\?\\.\\!\\-\\;\\:\\"\\“\\%\\‘\\”]'
|
56 |
def remove_special_characters(batch):
|
57 |
batch["sentence"] = re.sub(chars_to_ignore_regex, '', batch["sentence"]).lower() + " "
|
58 |
return batch
|
|
|
99 |
for root, dirs, files in os.walk(test/):
|
100 |
test_dataset = load_dataset("json", data_files=[os.path.join(root,i) for i in files],split="train")
|
101 |
|
102 |
+
chars_to_ignore_regex = '[\\,\\?\\.\\!\\-\\;\\:\\"\\“\\%\\‘\\”]'
|
103 |
batch["sentence"] = re.sub(chars_to_ignore_regex, '', batch["sentence"]).lower() + " "
|
104 |
return batch
|
105 |
|
|
|
149 |
|
150 |
# Collaborators on this project
|
151 |
|
152 |
+
- Chris C. Emezue ([Twitter](https://twitter.com/ChrisEmezue)) | ([email protected])
|
153 |
+
- Bonaventure F.P. Dossou (Hugging Face username: [bonadossou](https://huggingface.co/bonadossou)) | ([Twitter](https://twitter.com/bonadossou)) | ([email protected])
|
154 |
|
155 |
## This is a joint project continuing our research on [OkwuGbé: End-to-End Speech Recognition for Fon and Igbo](https://arxiv.org/abs/2103.07762)
|