Update README.md
Browse files
README.md
CHANGED
@@ -5,9 +5,9 @@ license: mit
|
|
5 |
```python
|
6 |
import pandas as pd
|
7 |
from datasets import load_dataset
|
8 |
-
from transformers import
|
9 |
-
model =
|
10 |
-
tokenizer = AutoTokenizer.from_pretrained(
|
11 |
forced_bos_id = forced_bos_token_id = tokenizer.get_lang_id("bn")
|
12 |
|
13 |
|
@@ -21,7 +21,7 @@ def process_row(row: List, row_index: int):
|
|
21 |
row_cell_values = []
|
22 |
for cell_value in row:
|
23 |
if isinstance(cell_value, int) or isinstance(cell_value, float):
|
24 |
-
cell_value =
|
25 |
row_cell_values.append(str(cell_value))
|
26 |
else:
|
27 |
row_cell_values.append(cell_value)
|
@@ -34,7 +34,7 @@ def process_row(row: List, row_index: int):
|
|
34 |
def process_table(table_content: Dict):
|
35 |
table_str = process_header(table_content["header"]) + " "
|
36 |
for i, row_example in enumerate(table_content["rows"]):
|
37 |
-
table_str +=
|
38 |
return table_str.strip()
|
39 |
|
40 |
# load the dataset
|
|
|
5 |
```python
|
6 |
import pandas as pd
|
7 |
from datasets import load_dataset
|
8 |
+
from transformers import AutoTokenizer, M2M100ForConditionalGeneration
|
9 |
+
model = M2M100ForConditionalGeneration.from_pretrained("vaishali/BnTQA-M2M")
|
10 |
+
tokenizer = AutoTokenizer.from_pretrained("vaishali/BnTQA-M2M", src_lang="bn", tgt_lang="bn")
|
11 |
forced_bos_id = forced_bos_token_id = tokenizer.get_lang_id("bn")
|
12 |
|
13 |
|
|
|
21 |
row_cell_values = []
|
22 |
for cell_value in row:
|
23 |
if isinstance(cell_value, int) or isinstance(cell_value, float):
|
24 |
+
cell_value = convert_engDigit_to_bengali(str(cell_value))
|
25 |
row_cell_values.append(str(cell_value))
|
26 |
else:
|
27 |
row_cell_values.append(cell_value)
|
|
|
34 |
def process_table(table_content: Dict):
|
35 |
table_str = process_header(table_content["header"]) + " "
|
36 |
for i, row_example in enumerate(table_content["rows"]):
|
37 |
+
table_str += process_row(row_example, row_index=i + 1) + " "
|
38 |
return table_str.strip()
|
39 |
|
40 |
# load the dataset
|