# NOTE: statement order is intentionally preserved. The wildcard imports in the
# middle of this block may shadow names bound above them, and the imports that
# follow them rebind names afterwards, so regrouping could change name resolution.
import numpy as np
import pandas as pd
import seaborn as sns
from typing import Optional, List, Tuple, Any
from collections import OrderedDict
import os
import ast
import re
import string
import torch
import transformers
import datasets
import chardet
import gdown
from sklearn.preprocessing import MultiLabelBinarizer, LabelEncoder
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
# NOTE: `logging` below is transformers' logging utility, not the stdlib module.
from transformers import (
    AutoTokenizer,
    AutoModel,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
    logging,
    RobertaForTokenClassification,
    RobertaConfig,
    AutoConfig,
)
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from torchcrf import CRF
from accelerate import Accelerator
import torch.nn as nn
import torch.nn.functional as F
import underthesea

# Wildcard imports (presumably project-local modules -- TODO confirm). They can
# overwrite any name bound above; consider replacing them with explicit imports.
from utils import *
from all_datasets import *
from model import *

# Imports below run after the wildcard imports, so they rebind these names even
# if a wildcard import exported something with the same name. The repeated
# imports (`re`, `List`, `Optional`) are kept for that reason.
from huggingface_hub import login
import PIL
import fitz
import pdf2image
import re
import unicodedata
from transformers import AutoProcessor, LayoutLMv3ForTokenClassification
from unidecode import unidecode
from pathlib import Path
from nltk import everygrams
from collections import Counter
from typing import List, Optional
from datetime import datetime
# NOTE: `parser` is dateutil's date parser module.
from dateutil import parser, relativedelta