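# NOTE(review): "Spaces: Sleeping / Sleeping" appears to be web-page status
# residue captured with this file, not part of the module — safe to remove.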
import logging | |
import os | |
from dataclasses import dataclass | |
from logging.handlers import RotatingFileHandler | |
from pathlib import Path | |
from rich.logging import RichHandler | |
# from nltk.corpus import stopwords | |
# nltk.download("stopwords") | |
class Cfg:
    """Static configuration namespace: stopwords, filesystem paths, hyperparameters.

    Every setting is a plain class attribute, so it is read as ``Cfg.<name>``
    without instantiating the class. All path attributes are ``str`` values
    built with ``os.path.join`` relative to the location of this file.
    """

    # Hard-coded English stopword list, kept inline so no download is needed at
    # import time. NOTE(review): presumably mirrors NLTK's English stopwords
    # (see the commented-out nltk import above) — confirm before editing.
    STOPWORDS = [
        "i", "me", "my", "myself", "we", "our", "ours", "ourselves",
        "you", "you're", "you've", "you'll", "you'd", "your", "yours",
        "yourself", "yourselves",
        "he", "him", "his", "himself",
        "she", "she's", "her", "hers", "herself",
        "it", "it's", "its", "itself",
        "they", "them", "their", "theirs", "themselves",
        "what", "which", "who", "whom",
        "this", "that", "that'll", "these", "those",
        "am", "is", "are", "was", "were", "be", "been", "being",
        "have", "has", "had", "having",
        "do", "does", "did", "doing",
        "a", "an", "the", "and", "but", "if", "or", "because", "as",
        "until", "while", "of", "at", "by", "for", "with", "about",
        "against", "between", "into", "through", "during", "before",
        "after", "above", "below", "to", "from", "up", "down", "in",
        "out", "on", "off", "over", "under", "again", "further", "then",
        "once", "here", "there", "when", "where", "why", "how",
        "all", "any", "both", "each", "few", "more", "most", "other",
        "some", "such", "no", "nor", "not", "only", "own", "same", "so",
        "than", "too", "very", "s", "t", "can", "will", "just",
        "don", "don't", "should", "should've", "now",
        "d", "ll", "m", "o", "re", "ve", "y",
        "ain", "aren", "aren't", "couldn", "couldn't", "didn", "didn't",
        "doesn", "doesn't", "hadn", "hadn't", "hasn", "hasn't",
        "haven", "haven't", "isn", "isn't", "ma",
        "mightn", "mightn't", "mustn", "mustn't", "needn", "needn't",
        "shan", "shan't", "shouldn", "shouldn't", "wasn", "wasn't",
        "weren", "weren't", "won", "won't", "wouldn", "wouldn't",
    ]

    # Temporaries used only while the class body executes; they are deleted at
    # the end of the body so the class exposes exactly the public settings.
    _here = Path(__file__).parent      # directory containing this file
    _root = _here.parent.parent        # three levels above this file

    # Dataset locations
    dataset_loc = os.path.join(_root, "dataset", "raw", "news_dataset.csv")
    preprocessed_data_path = os.path.join(_root, "dataset", "preprocessed")

    # Sweep configuration lives next to this file
    sweep_config_path = os.path.join(_here, "sweep_config.yaml")

    # Logs path
    logs_path = os.path.join(_root, "logs")

    # Artifacts directory and the model file stored inside it
    artifacts_path = os.path.join(_root, "artifacts")
    model_path = os.path.join(_root, "artifacts", "model.pt")

    # Train/test split fraction
    test_size = 0.2

    # Tokenization-related options
    add_special_tokens = True
    max_len = 50
    pad_to_max_length = True
    truncation = True

    # Model / optimisation hyperparameters
    change_config = False
    dropout_pb = 0.5
    lr = 1e-4
    lr_redfactor = 0.7
    lr_redpatience = 4
    epochs = 10
    batch_size = 128
    num_classes = 7
    sweep_run = 10

    # Maps a class index (0..6) to its category name
    index_to_class = {
        0: "Business",
        1: "Entertainment",
        2: "Health",
        3: "Science",
        4: "Sports",
        5: "Technology",
        6: "Worldwide",
    }

    del _here, _root
# Logging setup, executed at import time: the root logger gets a console
# handler plus two rotating log files under Cfg.logs_path.

# Create logs folder (the file handlers below write into this directory)
os.makedirs(Cfg.logs_path, exist_ok=True)

# Get root logger
logger = logging.getLogger()
logger.setLevel(logging.INFO)

# Create handlers
console_handler = RichHandler(markup=True)
console_handler.setLevel(logging.INFO)

# INFO and above go to info.log; the file rotates once it reaches the size
# limit and up to 10 rotated backups are kept.
info_handler = RotatingFileHandler(
    filename=Path(Cfg.logs_path, "info.log"),
    maxBytes=10 * 1024 * 1024,  # 10 MiB (10485760 bytes)
    backupCount=10,
)
info_handler.setLevel(logging.INFO)

# ERROR and above are additionally written to error.log, same rotation policy.
error_handler = RotatingFileHandler(
    filename=Path(Cfg.logs_path, "error.log"),
    maxBytes=10 * 1024 * 1024,  # 10 MiB (10485760 bytes)
    backupCount=10,
)
error_handler.setLevel(logging.ERROR)

# Create formatters: the console shows the bare message, the files also record
# level, timestamp and the logger/file/function/line that emitted the record.
minimal_formatter = logging.Formatter(fmt="%(message)s")
detailed_formatter = logging.Formatter(fmt="%(levelname)s %(asctime)s [%(name)s:%(filename)s:%(funcName)s:%(lineno)d]\n%(message)s\n")

# Hook it all up
console_handler.setFormatter(fmt=minimal_formatter)
info_handler.setFormatter(fmt=detailed_formatter)
error_handler.setFormatter(fmt=detailed_formatter)
logger.addHandler(hdlr=console_handler)
logger.addHandler(hdlr=info_handler)
logger.addHandler(hdlr=error_handler)