Spaces:
Sleeping
Sleeping
import os | |
import urllib.request as request | |
import zipfile | |
from textsummarizer.logging import logger | |
from textsummarizer.utils.common import get_size | |
from textsummarizer.entity.config_entity import DataIngestionConfig | |
from pathlib import Path | |
class DataIngestion:
    """Download the archive named in the ingestion config and unpack it.

    The config object is only read, never modified. The attributes used
    are ``source_URL``, ``local_data_file`` and ``unzip_dir``.
    """

    def __init__(self, config: "DataIngestionConfig"):
        """Store the ingestion configuration for later use."""
        self.config = config

    def download_file(self):
        """Fetch ``config.source_URL`` into ``config.local_data_file``.

        If the destination already exists nothing is downloaded and only
        its size is logged. Otherwise the data is first written to a
        sibling ``<name>.part`` file and moved into place once the
        transfer has finished, so an interrupted or truncated download
        is never mistaken for a complete file on the next run.

        Raises:
            Whatever ``urllib.request.urlretrieve`` raises (for example
            ``URLError`` or ``ContentTooShortError``); the partial file
            is removed before the exception propagates.
        """
        target = Path(self.config.local_data_file)

        if target.exists():
            logger.info(f'File already exist of size: {get_size(target)}')
            return

        # The destination directory may not have been created yet.
        target.parent.mkdir(parents=True, exist_ok=True)

        partial = target.with_name(target.name + ".part")
        try:
            _, headers = request.urlretrieve(
                url=self.config.source_URL,
                filename=str(partial),
            )
            # Same-directory rename: the final path only ever holds a
            # fully written file.
            os.replace(partial, target)
        finally:
            # Still present only when the download or the rename failed.
            if partial.exists():
                partial.unlink()

        logger.info(f'Download! with following info: \n{headers}')

    def extract_file(self):
        """Extract every member of ``config.local_data_file`` into
        ``config.unzip_dir``, creating that directory if needed.
        """
        unzip_dir = self.config.unzip_dir
        os.makedirs(unzip_dir, exist_ok=True)
        with zipfile.ZipFile(self.config.local_data_file, 'r') as archive:
            archive.extractall(unzip_dir)