import urllib import ssl from tqdm import tqdm import os def get_dataset_info(cfg): download_urls, filenames, check_validity = cfg["download_links"], cfg["filenames"], cfg["check_validity"] return download_urls, filenames, check_validity # Modified from torchvision as some datasets cant pass the certificate validity check: # https://github.com/pytorch/vision/blob/868a3b42f4bffe29e4414ad7e4c7d9d0b4690ecb/torchvision/datasets/utils.py#L27C1-L32C40 def urlretrieve(url, filename, chunk_size=1024 * 32, check_validity=True): os.makedirs(os.path.dirname(filename), exist_ok=True) ctx = ssl.create_default_context() if not check_validity: ctx.check_hostname = False ctx.verify_mode = ssl.CERT_NONE request = urllib.request.Request(url) with urllib.request.urlopen(request, context=ctx) as response: with open(filename, "wb") as fh, tqdm(total=response.length, unit="B", unit_scale=True) as pbar: while chunk := response.read(chunk_size): fh.write(chunk) pbar.update(len(chunk))