import os
import ssl
import urllib
import urllib.request

from tqdm import tqdm
def get_dataset_info(cfg):
    """Pull the download URLs, target filenames, and certificate-check flag out of a dataset config mapping."""
    return cfg["download_links"], cfg["filenames"], cfg["check_validity"]
# Modified from torchvision, as some datasets can't pass the certificate validity check:
# https://github.com/pytorch/vision/blob/868a3b42f4bffe29e4414ad7e4c7d9d0b4690ecb/torchvision/datasets/utils.py#L27C1-L32C40
def urlretrieve(url, filename, chunk_size=1024 * 32, check_validity=True):
    """Download *url* to *filename* in chunks, showing a tqdm progress bar.

    Args:
        url: URL to fetch (http/https).
        filename: Destination path; parent directories are created if needed.
        chunk_size: Bytes read per iteration (default 32 KiB).
        check_validity: When ``False``, disable SSL hostname and certificate
            verification — some dataset hosts serve invalid certificates.
    """
    # os.path.dirname returns "" for a bare filename, and os.makedirs("")
    # raises FileNotFoundError — only create directories when there are any.
    parent = os.path.dirname(filename)
    if parent:
        os.makedirs(parent, exist_ok=True)

    ctx = ssl.create_default_context()
    if not check_validity:
        # Both must be relaxed together: check_hostname=True forbids CERT_NONE.
        ctx.check_hostname = False
        ctx.verify_mode = ssl.CERT_NONE

    request = urllib.request.Request(url)
    with urllib.request.urlopen(request, context=ctx) as response:
        # response.length may be None (no Content-Length); tqdm accepts that.
        with open(filename, "wb") as fh, tqdm(total=response.length, unit="B", unit_scale=True) as pbar:
            while chunk := response.read(chunk_size):
                fh.write(chunk)
                pbar.update(len(chunk))