data_io.py 2.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475
  1. # Copyright Materialize, Inc. and contributors. All rights reserved.
  2. #
  3. # Use of this software is governed by the Business Source License
  4. # included in the LICENSE file at the root of this repository.
  5. #
  6. # As of the Change Date specified in that file, in accordance with
  7. # the Business Source License, use of this software will be governed
  8. # by the Apache License, Version 2.0.
  9. import json
  10. import os
  11. from dataclasses import dataclass
  12. from datetime import datetime, timedelta
  13. from typing import Any
  14. @dataclass
  15. class FilePath:
  16. def get(self) -> str:
  17. raise NotImplementedError
  18. def __str__(self):
  19. return self.get()
  20. @dataclass
  21. class SimpleFilePath(FilePath):
  22. file_name: str
  23. def get(self) -> str:
  24. return self.file_name
  25. def write_results_to_file(
  26. results: list[Any], output_file_path: FilePath, quiet_mode: bool = False
  27. ) -> None:
  28. with open(output_file_path.get(), "w") as f:
  29. json.dump(results, f, ensure_ascii=False, indent=4)
  30. if not quiet_mode:
  31. print(f"Written data to {output_file_path}")
  32. def read_results_from_file(file_path: FilePath, quiet_mode: bool = False) -> list[Any]:
  33. with open(file_path.get()) as f:
  34. data = json.load(f)
  35. if not quiet_mode:
  36. print(f"Loaded data from {file_path}")
  37. return data
  38. def exists_file_with_recent_data(
  39. file_path: FilePath, max_allowed_cache_age_in_hours: int | None
  40. ) -> bool:
  41. if not exists_file(file_path):
  42. return False
  43. if max_allowed_cache_age_in_hours is None:
  44. return True
  45. modification_date = get_last_modification_date(file_path)
  46. max_modification_date = datetime.now() - timedelta(
  47. hours=max_allowed_cache_age_in_hours
  48. )
  49. return modification_date > max_modification_date
  50. def exists_file(file_path: FilePath) -> bool:
  51. return os.path.isfile(file_path.get())
  52. def get_last_modification_date(file_path: FilePath) -> datetime:
  53. modification_date_as_sec_since_epoch = os.path.getmtime(file_path.get())
  54. return datetime.utcfromtimestamp(modification_date_as_sec_since_epoch)