fetching.py 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246
  1. # -*- coding: utf-8 -*-
  2. # Based on code from the vispy project
  3. # Distributed under the (new) BSD License. See LICENSE.txt for more info.
  4. """Data downloading and reading functions"""
  5. from math import log
  6. import os
  7. from os import path as op
  8. import sys
  9. import shutil
  10. import time
  11. from . import appdata_dir, resource_dirs
  12. from . import StdoutProgressIndicator, urlopen
  13. class InternetNotAllowedError(IOError):
  14. """Plugins that need resources can just use get_remote_file(), but
  15. should catch this error and silently ignore it.
  16. """
  17. pass
  18. class NeedDownloadError(IOError):
  19. """Is raised when a remote file is requested that is not locally
  20. available, but which needs to be explicitly downloaded by the user.
  21. """
  22. def get_remote_file(fname, directory=None, force_download=False, auto=True):
  23. """Get a the filename for the local version of a file from the web
  24. Parameters
  25. ----------
  26. fname : str
  27. The relative filename on the remote data repository to download.
  28. These correspond to paths on
  29. ``https://github.com/imageio/imageio-binaries/``.
  30. directory : str | None
  31. The directory where the file will be cached if a download was
  32. required to obtain the file. By default, the appdata directory
  33. is used. This is also the first directory that is checked for
  34. a local version of the file. If the directory does not exist,
  35. it will be created.
  36. force_download : bool | str
  37. If True, the file will be downloaded even if a local copy exists
  38. (and this copy will be overwritten). Can also be a YYYY-MM-DD date
  39. to ensure a file is up-to-date (modified date of a file on disk,
  40. if present, is checked).
  41. auto : bool
  42. Whether to auto-download the file if its not present locally. Default
  43. True. If False and a download is needed, raises NeedDownloadError.
  44. Returns
  45. -------
  46. fname : str
  47. The path to the file on the local system.
  48. """
  49. _url_root = "https://github.com/imageio/imageio-binaries/raw/master/"
  50. url = _url_root + fname
  51. nfname = op.normcase(fname) # convert to native
  52. # Get dirs to look for the resource
  53. given_directory = directory
  54. directory = given_directory or appdata_dir("imageio")
  55. dirs = resource_dirs()
  56. dirs.insert(0, directory) # Given dir has preference
  57. # Try to find the resource locally
  58. for dir in dirs:
  59. filename = op.join(dir, nfname)
  60. if op.isfile(filename):
  61. if not force_download: # we're done
  62. if given_directory and given_directory != dir:
  63. filename2 = os.path.join(given_directory, nfname)
  64. # Make sure the output directory exists
  65. if not op.isdir(op.dirname(filename2)):
  66. os.makedirs(op.abspath(op.dirname(filename2)))
  67. shutil.copy(filename, filename2)
  68. return filename2
  69. return filename
  70. if isinstance(force_download, str):
  71. ntime = time.strptime(force_download, "%Y-%m-%d")
  72. ftime = time.gmtime(op.getctime(filename))
  73. if ftime >= ntime:
  74. if given_directory and given_directory != dir:
  75. filename2 = os.path.join(given_directory, nfname)
  76. # Make sure the output directory exists
  77. if not op.isdir(op.dirname(filename2)):
  78. os.makedirs(op.abspath(op.dirname(filename2)))
  79. shutil.copy(filename, filename2)
  80. return filename2
  81. return filename
  82. else:
  83. print("File older than %s, updating..." % force_download)
  84. break
  85. # If we get here, we're going to try to download the file
  86. if os.getenv("IMAGEIO_NO_INTERNET", "").lower() in ("1", "true", "yes"):
  87. raise InternetNotAllowedError(
  88. "Will not download resource from the "
  89. "internet because environment variable "
  90. "IMAGEIO_NO_INTERNET is set."
  91. )
  92. # Can we proceed with auto-download?
  93. if not auto:
  94. raise NeedDownloadError()
  95. # Get filename to store to and make sure the dir exists
  96. filename = op.join(directory, nfname)
  97. if not op.isdir(op.dirname(filename)):
  98. os.makedirs(op.abspath(op.dirname(filename)))
  99. # let's go get the file
  100. if os.getenv("CONTINUOUS_INTEGRATION", False): # pragma: no cover
  101. # On CI, we retry a few times ...
  102. for i in range(2):
  103. try:
  104. _fetch_file(url, filename)
  105. return filename
  106. except IOError:
  107. time.sleep(0.5)
  108. else:
  109. _fetch_file(url, filename)
  110. return filename
  111. else: # pragma: no cover
  112. _fetch_file(url, filename)
  113. return filename
  114. def _fetch_file(url, file_name, print_destination=True):
  115. """Load requested file, downloading it if needed or requested
  116. Parameters
  117. ----------
  118. url: string
  119. The url of file to be downloaded.
  120. file_name: string
  121. Name, along with the path, of where downloaded file will be saved.
  122. print_destination: bool, optional
  123. If true, destination of where file was saved will be printed after
  124. download finishes.
  125. resume: bool, optional
  126. If true, try to resume partially downloaded files.
  127. """
  128. # Adapted from NISL:
  129. # https://github.com/nisl/tutorial/blob/master/nisl/datasets.py
  130. print(
  131. "Imageio: %r was not found on your computer; "
  132. "downloading it now." % os.path.basename(file_name)
  133. )
  134. temp_file_name = file_name + ".part"
  135. local_file = None
  136. initial_size = 0
  137. errors = []
  138. for tries in range(4):
  139. try:
  140. # Checking file size and displaying it alongside the download url
  141. remote_file = urlopen(url, timeout=5.0)
  142. file_size = int(remote_file.headers["Content-Length"].strip())
  143. size_str = _sizeof_fmt(file_size)
  144. print("Try %i. Download from %s (%s)" % (tries + 1, url, size_str))
  145. # Downloading data (can be extended to resume if need be)
  146. local_file = open(temp_file_name, "wb")
  147. _chunk_read(remote_file, local_file, initial_size=initial_size)
  148. # temp file must be closed prior to the move
  149. if not local_file.closed:
  150. local_file.close()
  151. shutil.move(temp_file_name, file_name)
  152. if print_destination is True:
  153. sys.stdout.write("File saved as %s.\n" % file_name)
  154. break
  155. except Exception as e:
  156. errors.append(e)
  157. print("Error while fetching file: %s." % str(e))
  158. finally:
  159. if local_file is not None:
  160. if not local_file.closed:
  161. local_file.close()
  162. else:
  163. raise IOError(
  164. "Unable to download %r. Perhaps there is no internet "
  165. "connection? If there is, please report this problem."
  166. % os.path.basename(file_name)
  167. )
  168. def _chunk_read(response, local_file, chunk_size=8192, initial_size=0):
  169. """Download a file chunk by chunk and show advancement
  170. Can also be used when resuming downloads over http.
  171. Parameters
  172. ----------
  173. response: urllib.response.addinfourl
  174. Response to the download request in order to get file size.
  175. local_file: file
  176. Hard disk file where data should be written.
  177. chunk_size: integer, optional
  178. Size of downloaded chunks. Default: 8192
  179. initial_size: int, optional
  180. If resuming, indicate the initial size of the file.
  181. """
  182. # Adapted from NISL:
  183. # https://github.com/nisl/tutorial/blob/master/nisl/datasets.py
  184. bytes_so_far = initial_size
  185. # Returns only amount left to download when resuming, not the size of the
  186. # entire file
  187. total_size = int(response.headers["Content-Length"].strip())
  188. total_size += initial_size
  189. progress = StdoutProgressIndicator("Downloading")
  190. progress.start("", "bytes", total_size)
  191. while True:
  192. chunk = response.read(chunk_size)
  193. bytes_so_far += len(chunk)
  194. if not chunk:
  195. break
  196. _chunk_write(chunk, local_file, progress)
  197. progress.finish("Done")
  198. def _chunk_write(chunk, local_file, progress):
  199. """Write a chunk to file and update the progress bar"""
  200. local_file.write(chunk)
  201. progress.increase_progress(len(chunk))
  202. time.sleep(0) # Give other threads a chance, e.g. those that handle stdout pipes
  203. def _sizeof_fmt(num):
  204. """Turn number of bytes into human-readable str"""
  205. units = ["bytes", "kB", "MB", "GB", "TB", "PB"]
  206. decimals = [0, 0, 1, 2, 2, 2]
  207. """Human friendly file size"""
  208. if num > 1:
  209. exponent = min(int(log(num, 1024)), len(units) - 1)
  210. quotient = float(num) / 1024**exponent
  211. unit = units[exponent]
  212. num_decimals = decimals[exponent]
  213. format_string = "{0:.%sf} {1}" % num_decimals
  214. return format_string.format(quotient, unit)
  215. return "0 bytes" if num == 0 else "1 byte"