util.py 62 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001
  1. from __future__ import annotations
  2. import colorsys
  3. import contextlib
  4. import dataclasses
  5. import enum
  6. import gzip
  7. import importlib
  8. import importlib.util
  9. import itertools
  10. import json
  11. import logging
  12. import math
  13. import numbers
  14. import os
  15. import pathlib
  16. import platform
  17. import queue
  18. import random
  19. import re
  20. import secrets
  21. import shlex
  22. import socket
  23. import string
  24. import sys
  25. import tarfile
  26. import tempfile
  27. import threading
  28. import time
  29. import types
  30. import urllib
  31. from dataclasses import asdict, is_dataclass
  32. from datetime import date, datetime, timedelta
  33. from importlib import import_module
  34. from sys import getsizeof
  35. from types import ModuleType
  36. from typing import (
  37. IO,
  38. TYPE_CHECKING,
  39. Callable,
  40. Iterable,
  41. Mapping,
  42. Sequence,
  43. TextIO,
  44. Union,
  45. )
  46. from typing_extensions import Any, Generator, TypeGuard, TypeVar, deprecated
  47. import wandb
  48. import wandb.env
  49. from wandb.errors import (
  50. AuthenticationError,
  51. CommError,
  52. UsageError,
  53. WandbCoreNotAvailableError,
  54. )
  55. from wandb.errors.term import terminput
  56. from wandb.sdk.lib import filesystem, runid
  57. from wandb.sdk.lib.json_util import dump, dumps
  58. from wandb.sdk.lib.paths import FilePathStr, StrPath
  59. if TYPE_CHECKING:
  60. from requests import Response
  61. from wandb.sdk.artifacts.artifact import Artifact
  62. CheckRetryFnType = Callable[[Exception], Union[bool, timedelta]]
  63. T = TypeVar("T")
  64. logger = logging.getLogger(__name__)
  65. _not_importable = set()
  66. LAUNCH_JOB_ARTIFACT_SLOT_NAME = "_wandb_job"
  67. MAX_LINE_BYTES = (10 << 20) - (100 << 10) # imposed by back end
  68. IS_GIT = os.path.exists(os.path.join(os.path.dirname(__file__), "..", ".git"))
  69. # From https://docs.docker.com/engine/reference/commandline/tag/
  70. # "Name components may contain lowercase letters, digits and separators.
  71. # A separator is defined as a period, one or two underscores, or one or more dashes.
  72. # A name component may not start or end with a separator."
  73. DOCKER_IMAGE_NAME_SEPARATOR = "(?:__|[._]|[-]+)"
  74. RE_DOCKER_IMAGE_NAME_SEPARATOR_START = re.compile("^" + DOCKER_IMAGE_NAME_SEPARATOR)
  75. RE_DOCKER_IMAGE_NAME_SEPARATOR_END = re.compile(DOCKER_IMAGE_NAME_SEPARATOR + "$")
  76. RE_DOCKER_IMAGE_NAME_SEPARATOR_REPEAT = re.compile(DOCKER_IMAGE_NAME_SEPARATOR + "{2,}")
  77. RE_DOCKER_IMAGE_NAME_CHARS = re.compile(r"[^a-z0-9._\-]")
  78. POW_10_BYTES = [
  79. ("B", 10**0),
  80. ("KB", 10**3),
  81. ("MB", 10**6),
  82. ("GB", 10**9),
  83. ("TB", 10**12),
  84. ("PB", 10**15),
  85. ("EB", 10**18),
  86. ]
  87. POW_2_BYTES = [
  88. ("B", 2**0),
  89. ("KiB", 2**10),
  90. ("MiB", 2**20),
  91. ("GiB", 2**30),
  92. ("TiB", 2**40),
  93. ("PiB", 2**50),
  94. ("EiB", 2**60),
  95. ]
  96. def vendor_setup() -> Callable:
  97. """Create a function that restores user paths after vendor imports.
  98. This enables us to use the vendor directory for packages we don't depend on. Call
  99. the returned function after imports are complete. If you don't you may modify the
  100. user's path which is never good.
  101. Usage:
  102. ```python
  103. reset_path = vendor_setup()
  104. # do any vendor imports...
  105. reset_path()
  106. ```
  107. """
  108. original_path = [directory for directory in sys.path]
  109. def reset_import_path() -> None:
  110. sys.path = original_path
  111. parent_dir = os.path.abspath(os.path.dirname(__file__))
  112. vendor_dir = os.path.join(parent_dir, "vendor")
  113. vendor_packages = (
  114. "gql-0.2.0",
  115. "graphql-core-1.1",
  116. "watchdog_0_9_0",
  117. "promise-2.3.0",
  118. )
  119. package_dirs = [os.path.join(vendor_dir, p) for p in vendor_packages]
  120. for p in [vendor_dir] + package_dirs:
  121. if p not in sys.path:
  122. sys.path.insert(1, p)
  123. return reset_import_path
  124. def vendor_import(name: str) -> Any:
  125. reset_path = vendor_setup()
  126. module = import_module(name)
  127. reset_path()
  128. return module
  129. class LazyModuleState:
  130. def __init__(self, module: types.ModuleType) -> None:
  131. self.module = module
  132. self.load_started = False
  133. self.lock = threading.RLock()
  134. def load(self) -> None:
  135. with self.lock:
  136. if self.load_started:
  137. return
  138. self.load_started = True
  139. assert self.module.__spec__ is not None
  140. assert self.module.__spec__.loader is not None
  141. self.module.__spec__.loader.exec_module(self.module)
  142. self.module.__class__ = types.ModuleType
  143. # Set the submodule as an attribute on the parent module
  144. # This enables access to the submodule via normal attribute access.
  145. parent, _, child = self.module.__name__.rpartition(".")
  146. if parent:
  147. parent_module = sys.modules[parent]
  148. setattr(parent_module, child, self.module)
  149. class LazyModule(types.ModuleType):
  150. def __getattribute__(self, name: str) -> Any:
  151. state = object.__getattribute__(self, "__lazy_module_state__")
  152. state.load()
  153. return object.__getattribute__(self, name)
  154. def __setattr__(self, name: str, value: Any) -> None:
  155. state = object.__getattribute__(self, "__lazy_module_state__")
  156. state.load()
  157. object.__setattr__(self, name, value)
  158. def __delattr__(self, name: str) -> None:
  159. state = object.__getattribute__(self, "__lazy_module_state__")
  160. state.load()
  161. object.__delattr__(self, name)
  162. def import_module_lazy(name: str) -> types.ModuleType:
  163. """Import a module lazily, only when it is used.
  164. Inspired by importlib.util.LazyLoader, but improved so that the module loading is
  165. thread-safe. Circular dependency between modules can lead to a deadlock if the two
  166. modules are loaded from different threads.
  167. :param (str) name: Dot-separated module path. E.g., 'scipy.stats'.
  168. """
  169. try:
  170. return sys.modules[name]
  171. except KeyError:
  172. spec = importlib.util.find_spec(name)
  173. if spec is None:
  174. raise ModuleNotFoundError
  175. module = importlib.util.module_from_spec(spec)
  176. module.__lazy_module_state__ = LazyModuleState(module) # type: ignore
  177. module.__class__ = LazyModule
  178. sys.modules[name] = module
  179. return module
  180. def get_module(
  181. name: str,
  182. required: str | None = None,
  183. lazy: bool = True,
  184. ) -> Any:
  185. """Return module or None. Absolute import is required.
  186. :param (str) name: Dot-separated module path. E.g., 'scipy.stats'.
  187. :param (str) required: A string to raise a ValueError if missing
  188. :param (bool) lazy: If True, return a lazy loader for the module.
  189. :return: (module|None) If import succeeds, the module will be returned.
  190. """
  191. if name not in _not_importable:
  192. try:
  193. if not lazy:
  194. return import_module(name)
  195. else:
  196. return import_module_lazy(name)
  197. except Exception:
  198. _not_importable.add(name)
  199. msg = f"Error importing optional module {name}"
  200. if required:
  201. logger.exception(msg)
  202. if required and name in _not_importable:
  203. raise wandb.Error(required)
  204. def get_optional_module(name) -> importlib.ModuleInterface | None: # type: ignore
  205. return get_module(name)
  206. np = get_module("numpy")
  207. pd_available = False
  208. pandas_spec = importlib.util.find_spec("pandas")
  209. if pandas_spec is not None:
  210. pd_available = True
  211. # TODO: Revisit these limits
  212. VALUE_BYTES_LIMIT = 100000
  213. @deprecated("Read the `app_url` setting from the appropriate Settings object.")
  214. def app_url(api_url: str) -> str:
  215. """Returns the URL for the W&B UI without a trailing slash."""
  216. if app_url := wandb.env.get_app_url():
  217. return str(app_url.strip("/"))
  218. return api_to_app_url(api_url)
  219. def api_to_app_url(api_url: str) -> str:
  220. """Convert the API URL to an app (UI) URL.
  221. Unlike the deprecated `app_url()`, this is a pure function: it does
  222. not consult environment variables.
  223. """
  224. if "://api.wandb.test" in api_url:
  225. # dev mode
  226. return api_url.replace("://api.", "://app.").strip("/")
  227. elif "://api.wandb." in api_url:
  228. # cloud
  229. return api_url.replace("://api.", "://").strip("/")
  230. elif "://api." in api_url:
  231. # onprem cloud
  232. return api_url.replace("://api.", "://app.").strip("/")
  233. # wandb/local
  234. return api_url
  235. def get_full_typename(o: Any) -> Any:
  236. """Determine types based on type names.
  237. Avoids needing to to import (and therefore depend on) PyTorch, TensorFlow, etc.
  238. """
  239. instance_name = o.__class__.__module__ + "." + o.__class__.__name__
  240. if instance_name in ["builtins.module", "__builtin__.module"]:
  241. return o.__name__
  242. else:
  243. return instance_name
  244. def get_h5_typename(o: Any) -> Any:
  245. typename = get_full_typename(o)
  246. if is_tf_tensor_typename(typename):
  247. return "tensorflow.Tensor"
  248. elif is_pytorch_tensor_typename(typename):
  249. return "torch.Tensor"
  250. else:
  251. return o.__class__.__module__.split(".")[0] + "." + o.__class__.__name__
  252. def is_uri(string: str) -> bool:
  253. parsed_uri = urllib.parse.urlparse(string)
  254. return len(parsed_uri.scheme) > 0
  255. def local_file_uri_to_path(uri: str) -> str:
  256. """Convert URI to local filesystem path.
  257. No-op if the uri does not have the expected scheme.
  258. """
  259. path = urllib.parse.urlparse(uri).path if uri.startswith("file:") else uri
  260. return urllib.request.url2pathname(path)
  261. def get_local_path_or_none(path_or_uri: str) -> str | None:
  262. """Return path if local, None otherwise.
  263. Return None if the argument is a local path (not a scheme or file:///). Otherwise
  264. return `path_or_uri`.
  265. """
  266. parsed_uri = urllib.parse.urlparse(path_or_uri)
  267. if (
  268. len(parsed_uri.scheme) == 0
  269. or parsed_uri.scheme == "file"
  270. and len(parsed_uri.netloc) == 0
  271. ):
  272. return local_file_uri_to_path(path_or_uri)
  273. else:
  274. return None
  275. def check_windows_valid_filename(path: int | str) -> bool:
  276. r"""Verify that the given path does not contain any invalid characters for a Windows filename.
  277. Windows filenames cannot contain the following characters:
  278. < > : " \ / | ? *
  279. For more details, refer to the official documentation:
  280. https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file#naming-conventions
  281. Args:
  282. path: The file path to check, which can be either an integer or a string.
  283. Returns:
  284. bool: True if the path does not contain any invalid characters, False otherwise.
  285. """
  286. return not bool(re.search(r'[<>:"\\?*]', path)) # type: ignore
  287. def make_file_path_upload_safe(path: str) -> str:
  288. r"""Makes the provide path safe for file upload.
  289. The filename is made safe by:
  290. 1. Removing any leading slashes to prevent writing to absolute paths
  291. 2. Replacing '.' and '..' with underscores to prevent directory traversal attacks
  292. Raises:
  293. ValueError: If running on Windows and the key contains invalid filename characters
  294. (\, :, *, ?, ", <, >, |)
  295. """
  296. sys_platform = platform.system()
  297. if sys_platform == "Windows" and not check_windows_valid_filename(path):
  298. raise ValueError(
  299. f"Path {path} is invalid. Please remove invalid filename characters"
  300. r' (\, :, *, ?, ", <, >, |)'
  301. )
  302. # On Windows, convert forward slashes to backslashes.
  303. # This ensures that the key is a valid filename on Windows.
  304. if sys_platform == "Windows":
  305. path = str(path).replace("/", os.sep)
  306. # Avoid writing to absolute paths by striping any leading slashes.
  307. # The key has already been validated for windows operating systems in util.check_windows_valid_filename
  308. # This ensures the key does not contain invalid characters for windows, such as '\' or ':'.
  309. # So we can check only for '/' in the key.
  310. path = path.lstrip(os.sep)
  311. # Avoid directory traversal by replacing dots with underscores.
  312. paths = path.split(os.sep)
  313. safe_paths = [
  314. p.replace(".", "_") if p in (os.curdir, os.pardir) else p for p in paths
  315. ]
  316. # Recombine the key into a relative path.
  317. return os.sep.join(safe_paths)
  318. def make_tarfile(
  319. output_filename: str,
  320. source_dir: str,
  321. archive_name: str,
  322. custom_filter: Callable | None = None,
  323. ) -> None:
  324. # Helper for filtering out modification timestamps
  325. def _filter_timestamps(tar_info: tarfile.TarInfo) -> tarfile.TarInfo | None:
  326. tar_info.mtime = 0
  327. return tar_info if custom_filter is None else custom_filter(tar_info)
  328. descriptor, unzipped_filename = tempfile.mkstemp()
  329. try:
  330. with tarfile.open(unzipped_filename, "w") as tar:
  331. tar.add(source_dir, arcname=archive_name, filter=_filter_timestamps)
  332. # When gzipping the tar, don't include the tar's filename or modification time in the
  333. # zipped archive (see https://docs.python.org/3/library/gzip.html#gzip.GzipFile)
  334. with gzip.GzipFile(
  335. filename="", fileobj=open(output_filename, "wb"), mode="wb", mtime=0
  336. ) as gzipped_tar, open(unzipped_filename, "rb") as tar_file:
  337. gzipped_tar.write(tar_file.read())
  338. finally:
  339. os.close(descriptor)
  340. os.remove(unzipped_filename)
  341. def is_tf_tensor(obj: Any) -> bool:
  342. import tensorflow # type: ignore
  343. return isinstance(obj, tensorflow.Tensor)
  344. def is_tf_tensor_typename(typename: str) -> bool:
  345. return typename.startswith("tensorflow.") and (
  346. "Tensor" in typename or "Variable" in typename
  347. )
  348. def is_tf_eager_tensor_typename(typename: str) -> bool:
  349. return typename.startswith("tensorflow.") and ("EagerTensor" in typename)
  350. def is_pytorch_tensor(obj: Any) -> bool:
  351. import torch # type: ignore
  352. return isinstance(obj, torch.Tensor)
  353. def is_pytorch_tensor_typename(typename: str) -> bool:
  354. return typename.startswith("torch.") and (
  355. "Tensor" in typename or "Variable" in typename
  356. )
  357. def is_jax_tensor_typename(typename: str) -> bool:
  358. return typename.startswith("jaxlib.") and "Array" in typename
  359. def get_jax_tensor(obj: Any) -> Any:
  360. import jax # type: ignore
  361. return jax.device_get(obj)
  362. def is_fastai_tensor_typename(typename: str) -> bool:
  363. return typename.startswith("fastai.") and ("Tensor" in typename)
  364. def is_pandas_data_frame_typename(typename: str) -> bool:
  365. return typename.startswith("pandas.") and "DataFrame" in typename
  366. def is_matplotlib_typename(typename: str) -> bool:
  367. return typename.startswith("matplotlib.")
  368. def is_plotly_typename(typename: str) -> bool:
  369. return typename.startswith("plotly.")
  370. def is_plotly_figure_typename(typename: str) -> bool:
  371. return typename.startswith("plotly.") and typename.endswith(".Figure")
  372. def is_numpy_array(obj: Any) -> bool:
  373. return np and isinstance(obj, np.ndarray)
  374. def is_pandas_data_frame(obj: Any) -> bool:
  375. if pd_available:
  376. import pandas as pd
  377. return isinstance(obj, pd.DataFrame)
  378. else:
  379. return is_pandas_data_frame_typename(get_full_typename(obj))
  380. def ensure_matplotlib_figure(obj: Any) -> Any:
  381. """Extract the current figure from a matplotlib object.
  382. Return the object itself if it's a figure.
  383. Raises ValueError if the object can't be converted.
  384. """
  385. import matplotlib # type: ignore
  386. from matplotlib.figure import Figure # type: ignore
  387. # there are combinations of plotly and matplotlib versions that don't work well together,
  388. # this patches matplotlib to add a removed method that plotly assumes exists
  389. from matplotlib.spines import Spine # type: ignore
  390. def is_frame_like(self: Any) -> bool:
  391. """Return True if directly on axes frame.
  392. This is useful for determining if a spine is the edge of an
  393. old style MPL plot. If so, this function will return True.
  394. """
  395. position = self._position or ("outward", 0.0)
  396. if isinstance(position, str):
  397. if position == "center":
  398. position = ("axes", 0.5)
  399. elif position == "zero":
  400. position = ("data", 0)
  401. if len(position) != 2:
  402. raise ValueError("position should be 2-tuple")
  403. position_type, amount = position # type: ignore
  404. if position_type == "outward" and amount == 0:
  405. return True
  406. else:
  407. return False
  408. Spine.is_frame_like = is_frame_like
  409. if obj == matplotlib.pyplot:
  410. obj = obj.gcf()
  411. elif not isinstance(obj, Figure):
  412. if hasattr(obj, "figure"):
  413. obj = obj.figure
  414. # Some matplotlib objects have a figure function
  415. if not isinstance(obj, Figure):
  416. raise ValueError(
  417. "Only matplotlib.pyplot or matplotlib.pyplot.Figure objects are accepted."
  418. )
  419. return obj
  420. def matplotlib_to_plotly(obj: Any) -> Any:
  421. obj = ensure_matplotlib_figure(obj)
  422. tools = get_module(
  423. "plotly.tools",
  424. required=(
  425. "plotly is required to log interactive plots, install with: "
  426. "`pip install plotly` or convert the plot to an image with `wandb.Image(plt)`"
  427. ),
  428. )
  429. return tools.mpl_to_plotly(obj)
  430. def matplotlib_contains_images(obj: Any) -> bool:
  431. obj = ensure_matplotlib_figure(obj)
  432. return any(len(ax.images) > 0 for ax in obj.axes)
  433. def _numpy_generic_convert(obj: Any) -> Any:
  434. obj = obj.item()
  435. if isinstance(obj, float) and math.isnan(obj):
  436. obj = None
  437. elif isinstance(obj, np.generic) and (
  438. obj.dtype.kind == "f" or obj.dtype == "bfloat16"
  439. ):
  440. # obj is a numpy float with precision greater than that of native python float
  441. # (i.e., float96 or float128) or it is of custom type such as bfloat16.
  442. # in these cases, obj.item() does not return a native
  443. # python float (in the first case - to avoid loss of precision,
  444. # so we need to explicitly cast this down to a 64bit float)
  445. obj = float(obj)
  446. return obj
  447. def _sanitize_numpy_keys(
  448. d: dict,
  449. visited: dict[int, dict] | None = None,
  450. ) -> tuple[dict, bool]:
  451. """Returns a dictionary where all NumPy keys are converted.
  452. Args:
  453. d: The dictionary to sanitize.
  454. Returns:
  455. A sanitized dictionary, and a boolean indicating whether anything was
  456. changed.
  457. """
  458. out: dict[Any, Any] = dict()
  459. converted = False
  460. # Work with recursive dictionaries: if a dictionary has already been
  461. # converted, reuse its converted value to retain the recursive structure
  462. # of the input.
  463. if visited is None:
  464. visited = {id(d): out}
  465. elif id(d) in visited:
  466. return visited[id(d)], False
  467. visited[id(d)] = out
  468. for key, value in d.items():
  469. if isinstance(value, dict):
  470. value, converted_value = _sanitize_numpy_keys(value, visited)
  471. converted |= converted_value
  472. if isinstance(key, np.generic):
  473. key = _numpy_generic_convert(key)
  474. converted = True
  475. out[key] = value
  476. return out, converted
  477. def json_friendly( # noqa: C901
  478. obj: Any,
  479. ) -> tuple[Any, bool] | tuple[None | str | float, bool]:
  480. """Convert an object into something that's more becoming of JSON."""
  481. converted = True
  482. typename = get_full_typename(obj)
  483. if is_tf_eager_tensor_typename(typename):
  484. obj = obj.numpy()
  485. elif is_tf_tensor_typename(typename):
  486. try:
  487. obj = obj.eval()
  488. except RuntimeError:
  489. obj = obj.numpy()
  490. elif is_pytorch_tensor_typename(typename) or is_fastai_tensor_typename(typename):
  491. try:
  492. if obj.requires_grad:
  493. obj = obj.detach()
  494. except AttributeError:
  495. pass # before 0.4 is only present on variables
  496. try:
  497. obj = obj.data
  498. except RuntimeError:
  499. pass # happens for Tensors before 0.4
  500. if obj.size():
  501. obj = obj.cpu().detach().numpy()
  502. else:
  503. return obj.item(), True
  504. elif is_jax_tensor_typename(typename):
  505. obj = get_jax_tensor(obj)
  506. if is_numpy_array(obj):
  507. if obj.size == 1:
  508. obj = obj.flatten()[0]
  509. elif obj.size <= 32:
  510. obj = obj.tolist()
  511. elif np and isinstance(obj, np.generic):
  512. obj = _numpy_generic_convert(obj)
  513. elif isinstance(obj, bytes):
  514. obj = obj.decode("utf-8")
  515. elif isinstance(obj, (datetime, date)):
  516. obj = obj.isoformat()
  517. elif callable(obj):
  518. obj = (
  519. f"{obj.__module__}.{obj.__qualname__}"
  520. if hasattr(obj, "__qualname__") and hasattr(obj, "__module__")
  521. else str(obj)
  522. )
  523. elif isinstance(obj, float) and math.isnan(obj):
  524. obj = None
  525. elif isinstance(obj, dict) and np:
  526. obj, converted = _sanitize_numpy_keys(obj)
  527. elif isinstance(obj, set):
  528. # set is not json serializable, so we convert it to tuple
  529. obj = tuple(obj)
  530. elif isinstance(obj, enum.Enum):
  531. obj = obj.name
  532. else:
  533. converted = False
  534. if getsizeof(obj) > VALUE_BYTES_LIMIT:
  535. wandb.termwarn(
  536. f"Serializing object of type {type(obj).__name__} that is {getsizeof(obj)} bytes"
  537. )
  538. return obj, converted
  539. def json_friendly_val(val: Any) -> Any:
  540. """Make any value (including dict, slice, sequence, dataclass) JSON friendly."""
  541. converted: dict | list
  542. if isinstance(val, dict):
  543. converted = {}
  544. for key, value in val.items():
  545. converted[key] = json_friendly_val(value)
  546. return converted
  547. if isinstance(val, slice):
  548. converted = dict(
  549. slice_start=val.start, slice_step=val.step, slice_stop=val.stop
  550. )
  551. return converted
  552. val, _ = json_friendly(val)
  553. if isinstance(val, Sequence) and not isinstance(val, str):
  554. converted = []
  555. for value in val:
  556. converted.append(json_friendly_val(value))
  557. return converted
  558. if is_dataclass(val) and not isinstance(val, type):
  559. converted = asdict(val)
  560. return json_friendly_val(converted)
  561. else:
  562. if val.__class__.__module__ not in ("builtins", "__builtin__"):
  563. val = str(val)
  564. return val
  565. def alias_is_version_index(alias: str) -> bool:
  566. return len(alias) >= 2 and alias[0] == "v" and alias[1:].isnumeric()
  567. def convert_plots(obj: Any) -> Any:
  568. if is_matplotlib_typename(get_full_typename(obj)):
  569. tools = get_module(
  570. "plotly.tools",
  571. required=(
  572. "plotly is required to log interactive plots, install with: "
  573. "`pip install plotly` or convert the plot to an image with `wandb.Image(plt)`"
  574. ),
  575. )
  576. obj = tools.mpl_to_plotly(obj)
  577. if is_plotly_typename(get_full_typename(obj)):
  578. return {"_type": "plotly", "plot": obj.to_plotly_json()}
  579. else:
  580. return obj
  581. def maybe_compress_history(obj: Any) -> tuple[Any, bool]:
  582. if np and isinstance(obj, np.ndarray) and obj.size > 32:
  583. return wandb.Histogram(obj, num_bins=32).to_json(), True
  584. else:
  585. return obj, False
  586. def maybe_compress_summary(obj: Any, h5_typename: str) -> tuple[Any, bool]:
  587. if np and isinstance(obj, np.ndarray) and obj.size > 32:
  588. return (
  589. {
  590. "_type": h5_typename, # may not be ndarray
  591. "var": np.var(obj).item(),
  592. "mean": np.mean(obj).item(),
  593. "min": np.amin(obj).item(),
  594. "max": np.amax(obj).item(),
  595. "10%": np.percentile(obj, 10),
  596. "25%": np.percentile(obj, 25),
  597. "75%": np.percentile(obj, 75),
  598. "90%": np.percentile(obj, 90),
  599. "size": obj.size,
  600. },
  601. True,
  602. )
  603. else:
  604. return obj, False
  605. def launch_browser(attempt_launch_browser: bool = True) -> bool:
  606. """Decide if we should launch a browser."""
  607. _display_variables = ["DISPLAY", "WAYLAND_DISPLAY", "MIR_SOCKET"]
  608. _webbrowser_names_blocklist = ["www-browser", "lynx", "links", "elinks", "w3m"]
  609. import webbrowser
  610. launch_browser = attempt_launch_browser
  611. if launch_browser:
  612. if "linux" in sys.platform and not any(
  613. os.getenv(var) for var in _display_variables
  614. ):
  615. launch_browser = False
  616. try:
  617. browser = webbrowser.get()
  618. if hasattr(browser, "name") and browser.name in _webbrowser_names_blocklist:
  619. launch_browser = False
  620. except webbrowser.Error:
  621. launch_browser = False
  622. return launch_browser
  623. def generate_id(length: int = 8) -> str:
  624. # Do not use this; use wandb.sdk.lib.runid.generate_id instead.
  625. # This is kept only for legacy code.
  626. return runid.generate_id(length)
  627. def parse_tfjob_config() -> Any:
  628. """Attempt to parse TFJob config, returning False if it can't find it."""
  629. if os.getenv("TF_CONFIG"):
  630. try:
  631. return json.loads(os.environ["TF_CONFIG"])
  632. except ValueError:
  633. return False
  634. else:
  635. return False
  636. class WandBJSONEncoder(json.JSONEncoder):
  637. """A JSON Encoder that handles some extra types."""
  638. def default(self, obj: Any) -> Any:
  639. if hasattr(obj, "json_encode"):
  640. return obj.json_encode()
  641. # if hasattr(obj, 'to_json'):
  642. # return obj.to_json()
  643. tmp_obj, converted = json_friendly(obj)
  644. if converted:
  645. return tmp_obj
  646. return json.JSONEncoder.default(self, obj)
  647. class WandBJSONEncoderOld(json.JSONEncoder):
  648. """A JSON Encoder that handles some extra types."""
  649. def default(self, obj: Any) -> Any:
  650. tmp_obj, converted = json_friendly(obj)
  651. tmp_obj, compressed = maybe_compress_summary(tmp_obj, get_h5_typename(obj))
  652. if converted:
  653. return tmp_obj
  654. return json.JSONEncoder.default(self, tmp_obj)
  655. class WandBHistoryJSONEncoder(json.JSONEncoder):
  656. """A JSON Encoder that handles some extra types.
  657. This encoder turns numpy like objects with a size > 32 into histograms.
  658. """
  659. def default(self, obj: Any) -> Any:
  660. obj, converted = json_friendly(obj)
  661. obj, compressed = maybe_compress_history(obj)
  662. if converted:
  663. return obj
  664. return json.JSONEncoder.default(self, obj)
  665. class JSONEncoderUncompressed(json.JSONEncoder):
  666. """A JSON Encoder that handles some extra types.
  667. This encoder turns numpy like objects with a size > 32 into histograms.
  668. """
  669. def default(self, obj: Any) -> Any:
  670. if is_numpy_array(obj):
  671. return obj.tolist()
  672. elif np and isinstance(obj, np.number):
  673. return obj.item()
  674. elif np and isinstance(obj, np.generic):
  675. obj = obj.item()
  676. return json.JSONEncoder.default(self, obj)
  677. def json_dump_safer(obj: Any, fp: IO[str], **kwargs: Any) -> None:
  678. """Convert obj to json, with some extra encodable types."""
  679. return dump(obj, fp, cls=WandBJSONEncoder, **kwargs)
  680. def json_dumps_safer(obj: Any, **kwargs: Any) -> str:
  681. """Convert obj to json, with some extra encodable types."""
  682. return dumps(obj, cls=WandBJSONEncoder, **kwargs)
  683. # This is used for dumping raw json into files
  684. def json_dump_uncompressed(obj: Any, fp: IO[str], **kwargs: Any) -> None:
  685. """Convert obj to json, with some extra encodable types."""
  686. return dump(obj, fp, cls=JSONEncoderUncompressed, **kwargs)
  687. def json_dumps_safer_history(obj: Any, **kwargs: Any) -> str:
  688. """Convert obj to json, with some extra encodable types, including histograms."""
  689. return dumps(obj, cls=WandBHistoryJSONEncoder, **kwargs)
  690. def make_json_if_not_number(
  691. v: int | float | str | Mapping | Sequence,
  692. ) -> int | float | str:
  693. """If v is not a basic type convert it to json."""
  694. if isinstance(v, (float, int)):
  695. return v
  696. return json_dumps_safer(v)
  697. def make_safe_for_json(obj: Any) -> Any:
  698. """Replace invalid json floats with strings. Also converts to lists and dicts."""
  699. if isinstance(obj, Mapping):
  700. return {k: make_safe_for_json(v) for k, v in obj.items()}
  701. elif isinstance(obj, str):
  702. # str's are Sequence, so we need to short-circuit
  703. return obj
  704. elif isinstance(obj, Sequence):
  705. return [make_safe_for_json(v) for v in obj]
  706. elif isinstance(obj, float):
  707. # W&B backend and UI handle these strings
  708. if obj != obj: # standard way to check for NaN
  709. return "NaN"
  710. elif obj == float("+inf"):
  711. return "Infinity"
  712. elif obj == float("-inf"):
  713. return "-Infinity"
  714. return obj
  715. def no_retry_4xx(e: Exception) -> bool:
  716. from requests import HTTPError
  717. if not isinstance(e, HTTPError):
  718. return True
  719. assert e.response is not None
  720. if not (400 <= e.response.status_code < 500) or e.response.status_code == 429:
  721. return True
  722. body = json.loads(e.response.content)
  723. raise UsageError(body["errors"][0]["message"])
  724. def parse_backend_error_messages(response: Response) -> list[str]:
  725. """Returns error messages stored in a backend response.
  726. If the response is not in an expected format, an empty list is returned.
  727. Args:
  728. response: A response to an HTTP request to the W&B server.
  729. """
  730. from requests import JSONDecodeError
  731. try:
  732. data = response.json()
  733. except JSONDecodeError:
  734. return []
  735. if not isinstance(data, dict):
  736. return []
  737. # Backend error values are returned in one of two ways:
  738. # - A string containing the error message
  739. # - A JSON object with a "message" field that is a string
  740. def get_message(error: Any) -> str | None:
  741. if isinstance(error, str):
  742. return error
  743. elif (
  744. isinstance(error, dict)
  745. and (message := error.get("message"))
  746. and isinstance(message, str)
  747. ):
  748. return message
  749. else:
  750. return None
  751. # The response can contain an "error" field with a single error
  752. # or an "errors" field with a list of errors.
  753. if error := data.get("error"):
  754. message = get_message(error)
  755. return [message] if message else []
  756. elif (errors := data.get("errors")) and isinstance(errors, list):
  757. messages: list[str] = []
  758. for error in errors:
  759. message = get_message(error)
  760. if message:
  761. messages.append(message)
  762. return messages
  763. else:
  764. return []
  765. def no_retry_auth(e: Any) -> bool:
  766. from requests import HTTPError
  767. if hasattr(e, "exception"):
  768. e = e.exception
  769. if not isinstance(e, HTTPError):
  770. return True
  771. if e.response is None:
  772. return True
  773. # Don't retry bad request errors; raise immediately
  774. if e.response.status_code in (400, 409):
  775. return False
  776. # Retry all non-forbidden/unauthorized/not-found errors.
  777. if e.response.status_code not in (401, 403, 404):
  778. return True
  779. # Crash with more informational message on forbidden/unauthorized errors.
  780. # UnauthorizedError
  781. if e.response.status_code == 401:
  782. raise AuthenticationError(
  783. "The API key you provided is either invalid or missing. "
  784. f"If the `{wandb.env.API_KEY}` environment variable is set, make sure it is correct. "
  785. "Otherwise, to resolve this issue, you may try running the 'wandb login --relogin' command. "
  786. "If you are using a local server, make sure that you're using the correct hostname. "
  787. "If you're not sure, you can try logging in again using the 'wandb login --relogin --host [hostname]' command."
  788. f"(Error {e.response.status_code}: {e.response.reason})"
  789. )
  790. # ForbiddenError
  791. if e.response.status_code == 403:
  792. if wandb.run:
  793. raise CommError(f"Permission denied to access {wandb.run.path}")
  794. else:
  795. raise CommError(
  796. "It appears that you do not have permission to access the requested resource. "
  797. "Please reach out to the project owner to grant you access. "
  798. "If you have the correct permissions, verify that there are no issues with your networking setup."
  799. f"(Error {e.response.status_code}: {e.response.reason})"
  800. )
  801. # NotFoundError
  802. if e.response.status_code == 404:
  803. # If error message is empty, raise a more generic NotFoundError message.
  804. if parse_backend_error_messages(e.response):
  805. return False
  806. else:
  807. raise LookupError(
  808. f"Failed to find resource. Please make sure you have the correct resource path. "
  809. f"(Error {e.response.status_code}: {e.response.reason})"
  810. )
  811. return False
  812. def check_retry_conflict(e: Any) -> bool | None:
  813. """Check if the exception is a conflict type so it can be retried.
  814. Returns:
  815. True - Should retry this operation
  816. False - Should not retry this operation
  817. None - No decision, let someone else decide
  818. """
  819. from requests import HTTPError
  820. if hasattr(e, "exception"):
  821. e = e.exception
  822. if isinstance(e, HTTPError) and e.response is not None:
  823. if e.response.status_code == 409:
  824. return True
  825. return None
  826. def check_retry_conflict_or_gone(e: Any) -> bool | None:
  827. """Check if the exception is a conflict or gone type, so it can be retried or not.
  828. Returns:
  829. True - Should retry this operation
  830. False - Should not retry this operation
  831. None - No decision, let someone else decide
  832. """
  833. from requests import HTTPError
  834. if hasattr(e, "exception"):
  835. e = e.exception
  836. if isinstance(e, HTTPError) and e.response is not None:
  837. if e.response.status_code == 409:
  838. return True
  839. if e.response.status_code == 410:
  840. return False
  841. return None
  842. def make_check_retry_fn(
  843. fallback_retry_fn: CheckRetryFnType,
  844. check_fn: Callable[[Exception], bool | None],
  845. check_timedelta: timedelta | None = None,
  846. ) -> CheckRetryFnType:
  847. """Return a check_retry_fn which can be used by lib.Retry().
  848. Args:
  849. fallback_fn: Use this function if check_fn didn't decide if a retry should happen.
  850. check_fn: Function which returns bool if retry should happen or None if unsure.
  851. check_timedelta: Optional retry timeout if we check_fn matches the exception
  852. """
  853. def check_retry_fn(e: Exception) -> bool | timedelta:
  854. check = check_fn(e)
  855. if check is None:
  856. return fallback_retry_fn(e)
  857. if check is False:
  858. return False
  859. if check_timedelta:
  860. return check_timedelta
  861. return True
  862. return check_retry_fn
  863. def find_runner(program: str) -> None | list | list[str]:
  864. """Return a command that will run program.
  865. Args:
  866. program: The string name of the program to try to run.
  867. Returns:
  868. commandline list of strings to run the program (eg. with subprocess.call()) or None
  869. """
  870. if os.path.isfile(program) and not os.access(program, os.X_OK):
  871. # program is a path to a non-executable file
  872. try:
  873. opened = open(program)
  874. except OSError: # PermissionError doesn't exist in 2.7
  875. return None
  876. first_line = opened.readline().strip()
  877. if first_line.startswith("#!"):
  878. return shlex.split(first_line[2:])
  879. if program.endswith(".py"):
  880. return [sys.executable]
  881. return None
  882. def downsample(values: Sequence, target_length: int) -> list:
  883. """Downsample 1d values to target_length, including start and end.
  884. Algorithm just rounds index down.
  885. Values can be any sequence, including a generator.
  886. """
  887. if not target_length > 1:
  888. raise UsageError("target_length must be > 1")
  889. values = list(values)
  890. if len(values) < target_length:
  891. return values
  892. ratio = float(len(values) - 1) / (target_length - 1)
  893. result = []
  894. for i in range(target_length):
  895. result.append(values[int(i * ratio)])
  896. return result
  897. def has_num(dictionary: Mapping, key: Any) -> bool:
  898. return key in dictionary and isinstance(dictionary[key], numbers.Number)
  899. def docker_image_regex(image: str) -> Any:
  900. """Regex match for valid docker image names."""
  901. if image:
  902. return re.match(
  903. r"^(?:(?=[^:\/]{1,253})(?!-)[a-zA-Z0-9-]{1,63}(?<!-)(?:\.(?!-)[a-zA-Z0-9-]{1,63}(?<!-))*(?::[0-9]{1,5})?/)?((?![._-])(?:[a-z0-9._-]*)(?<![._-])(?:/(?![._-])[a-z0-9._-]*(?<![._-]))*)(?::(?![.-])[a-zA-Z0-9_.-]{1,128})?$",
  904. image,
  905. )
  906. return None
  907. def image_from_docker_args(args: list[str]) -> str | None:
  908. """Scan docker run args and attempt to find the most likely docker image argument.
  909. It excludes any arguments that start with a dash, and the argument after it if it
  910. isn't a boolean switch. This can be improved, we currently fallback gracefully when
  911. this fails.
  912. """
  913. bool_args = [
  914. "-t",
  915. "--tty",
  916. "--rm",
  917. "--privileged",
  918. "--oom-kill-disable",
  919. "--no-healthcheck",
  920. "-i",
  921. "--interactive",
  922. "--init",
  923. "--help",
  924. "--detach",
  925. "-d",
  926. "--sig-proxy",
  927. "-it",
  928. "-itd",
  929. ]
  930. last_flag = -2
  931. last_arg = ""
  932. possible_images = []
  933. if len(args) > 0 and args[0] == "run":
  934. args.pop(0)
  935. for i, arg in enumerate(args):
  936. if arg.startswith("-"):
  937. last_flag = i
  938. last_arg = arg
  939. elif "@sha256:" in arg:
  940. # Because our regex doesn't match digests
  941. possible_images.append(arg)
  942. elif docker_image_regex(arg):
  943. if last_flag == i - 2:
  944. possible_images.append(arg)
  945. elif "=" in last_arg:
  946. possible_images.append(arg)
  947. elif last_arg in bool_args and last_flag == i - 1:
  948. possible_images.append(arg)
  949. most_likely = None
  950. for img in possible_images:
  951. if ":" in img or "@" in img or "/" in img:
  952. most_likely = img
  953. break
  954. if most_likely is None and len(possible_images) > 0:
  955. most_likely = possible_images[0]
  956. return most_likely
  957. def load_yaml(file: Any) -> Any:
  958. import yaml
  959. return yaml.safe_load(file)
  960. def image_id_from_k8s() -> str | None:
  961. """Ping the k8s metadata service for the image id.
  962. Specify the KUBERNETES_NAMESPACE environment variable if your pods are not in the
  963. default namespace:
  964. - name: KUBERNETES_NAMESPACE valueFrom:
  965. fieldRef:
  966. fieldPath: metadata.namespace
  967. """
  968. token_path = "/var/run/secrets/kubernetes.io/serviceaccount/token"
  969. if not os.path.exists(token_path):
  970. return None
  971. try:
  972. with open(token_path) as token_file:
  973. token = token_file.read()
  974. except FileNotFoundError:
  975. logger.warning(f"Token file not found at {token_path}.")
  976. return None
  977. except PermissionError as e:
  978. current_uid = os.getuid()
  979. warning = (
  980. f"Unable to read the token file at {token_path} due to permission error ({e})."
  981. f"The current user id is {current_uid}. "
  982. "Consider changing the securityContext to run the container as the current user."
  983. )
  984. logger.warning(warning)
  985. wandb.termwarn(warning)
  986. return None
  987. if not token:
  988. return None
  989. import requests
  990. k8s_server = "https://{}:{}/api/v1/namespaces/{}/pods/{}".format(
  991. os.getenv("KUBERNETES_SERVICE_HOST"),
  992. os.getenv("KUBERNETES_PORT_443_TCP_PORT"),
  993. os.getenv("KUBERNETES_NAMESPACE", "default"),
  994. os.getenv("HOSTNAME"),
  995. )
  996. try:
  997. res = requests.get(
  998. k8s_server,
  999. verify="/var/run/secrets/kubernetes.io/serviceaccount/ca.crt",
  1000. timeout=3,
  1001. headers={"Authorization": f"Bearer {token}"},
  1002. )
  1003. res.raise_for_status()
  1004. except requests.RequestException:
  1005. return None
  1006. try:
  1007. return str( # noqa: B005
  1008. res.json()["status"]["containerStatuses"][0]["imageID"]
  1009. ).strip("docker-pullable://")
  1010. except (ValueError, KeyError, IndexError):
  1011. logger.exception("Error checking kubernetes for image id")
  1012. return None
  1013. def async_call(target: Callable, timeout: int | float | None = None) -> Callable:
  1014. """Wrap a method to run in the background with an optional timeout.
  1015. Returns a new method that will call the original with any args, waiting for upto
  1016. timeout seconds. This new method blocks on the original and returns the result or
  1017. None if timeout was reached, along with the thread. You can check thread.is_alive()
  1018. to determine if a timeout was reached. If an exception is thrown in the thread, we
  1019. reraise it.
  1020. """
  1021. q: queue.Queue = queue.Queue()
  1022. def wrapped_target(q: queue.Queue, *args: Any, **kwargs: Any) -> Any:
  1023. try:
  1024. q.put(target(*args, **kwargs))
  1025. except Exception as e:
  1026. q.put(e)
  1027. def wrapper(
  1028. *args: Any, **kwargs: Any
  1029. ) -> tuple[Exception, threading.Thread] | tuple[None, threading.Thread]:
  1030. thread = threading.Thread(
  1031. target=wrapped_target, args=(q,) + args, kwargs=kwargs
  1032. )
  1033. thread.daemon = True
  1034. thread.start()
  1035. try:
  1036. result = q.get(True, timeout)
  1037. except queue.Empty:
  1038. return None, thread
  1039. if isinstance(result, Exception):
  1040. raise result.with_traceback(sys.exc_info()[2])
  1041. return result, thread
  1042. return wrapper
  1043. def read_many_from_queue(
  1044. q: queue.Queue, max_items: int, queue_timeout: int | float
  1045. ) -> list:
  1046. try:
  1047. item = q.get(True, queue_timeout)
  1048. except queue.Empty:
  1049. return []
  1050. items = [item]
  1051. for _ in range(max_items):
  1052. try:
  1053. item = q.get_nowait()
  1054. except queue.Empty:
  1055. return items
  1056. items.append(item)
  1057. return items
  1058. def stopwatch_now() -> float:
  1059. """Get a time value for interval comparisons.
  1060. When possible it is a monotonic clock to prevent backwards time issues.
  1061. """
  1062. return time.monotonic()
  1063. def class_colors(class_count: int) -> list[list[int]]:
  1064. # make class 0 black, and the rest equally spaced fully saturated hues
  1065. return [[0, 0, 0]] + [
  1066. colorsys.hsv_to_rgb(i / (class_count - 1.0), 1.0, 1.0) # type: ignore
  1067. for i in range(class_count - 1)
  1068. ]
  1069. def prompt_choices(
  1070. choices: Sequence[str],
  1071. input_timeout: float | None = None,
  1072. ) -> str:
  1073. """Prompt the user to choose from a list of options.
  1074. If exactly one choice is given, it is returned immediately.
  1075. Raises:
  1076. TimeoutError: if input_timeout is specified and expires.
  1077. NotATerminalError: if the output device is not capable.
  1078. KeyboardInterrupt: if the user aborts by pressing Ctrl+C.
  1079. """
  1080. if len(choices) == 1:
  1081. return choices[0]
  1082. for i, choice_str in enumerate(choices):
  1083. wandb.termlog(f"({i + 1}) {choice_str}")
  1084. while True:
  1085. choice = terminput("Enter your choice: ", timeout=input_timeout)
  1086. # If the user presses enter without typing anything, try again.
  1087. if not choice:
  1088. continue
  1089. idx = -1
  1090. with contextlib.suppress(ValueError):
  1091. idx = int(choice) - 1
  1092. if idx < 0 or idx > len(choices) - 1:
  1093. wandb.termwarn("Invalid choice")
  1094. continue
  1095. result = choices[idx]
  1096. wandb.termlog(f"You chose {result!r}")
  1097. return result
  1098. def guess_data_type(shape: Sequence[int], risky: bool = False) -> str | None:
  1099. """Infer the type of data based on the shape of the tensors.
  1100. Args:
  1101. shape (Sequence[int]): The shape of the data
  1102. risky(bool): some guesses are more likely to be wrong.
  1103. """
  1104. # (samples,) or (samples,logits)
  1105. if len(shape) in (1, 2):
  1106. return "label"
  1107. # Assume image mask like fashion mnist: (no color channel)
  1108. # This is risky because RNNs often have 3 dim tensors: batch, time, channels
  1109. if risky and len(shape) == 3:
  1110. return "image"
  1111. if len(shape) == 4:
  1112. if shape[-1] in (1, 3, 4):
  1113. # (samples, height, width, Y \ RGB \ RGBA)
  1114. return "image"
  1115. else:
  1116. # (samples, height, width, logits)
  1117. return "segmentation_mask"
  1118. return None
  1119. def download_file_from_url(
  1120. dest_path: str, source_url: str, api_key: str | None = None
  1121. ) -> None:
  1122. import requests
  1123. auth = ("api", api_key or "")
  1124. response = requests.get(
  1125. source_url,
  1126. auth=auth,
  1127. stream=True,
  1128. timeout=5,
  1129. )
  1130. response.raise_for_status()
  1131. if os.sep in dest_path:
  1132. filesystem.mkdir_exists_ok(os.path.dirname(dest_path))
  1133. with fsync_open(dest_path, "wb") as file:
  1134. for data in response.iter_content(chunk_size=1024):
  1135. file.write(data)
  1136. def download_file_into_memory(source_url: str, api_key: str | None = None) -> bytes:
  1137. import requests
  1138. auth = ("api", api_key or "")
  1139. response = requests.get(
  1140. source_url,
  1141. auth=auth,
  1142. stream=True,
  1143. timeout=5,
  1144. )
  1145. response.raise_for_status()
  1146. return response.content
  1147. def isatty(ob: IO) -> bool:
  1148. return hasattr(ob, "isatty") and ob.isatty()
  1149. def to_human_size(size: int, units: list[tuple[str, Any]] | None = None) -> str:
  1150. units = units or POW_10_BYTES
  1151. unit, value = units[0]
  1152. factor = round(float(size) / value, 1)
  1153. return (
  1154. f"{factor}{unit}"
  1155. if factor < 1024 or len(units) == 1
  1156. else to_human_size(size, units[1:])
  1157. )
  1158. def from_human_size(size: str, units: list[tuple[str, Any]] | None = None) -> int:
  1159. units = units or POW_10_BYTES
  1160. units_dict = {unit.upper(): value for (unit, value) in units}
  1161. regex = re.compile(
  1162. r"(\d+\.?\d*)\s*({})?".format("|".join(units_dict.keys())), re.IGNORECASE
  1163. )
  1164. match = re.match(regex, size)
  1165. if not match:
  1166. raise ValueError("size must be of the form `10`, `10B` or `10 B`.")
  1167. factor, unit = (
  1168. float(match.group(1)),
  1169. units_dict[match.group(2).upper()] if match.group(2) else 1,
  1170. )
  1171. return int(factor * unit)
  1172. def auto_project_name(program: str | None) -> str:
  1173. # if we're in git, set project name to git repo name + relative path within repo
  1174. from wandb.sdk.lib.gitlib import GitRepo
  1175. root_dir = GitRepo().root_dir
  1176. if root_dir is None:
  1177. return "uncategorized"
  1178. # On windows, GitRepo returns paths in unix style, but os.path is windows
  1179. # style. Coerce here.
  1180. root_dir = to_native_slash_path(root_dir)
  1181. repo_name = os.path.basename(root_dir)
  1182. if program is None:
  1183. return str(repo_name)
  1184. if not os.path.isabs(program):
  1185. program = os.path.join(os.curdir, program)
  1186. prog_dir = os.path.dirname(os.path.abspath(program))
  1187. if not prog_dir.startswith(root_dir):
  1188. return str(repo_name)
  1189. project = repo_name
  1190. sub_path = os.path.relpath(prog_dir, root_dir)
  1191. if sub_path != ".":
  1192. project += "-" + sub_path
  1193. return str(project.replace(os.sep, "_"))
  1194. # TODO(hugh): Deprecate version here and use wandb/sdk/lib/paths.py
  1195. def to_forward_slash_path(path: str) -> str:
  1196. if platform.system() == "Windows":
  1197. path = path.replace("\\", "/")
  1198. return path
  1199. # TODO(hugh): Deprecate version here and use wandb/sdk/lib/paths.py
  1200. def to_native_slash_path(path: str) -> FilePathStr:
  1201. return FilePathStr(path.replace("/", os.sep))
  1202. def check_and_warn_old(files: list[str]) -> bool:
  1203. if "wandb-metadata.json" in files:
  1204. wandb.termwarn("These runs were logged with a previous version of wandb.")
  1205. wandb.termwarn(
  1206. "Run pip install wandb<0.10.0 to get the old library and sync your runs."
  1207. )
  1208. return True
  1209. return False
  1210. class ImportMetaHook:
  1211. def __init__(self) -> None:
  1212. self.modules: dict[str, ModuleType] = dict()
  1213. self.on_import: dict[str, list] = dict()
  1214. def add(self, fullname: str, on_import: Callable) -> None:
  1215. self.on_import.setdefault(fullname, []).append(on_import)
  1216. def install(self) -> None:
  1217. sys.meta_path.insert(0, self) # type: ignore
  1218. def uninstall(self) -> None:
  1219. sys.meta_path.remove(self) # type: ignore
  1220. def find_module(
  1221. self, fullname: str, path: str | None = None
  1222. ) -> ImportMetaHook | None:
  1223. if fullname in self.on_import:
  1224. return self
  1225. return None
  1226. def load_module(self, fullname: str) -> ModuleType:
  1227. self.uninstall()
  1228. mod = importlib.import_module(fullname)
  1229. self.install()
  1230. self.modules[fullname] = mod
  1231. on_imports = self.on_import.get(fullname)
  1232. if on_imports:
  1233. for f in on_imports:
  1234. f()
  1235. return mod
  1236. def get_modules(self) -> tuple[str, ...]:
  1237. return tuple(self.modules)
  1238. def get_module(self, module: str) -> ModuleType:
  1239. return self.modules[module]
  1240. _import_hook: ImportMetaHook | None = None
  1241. def add_import_hook(fullname: str, on_import: Callable) -> None:
  1242. global _import_hook
  1243. if _import_hook is None:
  1244. _import_hook = ImportMetaHook()
  1245. _import_hook.install()
  1246. _import_hook.add(fullname, on_import)
  1247. def host_from_path(path: str | None) -> str:
  1248. """Return the host of the path."""
  1249. url = urllib.parse.urlparse(path)
  1250. return str(url.netloc)
  1251. def uri_from_path(path: str | None) -> str:
  1252. """Return the URI of the path."""
  1253. url = urllib.parse.urlparse(path)
  1254. uri = url.path if url.path[0] != "/" else url.path[1:]
  1255. return str(uri)
  1256. def is_unicode_safe(stream: TextIO) -> bool:
  1257. """Return True if the stream supports UTF-8."""
  1258. encoding = getattr(stream, "encoding", None)
  1259. return encoding.lower() in {"utf-8", "utf_8"} if encoding else False
  1260. def rand_alphanumeric(
  1261. length: int = 8, rand: ModuleType | random.Random | None = None
  1262. ) -> str:
  1263. wandb.termerror("rand_alphanumeric is deprecated, use 'secrets.token_hex'")
  1264. rand = rand or random
  1265. return "".join(rand.choice("0123456789ABCDEF") for _ in range(length))
  1266. @contextlib.contextmanager
  1267. def fsync_open(
  1268. path: StrPath, mode: str = "w", encoding: str | None = None
  1269. ) -> Generator[IO[Any], None, None]:
  1270. """Open a path for I/O and guarantee that the file is flushed and synced."""
  1271. with open(path, mode, encoding=encoding) as f:
  1272. yield f
  1273. f.flush()
  1274. os.fsync(f.fileno())
  1275. def _is_kaggle() -> bool:
  1276. return (
  1277. os.getenv("KAGGLE_KERNEL_RUN_TYPE") is not None
  1278. or "kaggle_environments" in sys.modules
  1279. )
  1280. def _has_internet() -> bool:
  1281. """Returns whether we have internet access.
  1282. Checks for internet access by attempting to open a DNS connection to
  1283. Google's root servers.
  1284. """
  1285. try:
  1286. s = socket.create_connection(("8.8.8.8", 53), 0.5)
  1287. s.close()
  1288. except OSError:
  1289. return False
  1290. return True
  1291. def _is_likely_kaggle() -> bool:
  1292. # Telemetry to mark first runs from Kagglers.
  1293. return (
  1294. _is_kaggle()
  1295. or os.path.exists(
  1296. os.path.expanduser(os.path.join("~", ".kaggle", "kaggle.json"))
  1297. )
  1298. or "kaggle" in sys.modules
  1299. )
  1300. def _is_databricks() -> bool:
  1301. # check if we are running inside a databricks notebook by
  1302. # inspecting sys.modules, searching for dbutils and verifying that
  1303. # it has the appropriate structure
  1304. if "dbutils" in sys.modules:
  1305. dbutils = sys.modules["dbutils"]
  1306. if hasattr(dbutils, "shell"):
  1307. shell = dbutils.shell
  1308. if hasattr(shell, "sc"):
  1309. sc = shell.sc
  1310. if hasattr(sc, "appName"):
  1311. return bool(sc.appName == "Databricks Shell")
  1312. return False
  1313. def _is_py_requirements_or_dockerfile(path: str) -> bool:
  1314. file = os.path.basename(path)
  1315. return (
  1316. file.endswith(".py")
  1317. or file.startswith("Dockerfile")
  1318. or file == "requirements.txt"
  1319. )
  1320. def artifact_to_json(artifact: Artifact) -> dict[str, Any]:
  1321. return {
  1322. "_type": "artifactVersion",
  1323. "_version": "v0",
  1324. "id": artifact.id,
  1325. "version": artifact.source_version,
  1326. "sequenceName": artifact.source_name.split(":")[0],
  1327. "usedAs": artifact.use_as,
  1328. }
  1329. def check_dict_contains_nested_artifact(d: dict, nested: bool = False) -> bool:
  1330. for item in d.values():
  1331. if isinstance(item, dict):
  1332. contains_artifacts = check_dict_contains_nested_artifact(item, True)
  1333. if contains_artifacts:
  1334. return True
  1335. elif (isinstance(item, wandb.Artifact) or _is_artifact_string(item)) and nested:
  1336. return True
  1337. return False
  1338. def load_json_yaml_dict(config: str) -> Any:
  1339. import yaml
  1340. ext = os.path.splitext(config)[-1]
  1341. if ext == ".json":
  1342. with open(config) as f:
  1343. return json.load(f)
  1344. elif ext == ".yaml":
  1345. with open(config) as f:
  1346. return yaml.safe_load(f)
  1347. else:
  1348. try:
  1349. return json.loads(config)
  1350. except ValueError:
  1351. return None
  1352. def _parse_entity_project_item(path: str) -> tuple:
  1353. """Parse paths with the following formats: {item}, {project}/{item}, & {entity}/{project}/{item}.
  1354. Args:
  1355. path: `str`, input path; must be between 0 and 3 in length.
  1356. Returns:
  1357. tuple of length 3 - (item, project, entity)
  1358. Example:
  1359. alias, project, entity = _parse_entity_project_item("myproj/mymodel:best")
  1360. assert entity == ""
  1361. assert project == "myproj"
  1362. assert alias == "mymodel:best"
  1363. """
  1364. words = path.split("/")
  1365. if len(words) > 3:
  1366. raise ValueError(
  1367. "Invalid path: must be str the form {item}, {project}/{item}, or {entity}/{project}/{item}"
  1368. )
  1369. padded_words = [""] * (3 - len(words)) + words
  1370. return tuple(reversed(padded_words))
  1371. def _resolve_aliases(aliases: str | Iterable[str] | None) -> list[str]:
  1372. """Add the 'latest' alias and ensure that all aliases are unique.
  1373. Takes in `aliases` which can be None, str, or List[str] and returns list[str].
  1374. Ensures that "latest" is always present in the returned list.
  1375. Args:
  1376. aliases: `aliases: str | Iterable[str] | None`
  1377. Returns:
  1378. list[str], with "latest" always present.
  1379. Usage:
  1380. ```python
  1381. aliases = _resolve_aliases(["best", "dev"])
  1382. assert aliases == ["best", "dev", "latest"]
  1383. aliases = _resolve_aliases("boom")
  1384. assert aliases == ["boom", "latest"]
  1385. ```
  1386. """
  1387. aliases = aliases or ["latest"]
  1388. if isinstance(aliases, str):
  1389. aliases = [aliases]
  1390. try:
  1391. return list(set(aliases) | {"latest"})
  1392. except TypeError as exc:
  1393. raise ValueError("`aliases` must be Iterable or None") from exc
  1394. def _is_artifact_object(v: Any) -> TypeGuard[wandb.Artifact]:
  1395. return isinstance(v, wandb.Artifact)
  1396. def _is_artifact_string(v: Any) -> TypeGuard[str]:
  1397. return isinstance(v, str) and v.startswith("wandb-artifact://")
  1398. def _is_artifact_version_weave_dict(v: Any) -> TypeGuard[dict]:
  1399. return isinstance(v, dict) and v.get("_type") == "artifactVersion"
  1400. def _is_artifact_representation(v: Any) -> bool:
  1401. return (
  1402. _is_artifact_object(v)
  1403. or _is_artifact_string(v)
  1404. or _is_artifact_version_weave_dict(v)
  1405. )
  1406. def parse_artifact_string(v: str) -> tuple[str, str | None, bool]:
  1407. if not v.startswith("wandb-artifact://"):
  1408. raise ValueError(f"Invalid artifact string: {v}")
  1409. parsed_v = v[len("wandb-artifact://") :]
  1410. base_uri = None
  1411. url_info = urllib.parse.urlparse(parsed_v)
  1412. if url_info.scheme != "":
  1413. base_uri = f"{url_info.scheme}://{url_info.netloc}"
  1414. parts = url_info.path.split("/")[1:]
  1415. else:
  1416. parts = parsed_v.split("/")
  1417. if parts[0] == "_id":
  1418. # for now can't fetch paths but this will be supported in the future
  1419. # when we allow passing typed media objects, this can be extended
  1420. # to include paths
  1421. return parts[1], base_uri, True
  1422. if len(parts) < 3:
  1423. raise ValueError(f"Invalid artifact string: {v}")
  1424. # for now can't fetch paths but this will be supported in the future
  1425. # when we allow passing typed media objects, this can be extended
  1426. # to include paths
  1427. entity, project, name_and_alias_or_version = parts[:3]
  1428. return f"{entity}/{project}/{name_and_alias_or_version}", base_uri, False
  1429. def _get_max_cli_version() -> str | None:
  1430. max_cli_version = wandb.api.max_cli_version()
  1431. return str(max_cli_version) if max_cli_version is not None else None
  1432. def ensure_text(
  1433. string: str | bytes, encoding: str = "utf-8", errors: str = "strict"
  1434. ) -> str:
  1435. """Coerce s to str."""
  1436. if isinstance(string, bytes):
  1437. return string.decode(encoding, errors)
  1438. elif isinstance(string, str):
  1439. return string
  1440. else:
  1441. raise TypeError(f"not expecting type {type(string)!r}")
  1442. def make_artifact_name_safe(name: str) -> str:
  1443. """Make an artifact name safe for use in artifacts."""
  1444. # artifact names may only contain alphanumeric characters, dashes, underscores, and dots.
  1445. cleaned = re.sub(r"[^a-zA-Z0-9_\-.]", "_", name)
  1446. if len(cleaned) <= 128:
  1447. return cleaned
  1448. # truncate with dots in the middle using regex
  1449. return re.sub(r"(^.{63}).*(.{63}$)", r"\g<1>..\g<2>", cleaned)
  1450. def make_docker_image_name_safe(name: str) -> str:
  1451. """Make a docker image name safe for use in artifacts."""
  1452. safe_chars = RE_DOCKER_IMAGE_NAME_CHARS.sub("__", name.lower())
  1453. deduped = RE_DOCKER_IMAGE_NAME_SEPARATOR_REPEAT.sub("__", safe_chars)
  1454. trimmed_start = RE_DOCKER_IMAGE_NAME_SEPARATOR_START.sub("", deduped)
  1455. trimmed = RE_DOCKER_IMAGE_NAME_SEPARATOR_END.sub("", trimmed_start)
  1456. return trimmed if trimmed else "image"
  1457. def merge_dicts(
  1458. source: dict[str, Any],
  1459. destination: dict[str, Any],
  1460. ) -> dict[str, Any]:
  1461. """Recursively merge two dictionaries.
  1462. This mutates the destination and its nested dictionaries and lists.
  1463. Instances of `dict` are recursively merged and instances of `list`
  1464. are appended to the destination. If the destination type is not
  1465. `dict` or `list`, respectively, the key is overwritten with the
  1466. source value.
  1467. For all other types, the source value overwrites the destination value.
  1468. """
  1469. for key, value in source.items():
  1470. if isinstance(value, dict):
  1471. node = destination.get(key)
  1472. if isinstance(node, dict):
  1473. merge_dicts(value, node)
  1474. else:
  1475. destination[key] = value
  1476. elif isinstance(value, list):
  1477. dest_value = destination.get(key)
  1478. if isinstance(dest_value, list):
  1479. dest_value.extend(value)
  1480. else:
  1481. destination[key] = value
  1482. else:
  1483. destination[key] = value
  1484. return destination
  1485. def coalesce(*arg: Any) -> Any:
  1486. """Return the first non-none value in the list of arguments.
  1487. Similar to ?? in C#.
  1488. """
  1489. return next((a for a in arg if a is not None), None)
  1490. def recursive_cast_dictlike_to_dict(d: dict[str, Any]) -> dict[str, Any]:
  1491. for k, v in d.items():
  1492. if isinstance(v, dict):
  1493. recursive_cast_dictlike_to_dict(v)
  1494. elif hasattr(v, "keys"):
  1495. d[k] = dict(v)
  1496. recursive_cast_dictlike_to_dict(d[k])
  1497. return d
  1498. def remove_keys_with_none_values(d: dict[str, Any] | Any) -> dict[str, Any] | Any:
  1499. # otherwise iterrows will create a bunch of ugly charts
  1500. if not isinstance(d, dict):
  1501. return d
  1502. if isinstance(d, dict):
  1503. new_dict = {}
  1504. for k, v in d.items():
  1505. new_v = remove_keys_with_none_values(v)
  1506. if new_v is not None and not (isinstance(new_v, dict) and len(new_v) == 0):
  1507. new_dict[k] = new_v
  1508. return new_dict if new_dict else None
  1509. def batched(n: int, iterable: Iterable[T]) -> Generator[list[T], None, None]:
  1510. i = iter(iterable)
  1511. batch = list(itertools.islice(i, n))
  1512. while batch:
  1513. yield batch
  1514. batch = list(itertools.islice(i, n))
  1515. def random_string(length: int = 12) -> str:
  1516. """Generate a random string of a given length.
  1517. :param length: Length of the string to generate.
  1518. :return: Random string.
  1519. """
  1520. return "".join(
  1521. secrets.choice(string.ascii_lowercase + string.digits) for _ in range(length)
  1522. )
  1523. def sample_with_exponential_decay_weights(
  1524. xs: Iterable | Iterable[Iterable],
  1525. ys: Iterable[Iterable],
  1526. keys: Iterable | None = None,
  1527. sample_size: int = 1500,
  1528. ) -> tuple[list, list, list | None]:
  1529. """Sample from a list of lists with weights that decay exponentially.
  1530. May be used with the wandb.plot.line_series function.
  1531. """
  1532. xs_array = np.array(xs)
  1533. ys_array = np.array(ys)
  1534. keys_array = np.array(keys) if keys else None
  1535. weights = np.exp(-np.arange(len(xs_array)) / len(xs_array))
  1536. weights /= np.sum(weights)
  1537. sampled_indices = np.random.choice(len(xs_array), size=sample_size, p=weights)
  1538. sampled_xs = xs_array[sampled_indices].tolist()
  1539. sampled_ys = ys_array[sampled_indices].tolist()
  1540. sampled_keys = keys_array[sampled_indices].tolist() if keys_array else None
  1541. return sampled_xs, sampled_ys, sampled_keys
  1542. @dataclasses.dataclass(frozen=True)
  1543. class InstalledDistribution:
  1544. """An installed distribution.
  1545. Attributes:
  1546. key: The distribution name as it would be imported.
  1547. version: The distribution's version string.
  1548. """
  1549. key: str
  1550. version: str
  1551. def working_set() -> Iterable[InstalledDistribution]:
  1552. """Return the working set of installed distributions."""
  1553. from importlib.metadata import distributions
  1554. for d in distributions():
  1555. with contextlib.suppress(KeyError, UnicodeDecodeError, TypeError):
  1556. # In some distributions, the "Name" attribute may not be present,
  1557. # or the metadata itself may be None or malformed, which can raise
  1558. # KeyError, UnicodeDecodeError, or TypeError.
  1559. # For additional context, see: https://github.com/python/importlib_metadata/issues/371.
  1560. yield InstalledDistribution(key=d.metadata["Name"], version=d.version)
  1561. def get_core_path() -> str:
  1562. """Returns the path to the wandb-core binary.
  1563. The path can be set explicitly via the _WANDB_CORE_PATH environment
  1564. variable. Otherwise, the path to the binary in the current package
  1565. is returned.
  1566. Returns:
  1567. str: The path to the wandb-core package.
  1568. Raises:
  1569. WandbCoreNotAvailableError: If wandb-core was not built for the current system.
  1570. """
  1571. # NOTE: Environment variable _WANDB_CORE_PATH is a temporary development feature
  1572. # to assist in running the core service from a live development directory.
  1573. path_from_env: str = os.environ.get("_WANDB_CORE_PATH", "")
  1574. if path_from_env:
  1575. wandb.termwarn(
  1576. f"Using wandb-core from path `_WANDB_CORE_PATH={path_from_env}`. "
  1577. "This is a development feature and may not work as expected."
  1578. )
  1579. return path_from_env
  1580. bin_path = pathlib.Path(__file__).parent / "bin" / "wandb-core"
  1581. if not bin_path.exists():
  1582. raise WandbCoreNotAvailableError(
  1583. f"File not found: {bin_path}."
  1584. " Please contact support at support@wandb.com."
  1585. f" Your platform is: {platform.platform()}."
  1586. )
  1587. return str(bin_path)