env.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535
  1. """All of W&B's environment variables.
  2. Getters and putters for all of them should go here. That way it'll be easier to
  3. avoid typos with names and be consistent about environment variables' semantics.
  4. Environment variables are not the authoritative source for these values in many
  5. cases.
  6. """
  7. from __future__ import annotations
  8. import json
  9. import os
  10. import sys
  11. from pathlib import Path
  12. from typing import MutableMapping
  13. import platformdirs
# Names of the environment variables used by this module. Each constant holds
# the literal variable name, so callers can reference the constant instead of
# retyping the string.
CONFIG_PATHS = "WANDB_CONFIG_PATHS"
SWEEP_PARAM_PATH = "WANDB_SWEEP_PARAM_PATH"
SHOW_RUN = "WANDB_SHOW_RUN"
DEBUG = "WANDB_DEBUG"
SILENT = "WANDB_SILENT"
QUIET = "WANDB_QUIET"
INITED = "WANDB_INITED"
DIR = "WANDB_DIR"
# Deprecate DESCRIPTION in a future release
DESCRIPTION = "WANDB_DESCRIPTION"
NAME = "WANDB_NAME"
NOTEBOOK_NAME = "WANDB_NOTEBOOK_NAME"
NOTES = "WANDB_NOTES"
USERNAME = "WANDB_USERNAME"
USER_EMAIL = "WANDB_USER_EMAIL"
PROJECT = "WANDB_PROJECT"
ENTITY = "WANDB_ENTITY"
ORGANIZATION = "WANDB_ORGANIZATION"
BASE_URL = "WANDB_BASE_URL"
APP_URL = "WANDB_APP_URL"
PROGRAM = "WANDB_PROGRAM"
ARGS = "WANDB_ARGS"
MODE = "WANDB_MODE"
START_METHOD = "WANDB_START_METHOD"
RESUME = "WANDB_RESUME"
RUN_ID = "WANDB_RUN_ID"
RUN_STORAGE_ID = "WANDB_RUN_STORAGE_ID"
RUN_GROUP = "WANDB_RUN_GROUP"
RUN_DIR = "WANDB_RUN_DIR"
SWEEP_ID = "WANDB_SWEEP_ID"
HTTP_TIMEOUT = "WANDB_HTTP_TIMEOUT"
FILE_PUSHER_TIMEOUT = "WANDB_FILE_PUSHER_TIMEOUT"
API_KEY = "WANDB_API_KEY"
IDENTITY_TOKEN_FILE = "WANDB_IDENTITY_TOKEN_FILE"
CREDENTIALS_FILE = "WANDB_CREDENTIALS_FILE"
JOB_TYPE = "WANDB_JOB_TYPE"
DISABLE_CODE = "WANDB_DISABLE_CODE"
DISABLE_GIT = "WANDB_DISABLE_GIT"
GIT_ROOT = "WANDB_GIT_ROOT"
SAVE_CODE = "WANDB_SAVE_CODE"
TAGS = "WANDB_TAGS"
# NOTE: the constant is named IGNORE but the variable is WANDB_IGNORE_GLOBS.
IGNORE = "WANDB_IGNORE_GLOBS"
ERROR_REPORTING = "WANDB_ERROR_REPORTING"
CORE_DEBUG = "WANDB_CORE_DEBUG"
DOCKER = "WANDB_DOCKER"
AGENT_REPORT_INTERVAL = "WANDB_AGENT_REPORT_INTERVAL"
AGENT_KILL_DELAY = "WANDB_AGENT_KILL_DELAY"
AGENT_DISABLE_FLAPPING = "WANDB_AGENT_DISABLE_FLAPPING"
AGENT_MAX_INITIAL_FAILURES = "WANDB_AGENT_MAX_INITIAL_FAILURES"
CRASH_NOSYNC_TIME = "WANDB_CRASH_NOSYNC_TIME"
MAGIC = "WANDB_MAGIC"
HOST = "WANDB_HOST"
ANONYMOUS = "WANDB_ANONYMOUS"
JUPYTER = "WANDB_JUPYTER"
CONFIG_DIR = "WANDB_CONFIG_DIR"
DATA_DIR = "WANDB_DATA_DIR"
ARTIFACT_DIR = "WANDB_ARTIFACT_DIR"
ARTIFACT_FETCH_FILE_URL_BATCH_SIZE = "WANDB_ARTIFACT_FETCH_FILE_URL_BATCH_SIZE"
CACHE_DIR = "WANDB_CACHE_DIR"
# NOTE: the constant is named DISABLE_SSL but the variable is
# WANDB_INSECURE_DISABLE_SSL.
DISABLE_SSL = "WANDB_INSECURE_DISABLE_SSL"
SERVICE = "WANDB_SERVICE"
SENTRY_DSN = "WANDB_SENTRY_DSN"
INIT_TIMEOUT = "WANDB_INIT_TIMEOUT"
GIT_COMMIT = "WANDB_GIT_COMMIT"
GIT_REMOTE_URL = "WANDB_GIT_REMOTE_URL"
_EXECUTABLE = "WANDB_X_EXECUTABLE"
LAUNCH_QUEUE_NAME = "WANDB_LAUNCH_QUEUE_NAME"
LAUNCH_QUEUE_ENTITY = "WANDB_LAUNCH_QUEUE_ENTITY"
LAUNCH_TRACE_ID = "WANDB_LAUNCH_TRACE_ID"
ENABLE_DCGM_PROFILING = "WANDB_ENABLE_DCGM_PROFILING"
# For testing, to be removed in future version
USE_V1_ARTIFACTS = "_WANDB_USE_V1_ARTIFACTS"
  86. def immutable_keys() -> list[str]:
  87. """These are env keys that shouldn't change within a single process.
  88. We use this to maintain certain values between multiple calls to wandb.init within a single process.
  89. """
  90. return [
  91. DIR,
  92. ENTITY,
  93. PROJECT,
  94. API_KEY,
  95. IGNORE,
  96. DISABLE_CODE,
  97. DISABLE_GIT,
  98. DOCKER,
  99. MODE,
  100. BASE_URL,
  101. ERROR_REPORTING,
  102. CRASH_NOSYNC_TIME,
  103. MAGIC,
  104. USERNAME,
  105. USER_EMAIL,
  106. DIR,
  107. SILENT,
  108. CONFIG_PATHS,
  109. ANONYMOUS,
  110. RUN_GROUP,
  111. JOB_TYPE,
  112. TAGS,
  113. RESUME,
  114. AGENT_REPORT_INTERVAL,
  115. HTTP_TIMEOUT,
  116. HOST,
  117. DATA_DIR,
  118. ARTIFACT_DIR,
  119. ARTIFACT_FETCH_FILE_URL_BATCH_SIZE,
  120. CACHE_DIR,
  121. USE_V1_ARTIFACTS,
  122. DISABLE_SSL,
  123. IDENTITY_TOKEN_FILE,
  124. CREDENTIALS_FILE,
  125. ]
  126. def _env_as_bool(
  127. var: str, default: str | None = None, env: MutableMapping | None = None
  128. ) -> bool:
  129. if env is None:
  130. env = os.environ
  131. val = env.get(var, default)
  132. if not isinstance(val, str):
  133. return False
  134. try:
  135. return strtobool(val)
  136. except ValueError:
  137. return False
  138. def is_debug(default: str | None = None, env: MutableMapping | None = None) -> bool:
  139. return _env_as_bool(DEBUG, default=default, env=env)
  140. def is_offline(env: MutableMapping | None = None) -> bool:
  141. if env is None:
  142. env = os.environ
  143. return env.get(MODE) == "offline"
  144. def is_quiet() -> bool:
  145. return _env_as_bool(QUIET, default="false")
  146. def is_silent() -> bool:
  147. return _env_as_bool(SILENT, default="false")
  148. def error_reporting_enabled() -> bool:
  149. return _env_as_bool(ERROR_REPORTING, default="True")
  150. def core_debug(default: str | None = None) -> bool:
  151. return _env_as_bool(CORE_DEBUG, default=default) or is_debug()
  152. def ssl_disabled() -> bool:
  153. return _env_as_bool(DISABLE_SSL, default="False")
  154. def dcgm_profiling_enabled() -> bool:
  155. """Checks whether collecting profiling metrics for Nvidia GPUs using DCGM is requested.
  156. Note: Enabling this feature can lead to increased resource usage
  157. compared to standard monitoring.
  158. Requires the `nvidia-dcgm` service to be running on the machine.
  159. """
  160. return _env_as_bool(ENABLE_DCGM_PROFILING, default="False")
  161. def get_error_reporting(
  162. default: bool | str = True,
  163. env: MutableMapping | None = None,
  164. ) -> bool | str:
  165. if env is None:
  166. env = os.environ
  167. return env.get(ERROR_REPORTING, default)
  168. def get_run(
  169. default: str | None = None, env: MutableMapping | None = None
  170. ) -> str | None:
  171. if env is None:
  172. env = os.environ
  173. return env.get(RUN_ID, default)
  174. def get_args(
  175. default: list[str] | None = None, env: MutableMapping | None = None
  176. ) -> list[str] | None:
  177. if env is None:
  178. env = os.environ
  179. if env.get(ARGS):
  180. try:
  181. return json.loads(env.get(ARGS, "[]")) # type: ignore
  182. except ValueError:
  183. return None
  184. else:
  185. return default or sys.argv[1:]
  186. def get_docker(
  187. default: str | None = None, env: MutableMapping | None = None
  188. ) -> str | None:
  189. if env is None:
  190. env = os.environ
  191. return env.get(DOCKER, default)
  192. def get_http_timeout(default: int = 20, env: MutableMapping | None = None) -> int:
  193. if env is None:
  194. env = os.environ
  195. return int(env.get(HTTP_TIMEOUT, default))
  196. def get_file_pusher_timeout(
  197. default: int | None = None,
  198. env: MutableMapping | None = None,
  199. ) -> int | None:
  200. if env is None:
  201. env = os.environ
  202. timeout = env.get(FILE_PUSHER_TIMEOUT, default)
  203. return int(timeout) if timeout else None
  204. def get_ignore(
  205. default: list[str] | None = None, env: MutableMapping | None = None
  206. ) -> list[str] | None:
  207. if env is None:
  208. env = os.environ
  209. ignore = env.get(IGNORE)
  210. if ignore is not None:
  211. return ignore.split(",")
  212. else:
  213. return default
  214. def get_project(
  215. default: str | None = None, env: MutableMapping | None = None
  216. ) -> str | None:
  217. if env is None:
  218. env = os.environ
  219. return env.get(PROJECT, default)
  220. def get_username(
  221. default: str | None = None, env: MutableMapping | None = None
  222. ) -> str | None:
  223. if env is None:
  224. env = os.environ
  225. return env.get(USERNAME, default)
  226. def get_user_email(
  227. default: str | None = None, env: MutableMapping | None = None
  228. ) -> str | None:
  229. if env is None:
  230. env = os.environ
  231. return env.get(USER_EMAIL, default)
  232. def get_entity(
  233. default: str | None = None, env: MutableMapping | None = None
  234. ) -> str | None:
  235. if env is None:
  236. env = os.environ
  237. return env.get(ENTITY, default)
  238. def get_organization(
  239. default: str | None = None, env: MutableMapping | None = None
  240. ) -> str | None:
  241. if env is None:
  242. env = os.environ
  243. return env.get(ORGANIZATION, default)
  244. def get_base_url(
  245. default: str | None = None, env: MutableMapping | None = None
  246. ) -> str | None:
  247. if env is None:
  248. env = os.environ
  249. return env.get(BASE_URL, default)
  250. def get_app_url(
  251. default: str | None = None, env: MutableMapping | None = None
  252. ) -> str | None:
  253. if env is None:
  254. env = os.environ
  255. return env.get(APP_URL, default)
  256. def get_show_run(default: str | None = None, env: MutableMapping | None = None) -> bool:
  257. if env is None:
  258. env = os.environ
  259. return bool(env.get(SHOW_RUN, default))
  260. def get_description(
  261. default: str | None = None, env: MutableMapping | None = None
  262. ) -> str | None:
  263. if env is None:
  264. env = os.environ
  265. return env.get(DESCRIPTION, default)
  266. def get_tags(default: str = "", env: MutableMapping | None = None) -> list[str]:
  267. if env is None:
  268. env = os.environ
  269. return [tag for tag in env.get(TAGS, default).split(",") if tag]
  270. def get_dir(
  271. default: str | None = None, env: MutableMapping | None = None
  272. ) -> str | None:
  273. if env is None:
  274. env = os.environ
  275. return env.get(DIR, default)
  276. def get_config_paths(
  277. default: str | None = None, env: MutableMapping | None = None
  278. ) -> str | None:
  279. if env is None:
  280. env = os.environ
  281. return env.get(CONFIG_PATHS, default)
  282. def get_agent_report_interval(
  283. default: str | None = None, env: MutableMapping | None = None
  284. ) -> int | None:
  285. if env is None:
  286. env = os.environ
  287. val = env.get(AGENT_REPORT_INTERVAL, default)
  288. try:
  289. val = int(val) # type: ignore
  290. except ValueError:
  291. val = None # silently ignore env format errors, caller should handle.
  292. return val
  293. def get_agent_kill_delay(
  294. default: str | None = None, env: MutableMapping | None = None
  295. ) -> int | None:
  296. if env is None:
  297. env = os.environ
  298. val = env.get(AGENT_KILL_DELAY, default)
  299. try:
  300. val = int(val) # type: ignore
  301. except ValueError:
  302. val = None # silently ignore env format errors, caller should handle.
  303. return val
  304. def get_crash_nosync_time(
  305. default: str | None = None, env: MutableMapping | None = None
  306. ) -> int | None:
  307. if env is None:
  308. env = os.environ
  309. val = env.get(CRASH_NOSYNC_TIME, default)
  310. try:
  311. val = int(val) # type: ignore
  312. except ValueError:
  313. val = None # silently ignore env format errors, caller should handle.
  314. return val
  315. def get_magic(
  316. default: str | None = None, env: MutableMapping | None = None
  317. ) -> str | None:
  318. if env is None:
  319. env = os.environ
  320. val = env.get(MAGIC, default)
  321. return val
  322. def get_data_dir(env: MutableMapping | None = None) -> str:
  323. default_dir = platformdirs.user_data_dir("wandb")
  324. if env is None:
  325. env = os.environ
  326. val = env.get(DATA_DIR, default_dir)
  327. return val
  328. def get_artifact_dir(env: MutableMapping | None = None) -> str:
  329. default_dir = os.path.join(".", "artifacts")
  330. if env is None:
  331. env = os.environ
  332. val = env.get(ARTIFACT_DIR, default_dir)
  333. return os.path.abspath(str(val))
  334. def get_artifact_fetch_file_url_batch_size(env: MutableMapping | None = None) -> int:
  335. default_batch_size = 5000
  336. if env is None:
  337. env = os.environ
  338. val = int(env.get(ARTIFACT_FETCH_FILE_URL_BATCH_SIZE, default_batch_size))
  339. return val
  340. def get_cache_dir(env: MutableMapping | None = None) -> Path:
  341. env = env or os.environ
  342. return Path(env.get(CACHE_DIR, platformdirs.user_cache_dir("wandb")))
  343. def get_use_v1_artifacts(env: MutableMapping | None = None) -> bool:
  344. if env is None:
  345. env = os.environ
  346. val = bool(env.get(USE_V1_ARTIFACTS, False))
  347. return val
  348. def get_agent_max_initial_failures(
  349. default: int | None = None, env: MutableMapping | None = None
  350. ) -> int | None:
  351. if env is None:
  352. env = os.environ
  353. val = env.get(AGENT_MAX_INITIAL_FAILURES, default)
  354. try:
  355. val = int(val) # type: ignore
  356. except ValueError:
  357. val = default
  358. return val
  359. def set_entity(value: str, env: MutableMapping | None = None) -> None:
  360. if env is None:
  361. env = os.environ
  362. env[ENTITY] = value
  363. def set_project(value: str, env: MutableMapping | None = None) -> None:
  364. if env is None:
  365. env = os.environ
  366. env[PROJECT] = value or "uncategorized"
  367. def should_save_code() -> bool:
  368. save_code = _env_as_bool(SAVE_CODE, default="False")
  369. code_disabled = _env_as_bool(DISABLE_CODE, default="False")
  370. return save_code and not code_disabled
  371. def disable_git(env: MutableMapping | None = None) -> bool:
  372. if env is None:
  373. env = os.environ
  374. val = env.get(DISABLE_GIT, default="False")
  375. if isinstance(val, str):
  376. val = False if val.lower() == "false" else True
  377. return val
  378. def get_launch_queue_name(env: MutableMapping | None = None) -> str | None:
  379. if env is None:
  380. env = os.environ
  381. val = env.get(LAUNCH_QUEUE_NAME, None)
  382. return val
  383. def get_launch_queue_entity(env: MutableMapping | None = None) -> str | None:
  384. if env is None:
  385. env = os.environ
  386. val = env.get(LAUNCH_QUEUE_ENTITY, None)
  387. return val
  388. def get_launch_trace_id(env: MutableMapping | None = None) -> str | None:
  389. if env is None:
  390. env = os.environ
  391. val = env.get(LAUNCH_TRACE_ID, None)
  392. return val
  393. def get_credentials_file(default: str, env: MutableMapping | None = None) -> Path:
  394. """Retrieve the path for the credentials file used to save access tokens.
  395. The credentials file path can be set via an environment variable, otherwise
  396. the default path is used.
  397. """
  398. if env is None:
  399. env = os.environ
  400. credentials_file = env.get(CREDENTIALS_FILE, default)
  401. return Path(credentials_file)
  402. def strtobool(val: str) -> bool:
  403. """Convert a string representation of truth to true or false.
  404. Copied from distutils. distutils was removed in Python 3.12.
  405. """
  406. val = val.lower()
  407. if val in ("y", "yes", "t", "true", "on", "1"):
  408. return True
  409. elif val in ("n", "no", "f", "false", "off", "0"):
  410. return False
  411. else:
  412. raise ValueError(f"invalid truth value {val!r}")