device.py 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119
  1. # Copyright (c) Alibaba, Inc. and its affiliates.
  2. import os
  3. from contextlib import contextmanager
  4. from modelscope.utils.constant import Devices, Frameworks
  5. from modelscope.utils.logger import get_logger
  6. logger = get_logger()
  def verify_device(device_name):
      """ Verify a device name and split it into device type and device id.

      Accepted names (case-insensitive) are cpu, cuda, gpu, cuda:X and gpu:X.

      Args:
          device_name (str): device str, should be either cpu, cuda, gpu,
              gpu:X or cuda:X where X is the ordinal for gpu device.

      Return:
          device info (tuple): device_type and device_id. ``cuda`` is reported
              as ``Devices.gpu``. If device_id is not set, 0 is used for gpu
              devices and None is returned otherwise.

      Raises:
          AssertionError: if device_name is not a non-empty str, has more than
              one ``:`` separator, or names an unknown device type.
          ValueError: if the part after ``:`` is not an integer.
      """
      err_msg = ('device should be either cpu, cuda, gpu, gpu:X or cuda:X '
                 'where X is the ordinal for gpu device.')

      # Raise explicitly rather than using ``assert`` statements: asserts are
      # removed when Python runs with -O, which would disable the validation.
      # AssertionError is kept so existing callers see the same exception type.
      if not isinstance(device_name, str) or device_name == '':
          raise AssertionError(err_msg)

      eles = device_name.lower().split(':')
      if len(eles) > 2 or eles[0] not in ('cpu', 'cuda', 'gpu'):
          raise AssertionError(err_msg)

      device_type = eles[0]
      device_id = None
      if len(eles) == 2:
          try:
              device_id = int(eles[1])
          except ValueError as exc:
              # Same exception type int() raises, but with the usage hint.
              raise ValueError(err_msg) from exc

      # 'cuda' is an alias for the gpu device type.
      if device_type == 'cuda':
          device_type = Devices.gpu
      if device_type == Devices.gpu and device_id is None:
          device_id = 0
      return device_type, device_id
  @contextmanager
  def device_placement(framework, device_name='gpu:0'):
      """ Context manager that selects the device used inside the ``with`` body.

      For tensorflow the body runs inside ``tf.device(...)``. For pytorch the
      current cuda device is set with ``torch.cuda.set_device`` before the body
      runs; nothing is changed back when the body finishes. If a gpu is
      requested but not available, a debug message is logged and cpu is used.
      For any other framework the body simply runs unchanged.

      Args:
          framework (str): tensorflow or pytorch.
          device_name (str): gpu or cpu to use, if you want to specify certain
              gpu, use gpu:$gpu_id or cuda:$gpu_id.

      Returns:
          Context manager

      Examples:
          >>> # Requests for using model on cuda:0 for gpu
          >>> with device_placement('pytorch', device_name='gpu:0'):
          >>>     model = Model.from_pretrained(...)
      """
      kind, ordinal = verify_device(device_name)

      if framework == Frameworks.tf:
          import tensorflow as tf

          gpu_requested = kind == Devices.gpu
          if gpu_requested and not tf.test.is_gpu_available():
              logger.debug(
                  'tensorflow: cuda is not available, using cpu instead.')
              kind = Devices.cpu

          if kind == Devices.cpu:
              with tf.device('/CPU:0'):
                  yield
          elif kind == Devices.gpu:
              with tf.device(f'/device:gpu:{ordinal}'):
                  yield
          return

      if framework == Frameworks.torch:
          import torch

          if kind == Devices.gpu:
              if not torch.cuda.is_available():
                  logger.debug(
                      'pytorch: cuda is not available, using cpu instead.')
              else:
                  torch.cuda.set_device(f'cuda:{ordinal}')

      # pytorch (after the optional device switch) and unknown frameworks
      # both just hand control to the body.
      yield
  def create_device(device_name):
      """ Build a ``torch.device`` from a device name.

      Args:
          device_name (str): cpu, gpu, gpu:0, cuda:0 etc.

      Return:
          torch.device: ``cuda:<id>`` when a gpu is requested and cuda is
              available, otherwise ``cpu`` (an info message is logged when a
              requested gpu is not available).
      """
      import torch

      kind, ordinal = verify_device(device_name)

      if kind != Devices.gpu:
          return torch.device('cpu')

      if torch.cuda.is_available():
          return torch.device(f'cuda:{ordinal}')

      logger.info('cuda is not available, using cpu instead.')
      return torch.device('cpu')
  def get_device():
      """ Pick the torch device for the current process.

      Return:
          torch.device: ``cpu`` when cuda is not available. Otherwise
              ``cuda:$LOCAL_RANK`` when torch.distributed is available and
              initialized and the LOCAL_RANK environment variable is set,
              else ``cuda:0``.
      """
      import torch
      from torch import distributed as dist

      if not torch.cuda.is_available():
          return torch.device('cpu')

      in_distributed_run = (dist.is_available() and dist.is_initialized()
                            and 'LOCAL_RANK' in os.environ)
      if in_distributed_run:
          return torch.device(f"cuda:{os.environ['LOCAL_RANK']}")

      return torch.device('cuda:0')