model_parallel_utils.py 2.2 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455
  1. # Copyright 2020 The HuggingFace Team. All rights reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from math import ceil
  15. def assert_device_map(device_map, num_blocks):
  16. blocks = list(range(0, num_blocks))
  17. device_map_blocks = [item for sublist in list(device_map.values()) for item in sublist]
  18. # Duplicate check
  19. duplicate_blocks = []
  20. for i in device_map_blocks:
  21. if device_map_blocks.count(i) > 1 and i not in duplicate_blocks:
  22. duplicate_blocks.append(i)
  23. # Missing blocks
  24. missing_blocks = [i for i in blocks if i not in device_map_blocks]
  25. extra_blocks = [i for i in device_map_blocks if i not in blocks]
  26. if len(duplicate_blocks) != 0:
  27. raise ValueError(
  28. "Duplicate attention blocks specified in device_map. Attention blocks must be specified to one device."
  29. " These attention blocks were specified more than once: " + str(duplicate_blocks)
  30. )
  31. if len(missing_blocks) != 0:
  32. raise ValueError(
  33. "There are attention blocks for this model that are not specified in the device_map. Add these attention "
  34. "blocks to a device on the device_map: " + str(missing_blocks)
  35. )
  36. if len(extra_blocks) != 0:
  37. raise ValueError(
  38. "The device_map contains more attention blocks than this model has. Remove these from the device_map:"
  39. + str(extra_blocks)
  40. )
  41. def get_device_map(n_layers, devices):
  42. """Returns a dictionary of layers distributed evenly across all devices."""
  43. layers = list(range(n_layers))
  44. n_blocks = int(ceil(n_layers / len(devices)))
  45. layers_list = [layers[i : i + n_blocks] for i in range(0, n_layers, n_blocks)]
  46. return dict(zip(devices, layers_list))