llamafile.py 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158
  1. # Copyright (c) Alibaba, Inc. and its affiliates.
  2. import logging
  3. import os
  4. import sys
  5. from argparse import ArgumentParser
  6. from modelscope import model_file_download
  7. from modelscope.cli.base import CLICommand
  8. from modelscope.hub.api import HubApi
  9. from modelscope.utils.logger import get_logger
  10. logger = get_logger(log_level=logging.WARNING)
  11. def subparser_func(args):
  12. """ Function which will be called for a specific sub parser.
  13. """
  14. return LlamafileCMD(args)
  15. class LlamafileCMD(CLICommand):
  16. name = 'llamafile'
  17. def __init__(self, args):
  18. self.args = args
  19. self.model_id = self.args.model
  20. if self.model_id is None or self.model_id.count('/') != 1:
  21. raise ValueError(f'Invalid model id [{self.model_id}].')
  22. if self.args.file is not None:
  23. # ignore accuracy if file argument is provided
  24. self.args.accuracy = None
  25. if not self.args.file.lower().endswith('.llamafile'):
  26. raise ValueError('file argument must ends with ".llamafile".')
  27. self.api = HubApi()
  28. @staticmethod
  29. def define_args(parsers: ArgumentParser):
  30. """ define args for clear-cache command.
  31. """
  32. parser = parsers.add_parser(LlamafileCMD.name)
  33. parser.add_argument(
  34. '--model',
  35. type=str,
  36. required=True,
  37. help=
  38. 'The id of the model, whose repo must contain at least one llamafile'
  39. )
  40. group = parser.add_mutually_exclusive_group()
  41. group.add_argument(
  42. '--accuracy',
  43. type=str,
  44. required=False,
  45. default='q4_k_m',
  46. help=
  47. 'Selected accuracy of GGUF files in the repo. Ignored when "file" is also provided.'
  48. )
  49. group.add_argument(
  50. '--file',
  51. type=str,
  52. required=False,
  53. help=
  54. 'The name of a specified llamafile in the model repo. This takes precedence over "accuracy".'
  55. )
  56. parser.add_argument(
  57. '--local_dir',
  58. type=str,
  59. default=None,
  60. help=
  61. 'Directory where the selected llamafile would will be downloaded to.'
  62. )
  63. group.add_argument(
  64. '--launch',
  65. type=str,
  66. required=False,
  67. default='True',
  68. help=
  69. 'Whether to launch model with the downloaded llamafile, default to True.'
  70. )
  71. parser.set_defaults(func=subparser_func)
  72. def execute(self):
  73. if self.args.file:
  74. self.args.accuracy = None
  75. all_files = self.api.get_model_files(self.model_id, recursive=True)
  76. llamafiles = []
  77. for info in all_files:
  78. file_path = info['Path']
  79. if file_path and file_path.lower().endswith(
  80. '.llamafile') and '-of-' not in file_path.lower():
  81. llamafiles.append(file_path)
  82. if not llamafiles:
  83. raise ValueError(
  84. f'Cannot locate a valid llamafile in repo {self.model_id}.')
  85. logger.info(
  86. f'list of llamafiles in repo {self.model_id}:\n{llamafiles}.')
  87. # default choose the first llamafile if there is no q4_k_m, and no accuracy or file is specified
  88. selected_file = llamafiles[0]
  89. found = False
  90. for f in llamafiles:
  91. if self.args.file and f == self.args.file:
  92. selected_file = f
  93. found = True
  94. break
  95. if self.args.accuracy and self.args.accuracy.lower() in f.lower():
  96. selected_file = f
  97. found = True
  98. break
  99. if found:
  100. print(f'llamafile matching criteria found: [{selected_file}].')
  101. else:
  102. print(
  103. f'No matched llamafile found in repo, choosing the first llamafile in repo: [{selected_file}]'
  104. )
  105. downloaded_file = os.path.abspath(
  106. model_file_download(
  107. self.args.model, selected_file, local_dir=self.args.local_dir))
  108. if sys.platform.startswith('win'):
  109. downloaded_file = self._rename_extension(downloaded_file)
  110. if self.args.launch.lower() == 'true':
  111. print(f'Launching model with llamafile [{downloaded_file}]:')
  112. self._execute_llamafile(downloaded_file)
  113. else:
  114. print(
  115. f'No Launching. Llamafile model downloaded to [{downloaded_file}], you may execute it separately.'
  116. )
  117. def _execute_llamafile(self, file_path):
  118. current_mode = os.stat(file_path).st_mode
  119. new_mode = current_mode | 0o111
  120. os.chmod(file_path, new_mode)
  121. execute_cmd = file_path
  122. has_gpu = False
  123. try:
  124. import torch
  125. has_gpu = torch.cuda.is_available()
  126. except ModuleNotFoundError:
  127. # we depend on torch to detect gpu.
  128. # if torch is not available, we will just assume gpu cannot be used
  129. pass
  130. if has_gpu:
  131. print(
  132. 'GPU detected, launching model with llamafile GPU option >>>')
  133. execute_cmd = f'{execute_cmd} -ngl 999'
  134. os.system(execute_cmd)
  135. def _rename_extension(self, original_file_name):
  136. directory, filename = os.path.split(original_file_name)
  137. base_name, _ = os.path.splitext(filename)
  138. new_filename = os.path.join(directory, f'{base_name}.exe')
  139. os.rename(original_file_name, new_filename)
  140. return new_filename