fine_tunes.py 1.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263
  1. from __future__ import annotations
  2. import sys
  3. from typing import TYPE_CHECKING
  4. from argparse import ArgumentParser
  5. from .._models import BaseModel
  6. from ...lib._validators import (
  7. get_validators,
  8. write_out_file,
  9. read_any_format,
  10. apply_validators,
  11. apply_necessary_remediation,
  12. )
  13. if TYPE_CHECKING:
  14. from argparse import _SubParsersAction
  15. def register(subparser: _SubParsersAction[ArgumentParser]) -> None:
  16. sub = subparser.add_parser("fine_tunes.prepare_data")
  17. sub.add_argument(
  18. "-f",
  19. "--file",
  20. required=True,
  21. help="JSONL, JSON, CSV, TSV, TXT or XLSX file containing prompt-completion examples to be analyzed."
  22. "This should be the local file path.",
  23. )
  24. sub.add_argument(
  25. "-q",
  26. "--quiet",
  27. required=False,
  28. action="store_true",
  29. help="Auto accepts all suggestions, without asking for user input. To be used within scripts.",
  30. )
  31. sub.set_defaults(func=prepare_data, args_model=PrepareDataArgs)
  32. class PrepareDataArgs(BaseModel):
  33. file: str
  34. quiet: bool
  35. def prepare_data(args: PrepareDataArgs) -> None:
  36. sys.stdout.write("Analyzing...\n")
  37. fname = args.file
  38. auto_accept = args.quiet
  39. df, remediation = read_any_format(fname)
  40. apply_necessary_remediation(None, remediation)
  41. validators = get_validators()
  42. assert df is not None
  43. apply_validators(
  44. df,
  45. fname,
  46. remediation,
  47. validators,
  48. auto_accept,
  49. write_out_file_func=write_out_file,
  50. )