| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263 |
- from __future__ import annotations
- import sys
- from typing import TYPE_CHECKING
- from argparse import ArgumentParser
- from .._models import BaseModel
- from ...lib._validators import (
- get_validators,
- write_out_file,
- read_any_format,
- apply_validators,
- apply_necessary_remediation,
- )
- if TYPE_CHECKING:
- from argparse import _SubParsersAction
- def register(subparser: _SubParsersAction[ArgumentParser]) -> None:
- sub = subparser.add_parser("fine_tunes.prepare_data")
- sub.add_argument(
- "-f",
- "--file",
- required=True,
- help="JSONL, JSON, CSV, TSV, TXT or XLSX file containing prompt-completion examples to be analyzed."
- "This should be the local file path.",
- )
- sub.add_argument(
- "-q",
- "--quiet",
- required=False,
- action="store_true",
- help="Auto accepts all suggestions, without asking for user input. To be used within scripts.",
- )
- sub.set_defaults(func=prepare_data, args_model=PrepareDataArgs)
- class PrepareDataArgs(BaseModel):
- file: str
- quiet: bool
- def prepare_data(args: PrepareDataArgs) -> None:
- sys.stdout.write("Analyzing...\n")
- fname = args.file
- auto_accept = args.quiet
- df, remediation = read_any_format(fname)
- apply_necessary_remediation(None, remediation)
- validators = get_validators()
- assert df is not None
- apply_validators(
- df,
- fname,
- remediation,
- validators,
- auto_accept,
- write_out_file_func=write_out_file,
- )
|