# Source code for inseq.commands.attribute_dataset.attribute_dataset_args

from dataclasses import dataclass
from typing import Optional

from ...utils import cli_arg
from ..commands_utils import command_args_docstring

# Command-line arguments describing which dataset to load for attribution and
# which of its fields hold the input / generated texts.
#
# NOTE(review): no class docstring is written here on purpose. The
# `command_args_docstring` decorator is defined elsewhere and presumably builds
# the class documentation from the `help` strings below -- confirm before
# adding a literal docstring, since it could be overwritten or duplicated.
#
# Field order is significant: `@dataclass` derives the positional order of the
# generated `__init__` from it, and the fields without a default must stay
# ahead of the defaulted ones.
@command_args_docstring
@dataclass
class LoadDatasetArgs:
    # --- Fields declared without a default --------------------------------
    dataset_name: str = cli_arg(
        aliases=["-d", "--dataset"],
        help="The type of dataset to be loaded for attribution.",
    )
    # NOTE(review): annotated Optional but declared without a default, so it
    # is presumably still required on the command line -- confirm against
    # `cli_arg`'s handling of a missing `default`.
    input_text_field: Optional[str] = cli_arg(
        aliases=["-in", "--input"],
        help="Name of the field containing the input texts used for attribution.",
    )

    # --- Optional fields ----------------------------------------------------
    generated_text_field: Optional[str] = cli_arg(
        default=None,
        aliases=["-gen", "--generated"],
        help="Name of the field containing the generated texts used for constrained decoding.",
    )
    dataset_config: Optional[str] = cli_arg(
        default=None,
        aliases=["--config"],
        help="The name of the Huggingface dataset configuration.",
    )
    dataset_dir: Optional[str] = cli_arg(
        default=None,
        aliases=["--dir"],
        help="Path to the directory containing the data files.",
    )
    dataset_files: Optional[list[str]] = cli_arg(
        default=None,
        aliases=["--files"],
        help="Path to the dataset files.",
    )
    # The only optional field whose default is not None / empty.
    dataset_split: Optional[str] = cli_arg(
        default="train",
        aliases=["--split"],
        help="Dataset split.",
    )
    dataset_revision: Optional[str] = cli_arg(
        default=None,
        aliases=["--revision"],
        help="The Huggingface dataset revision.",
    )
    dataset_auth_token: Optional[str] = cli_arg(
        default=None,
        aliases=["--auth"],
        help="The auth token for the Huggingface dataset.",
    )
    # `default_factory=dict` rather than a literal `{}` default; no CLI alias
    # is declared for this field.
    dataset_kwargs: Optional[dict] = cli_arg(
        default_factory=dict,
        help="Additional keyword arguments passed to the dataset constructor in JSON format.",
    )