Loading...
Loading...
Use when the user wants to create a dataset, generate synthetic data, or build a data generation pipeline.
npx skill4agent add nvidia/skills nemo-data-designer-plugin
workflows/interactive.md | workflows/autopilot.md | references/seed-datasets.md | references/person-sampling.md | ModelConfig | references/nemo-platform-plugin-additions.md | sampler_type="category" | params=dd.CategorySamplerParams(...) | prompt | system_prompt | expr | {{ column_name }} | {{ column_name.field }} | ** | SamplerColumnConfig | params | sampler_params | LLMJudgeColumnConfig | {reasoning: str, score: int} | .score | quality | correctness | {{ quality.correctness.score }} | {{ quality.correctness }} | ** | nemo data-designer | nemo data-designer | load_config_builder() | DataDesignerConfigBuilder
customer_reviews.py
# /// script
# dependencies = [
# "data-designer", # always required
# "pydantic", # only if this script imports from pydantic
# # add additional dependencies here
# ]
# ///
import data_designer.config as dd
from pydantic import BaseModel, Field
# Use Pydantic models when the output needs to conform to a specific schema
class MyStructuredOutput(BaseModel):
    """Template schema for structured output.

    Placeholder model: rename the fields and replace the "..." descriptions
    so the model describes the schema the output must conform to.
    """

    field_one: str = Field(description="...")
    field_two: int = Field(description="...")
# Use custom generators when built-in column types aren't enough
@dd.custom_column_generator(
    required_columns=["col_a"],
    side_effect_columns=["extra_col"],
)
def generator_function(row: dict) -> dict:
    """Template custom column generator.

    The decorator declares "col_a" as a required column and "extra_col" as
    a side-effect column; this function writes "extra_col" alongside the
    main output key.

    Args:
        row: The record being generated, as a dict keyed by column name.

    Returns:
        The same ``row`` dict, updated in place.
    """
    # add custom logic here that depends on "col_a" and update row in place
    # NOTE(review): the key below is presumably the column name given to the
    # custom column config that uses this generator -- confirm and rename.
    row["name_in_custom_column_config"] = "custom value"
    row["extra_col"] = "extra value"
    return row
def load_config_builder() -> dd.DataDesignerConfigBuilder:
    """Build and return the Data Designer config builder for this script.

    The builder is created with a single model config (alias "text"). Seed
    dataset, column, and processor calls are left as commented placeholders
    to be filled in for the dataset being designed.

    Returns:
        The configured ``dd.DataDesignerConfigBuilder``.
    """
    config_builder = dd.DataDesignerConfigBuilder(
        # Declaring model configs programmatically here is the portable path:
        # it works for both local `run` and cluster `submit`, while the local
        # YAML registry alternative only works for `run`. The provider below
        # is a common default created during `nemo setup` — confirm it (or
        # discover others) with `nemo inference providers list`. See
        # references/nemo-platform-plugin-additions.md for the local-YAML alternative.
        model_configs=[
            dd.ModelConfig(
                alias="text",
                model="...",
                provider="default/nvidia-build",
                inference_parameters=dd.ChatCompletionInferenceParams(),
            ),
        ],
    )

    # Seed dataset (only if the user explicitly mentions a seed dataset path)
    # config_builder.with_seed_dataset(dd.LocalFileSeedSource(path="path/to/seed.parquet"))

    # config_builder.add_column(...)
    # config_builder.add_processor(...)

    return config_builder