Loading...
Loading...
Use when writing DALI data loading or preprocessing code with `nvidia.dali.experimental.dynamic` (ndd), or when converting DALI pipeline-mode code to dynamic mode, or when the user asks about DALI dynamic mode, imperative DALI, or ndd. Use this skill any time someone mentions 'ndd', 'dynamic mode', or wants to load/augment data with DALI outside of a pipeline definition.
npx skill4agent add nvidia/skills dali-dynamic-mode

`pipe.build()` `pipe.run()`

import nvidia.dali.experimental.dynamic as ndd

t = ndd.tensor(data) # copy
t = ndd.as_tensor(data) # wrap, no copy if possible
t.cpu() # move to CPU
t.gpu() # move to GPU
t.torch(copy=False) # conversion to PyTorch tensor with no copy (default)
t[1:3] # slicing supported
np.asarray(t) # NumPy via __array__ (CPU only)

`__dlpack__` `__cuda_array_interface__` `__array__`

b = ndd.batch([arr1, arr2]) # copy
b = ndd.as_batch(data) # wrap, no copy if possible

`__getitem__` `batch[i]` `TypeError`

| Intent | Method | Returns |
|---|---|---|
| Get sample i | | |
| Get subset of samples | | |
| Slice within each sample | | |
`.select()` `.slice`

xy = ndd.random.uniform(batch_size=16, range=[0, 1], shape=2)
crop_x = xy.slice[0] # Batch of 16 scalars, first element from each sample
crop_y = xy.slice[1] # Batch of 16 scalars, second element from each sample
sample_0 = xy.select(0) # Tensor, the entire first sample [x, y]

`batch.torch()` `batch.torch(pad=True)` `batch.torch(copy=None)` `__dlpack__` `ndd.as_tensor(batch)` `ndd.as_tensor` `pad` `Tensor.torch(copy=False)` `for sample in batch:`

reader = ndd.readers.File(file_root=image_dir, random_shuffle=True)
for epoch in range(num_epochs):
    for jpegs, labels in reader.next_epoch(batch_size=64):
        # jpegs, labels are Batch objects
        ...

`labels.torch().to(device)` `ndd.readers.File(...)` `ndd.readers.COCO(...)` `ndd.readers.TFRecord(...)` `batch_size` `next_epoch()` `next_epoch(batch_size=N)` `Batch` `next_epoch()` `Tensor` `next_epoch()` `next_epoch()`

reader = ndd.readers.File(
file_root=image_dir,
shard_id=rank, num_shards=world_size,
stick_to_shard=True,
pad_last_batch=True,
)

`device="gpu"` `"mixed"` `"mixed"` `device="gpu"` `.cpu()` `.torch()` `.cpu()` `__array__` `eager` `.evaluate()` `.torch()` `__dlpack__` `__array__` `.shape`

with ndd.EvalMode.sync_full:
    images = ndd.decoders.image(jpegs, device="gpu")
    images = ndd.resize(images, size=[224, 224])
    # Any error surfaces here, at the exact op that failed

`deferred` `eager` `sync_cpu` `sync_full` `EvalMode.sync_full` `.evaluate()` `sync_cpu` `sync_full`

ndd.set_num_threads(4) # Call once at startup, only if necessary to override the defaults

`DALI_NUM_THREADS`

# Approach 1: set the thread-local default seed (simple, good enough for most cases)
ndd.random.set_seed(42)
angles = ndd.random.uniform(batch_size=64, range=(-30, 30))
# Approach 2: explicit RNG object (finer control, pass rng= to each op)
rng = ndd.random.RNG(seed=42)
values = ndd.random.uniform(batch_size=64, range=[0, 1], shape=2, rng=rng)

`rng=` `batch_size`

import nvidia.dali.experimental.dynamic as ndd
ndd.set_num_threads(4)
reader = ndd.readers.File(file_root="/data/imagenet/train", random_shuffle=True)
for epoch in range(num_epochs):
    for jpegs, labels in reader.next_epoch(batch_size=64):
        images = ndd.decoders.image(jpegs, device="gpu")
        images = ndd.resize(images, size=[224, 224])
        images = ndd.crop_mirror_normalize(
            images,
            mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
            std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
        )
        train_step(images.torch(), labels.torch())

| Wrong | Right | Why |
|---|---|---|
| | |
| | |
| | |
| | Let consumption trigger eval | |
| | | Avoids wasteful D2H + H2D round-trip |
| Recreate reader each epoch | | Readers are stateful -- create once, reuse |
| | | Reader classes are PascalCase |
| | Exhaust iterator or create new reader | Iterator must be fully consumed before next |
| No | | No pipeline-level batch size to inherit |
| Pipeline Mode | Dynamic Mode |
|---|---|
| | Direct function calls in a loop |
| | |
| | |
| | |
| Pipeline-level | |
| Pipeline-level | |
| Pipeline-level | |
| | |
| | |
| | |