Loading...
Loading...
Optimize MATLAB code for better performance through vectorization, memory management, and profiling. Use when user requests optimization, mentions slow code, performance issues, speed improvements, or asks to make code faster or more efficient.
npx skill4agent add matlab/skills matlab-performance-optimizer

% Slow approach
n = 1000000;
result = zeros(n, 1);   % preallocated n-by-1 output
for i = 1:n
    result(i) = sin(i) * cos(i);   % one scalar evaluation per iteration
end

% Fast approach
n = 1000000;
i = (1:n).';                 % column vector holding the same index values
result = sin(i) .* cos(i);   % element-wise product over the whole vector at once

% Very slow - array grows each iteration
result = [];
for i = 1:10000
    result(end+1) = i^2;   % appends one element, so the array grows on every pass
end

% Fast - preallocated array
n = 10000;
result = zeros(n, 1);   % allocate the full n-by-1 array once
for i = 1:n
    result(i) = i^2;    % write into the already-allocated slot
end

% Slow
sum_val = 0;
for i = 1:length(x)
    sum_val = sum_val + x(i);   % accumulate one element per iteration
end

% Fast
sum_val = sum(x);

% Element-wise operators: .*  ./  .^

% Instead of this:
for i = 1:length(x)
    y(i) = x(i)^2 + 2*x(i) + 1;   % scalar evaluation, one element at a time
end
% Do this:
y = x.^2 + 2*x + 1;   % same polynomial applied to every element of x at once

% Instead of this:
count = 0;   % number of elements kept so far
for i = 1:length(data)
    if data(i) > threshold
        count = count + 1;
        filtered(count) = data(i);
    end
end
filtered = filtered(1:count);   % keep only the first count entries
% Do this:
filtered = data(data > threshold);   % logical mask selects the matching elements directly

% Instead of this:
C = zeros(size(A, 1), size(B, 2));   % result has rows of A and columns of B
for i = 1:size(A, 1)
    for j = 1:size(B, 2)
        for k = 1:size(A, 2)
            C(i,j) = C(i,j) + A(i,k) * B(k,j);   % accumulate the dot product of row i of A and column j of B
        end
    end
end
% Do this:
C = A * B;

% Cumulative functions: cumsum, cumprod, cummax, cummin

% Instead of this:
running_sum = zeros(size(data));
running_sum(1) = data(1);
for i = 2:length(data)
    running_sum(i) = running_sum(i-1) + data(i);   % previous total plus the current element
end
% Do this:
running_sum = cumsum(data);

% Instead of default double (8 bytes)
data = rand(1000, 1000); % 8 MB
% Use single precision when appropriate (4 bytes)
% Request single directly: single(rand(...)) would first build the full
% 8 MB double array and only then convert it.
data = rand(1000, 1000, 'single'); % 4 MB
% Use integers when applicable
% Build the range from uint32 endpoints so that no temporary double
% vector is created before the conversion.
indices = uint32(1):uint32(1000000); % 4 MB instead of 8 MB

% Dense matrix (wastes memory)
A = zeros(10000, 10000);     % 10000*10000 doubles at 8 bytes each; this line allocates the 800 MB
A(1:100, 1:100) = rand(100); % 800 MB
% Sparse matrix (efficient)
A = sparse(10000, 10000);    % all-zero 10000-by-10000 sparse matrix
A(1:100, 1:100) = rand(100); % Only stores non-zeros

% Process large data
largeData = loadData();
processedData = processData(largeData);
% Clear when no longer needed
clear largeData;   % largeData is not referenced again below
% Continue with processed data
results = analyze(processedData);

% Instead of creating copies
A = A + 5; % In-place when possible
% Avoid unnecessary copies
B = A; % Creates copy if A is modified later
B = A + 0; % Forces copy

% Profile code execution
profile on             % start collecting profiling data
myFunction(inputs);
profile viewer         % open the report for the collected data
profile off

% Time single execution
tic;
result = myFunction(data);
elapsedTime = toc;     % time elapsed since the tic above
% Benchmark with timeit (more accurate)
timeit(@() myFunction(data))   % no semicolon, so the measured time is displayed
% Compare multiple approaches
time1 = timeit(@() approach1(data));
time2 = timeit(@() approach2(data));
fprintf('Approach 1: %.6f s\nApproach 2: %.6f s\n', time1, time2);

% SLOW
indices = find(x > 5);   % builds an intermediate index vector
y = x(indices);
% FAST
y = x(x > 5);            % index with the logical mask directly

% SLOW - repmat to match dimensions
A = rand(1000, 5);
B = rand(1, 5);
C = A - repmat(B, size(A, 1), 1);   % tiles B to one row per row of A before subtracting
% FAST - implicit expansion (R2016b+)
C = A - B;

% SLOW - recalculates each iteration
for i = 1:n
    result(i) = data(i) / sqrt(sum(data.^2));   % the sqrt(sum(...)) term does not depend on i
end
% FAST - calculate once
norm_factor = sqrt(sum(data.^2));
for i = 1:n
    result(i) = data(i) / norm_factor;
end
% EVEN FASTER - vectorize
result = data / sqrt(sum(data.^2));

% SLOW - concatenating in loop
str = '';
for i = 1:1000
    str = [str, sprintf('Line %d\n', i)];   % str is rebuilt with one more line on each pass
end
% FAST - cell array + join
lines = cell(1000, 1);
for i = 1:1000
    lines{i} = sprintf('Line %d', i);
end
% NOTE(review): the delimiter is inserted between entries only, so this
% variant has no trailing newline, unlike the two sprintf('...\n') variants.
str = strjoin(lines, '\n');
% FASTEST - vectorized sprintf
str = sprintf('Line %d\n', 1:1000);   % the format is reapplied for each element of 1:1000

% Instead of separate arrays
names = cell(1000, 1);
ages = zeros(1000, 1);
scores = zeros(1000, 1);
% Use table
data = table(names, ages, scores);   % one table variable per input array
% Faster access and better organization
% NOTE(review): the "faster access" claim is not demonstrated here; benchmark
% against the plain arrays before relying on it.

% Use built-in functions
filtered = conv(signal, kernel, 'same');
filtered = filter(b, a, signal);
% For 2D
filtered = conv2(image, kernel, 'same');
filtered = imfilter(image, kernel);
% FFT-based for large kernels (zero-pad for linear convolution)
nfft = length(signal) + length(kernel) - 1;   % length of the full linear convolution
% The result below has nfft samples (the full convolution), not the
% 'same'-sized output produced by the conv call at the top of this snippet.
filtered = ifft(fft(signal, nfft) .* fft(kernel, nfft));

% Instead of nested loops for pairwise distances
% SLOW
n = size(points, 1);       % one point per row
distances = zeros(n, n);
for i = 1:n
    for j = 1:n
        distances(i,j) = norm(points(i,:) - points(j,:));   % 2-norm of the difference between rows i and j
    end
end
% FAST - vectorized
distances = pdist2(points, points);   % NOTE(review): pdist2 needs Statistics and Machine Learning Toolbox — confirm availability

% Presort for multiple searches
sortedData = sort(data);
% First position in the sorted data whose value is >= value.
% Note: the comparison builds a mask over the whole array, so this is a
% linear scan rather than a true binary search.
idx = find(sortedData >= value, 1, 'first');
% Use ismember for set operations
[isPresent, locations] = ismember(searchValues, data);
% Use unique for removing duplicates
uniqueData = unique(data);

% Convert for to parfor for independent iterations
parfor i = 1:n
    results(i) = expensiveFunction(data(i));   % iteration i reads only data(i) and writes only results(i)
end

% Create parallel pool
% Open a pool only when none is running: calling parpool while a pool is
% already open raises an error, so reuse the current pool instead.
if isempty(gcp('nocreate'))
    parpool('local', 4); % 4 workers
end
% Use parfeval for asynchronous parallel execution
futures = parfeval(@expensiveFunction, 1, data);   % second argument: number of outputs requested
result = fetchOutputs(futures);                    % waits for the future and collects its output
% GPU arrays for massive parallelization
gpuData = gpuArray(data);
result = arrayfun(@myFunction, gpuData);
result = gather(result); % Bring back to CPU

% Create MEX file for bottleneck function
% Write myFunction.c, then compile:
% mex myFunction.c
% Call like regular MATLAB function
result = myFunction(inputs);

function result = expensiveComputation(input)
    % expensiveComputation  Return computeExpensiveOperation(input), reusing
    % the previous result when called again with an identical input.
    %
    %   input  - value forwarded to computeExpensiveOperation
    %   result - freshly computed or cached output
    persistent cachedData cachedInput hasCache
    % Persistent variables start out as []. Without the hasCache flag, an
    % empty input on the very first call would compare equal to the
    % uninitialised cachedInput and return [] without computing anything.
    if ~isempty(hasCache) && isequal(input, cachedInput)
        % Return cached result
        result = cachedData;
        return;
    end
    % Compute and cache
    result = computeExpensiveOperation(input);
    cachedData = result;
    cachedInput = input;
    hasCache = true;
end

function result = jitFriendly(n)
    % jitFriendly  Sum the integers 1..n with a plain scalar loop.
    result = 0;
    for i = 1:n
        result = result + i;
    end
end

function result = jitUnfriendly(n)
    % jitUnfriendly  Counter-example: creates variables x1..xn through eval.
    % result is never updated inside the loop, so the function returns 0.
    result = 0;
    for i = 1:n
        eval(['x' num2str(i) ' = i;']); % Dynamic code
    end
end

profile on
myScript;
profile viewer

% Before
time_before = timeit(@() myFunction(data));
% After optimization
time_after = timeit(@() myFunctionOptimized(data));
fprintf('Speedup: %.2fx\n', time_before/time_after);   % a ratio above 1 means the optimized version is faster

% SLOW - inner loop over columns (row-major traversal in column-major MATLAB)
for i = 1:rows
    for j = 1:cols
        A(i,j) = process(i, j);   % consecutive writes move along a row
    end
end
% FAST - inner loop over rows (column-major traversal, contiguous memory)
for j = 1:cols
    for i = 1:rows
        A(i,j) = process(i, j);   % consecutive writes move down a column
    end
end
% FASTEST - vectorized
[I, J] = ndgrid(1:rows, 1:cols);   % I(i,j) = i and J(i,j) = j
A = process(I, J);                 % assumes process accepts array inputs element-wise — TODO confirm

% SLOW - repeated conversions
for i = 1:n
    x = double(data(i));   % converts a single element on every pass
    result(i) = sin(x);
end
% FAST - convert once
x = double(data);          % one conversion for the whole array
result = sin(x);

% SLOW
[rows, cols] = size(image);
output = zeros(rows, cols);
for i = 2:rows-1
    for j = 2:cols-1
        output(i,j) = mean(image(i-1:i+1, j-1:j+1), 'all');   % mean of the 3x3 neighbourhood centred on (i,j)
    end
end
% FAST
kernel = ones(3,3) / 9;   % 3x3 averaging kernel
% NOTE(review): the loop above leaves the one-pixel border at 0, whereas
% conv2 with 'same' also fills the border (using zero padding), so the two
% versions agree only in the interior.
output = conv2(image, kernel, 'same');

% SLOW
n = size(data, 1);   % number of rows
means = zeros(n, 1);
for i = 1:n
    means(i) = mean(data(i, :));   % mean of row i
end
% FAST
means = mean(data, 2);   % mean along dimension 2, one value per row

% SLOW
n = length(signal);
movingAvg = zeros(size(signal));
window = 10;
for i = window:n
    movingAvg(i) = mean(signal(i-window+1:i));   % mean of the window samples ending at index i
end
% FAST - trailing window: [window-1 past samples, 0 future samples]
% NOTE(review): the loop leaves the first window-1 entries at 0, while
% movmean by default shrinks the window at the start and returns partial
% averages there, so the two versions differ in those entries.
movingAvg = movmean(signal, [window-1 0]);

% Related tools: profile viewer, memory, doc, timeit, tic/toc, parfor, gpuArray, sparse