Numba — Just-In-Time compilation for Python (skill reference)
A Just-In-Time (JIT) compiler for Python that translates a subset of Python and NumPy code into fast machine code. Developed by Anaconda, Inc. Highly effective for accelerating loops, custom mathematical functions, and complex numerical algorithms. Use for @njit, @vectorize, prange, cuda.jit, numba.typed, JIT compilation, parallel loops, GPU acceleration with CUDA, Monte Carlo simulations, numerical algorithms, and high-performance Python computing.
Install skill: npx skill4agent add tondevrel/scientific-agent-skills numba
Keywords: @njit, @vectorize, prange, cuda.jit, numba.typed
Install library: pip install numba
Common imports:
import numpy as np
from numba import njit, prange, vectorize, guvectorize, cuda
import numpy as np
from numba import njit

# 1. Apply the @njit decorator (alias for @jit(nopython=True))
@njit
def sum_array(arr):
    """Sum a 1-D array element by element.

    The explicit Python loop would be slow in the interpreter, but Numba
    compiles it to machine code, so it runs at C-like speed.
    """
    res = 0.0
    # Standard Python loop that would be slow is now fast as C
    for i in range(arr.shape[0]):
        res += arr[i]
    return res

# 2. Execute
data = np.random.random(1_000_000)
result = sum_array(data)  # First call triggers compilation; later calls reuse the machine code

# Related options covered below: @njit(parallel=True) with prange (instead of
# range), @njit(cache=True) to persist compiled code, eager signatures such as
# (float64[:],), and numba.typed containers.
from numba import njit
import pandas as pd

# ❌ BAD: Using Pandas inside @njit — Pandas objects are not supported in
# nopython mode.
@njit
def bad_func(df):
    return df['col'].sum()  # Raises a compilation error (TypingError/LoweringError)

# ✅ GOOD: Extract the underlying NumPy array first and pass that instead
@njit
def good_func(arr):
    return arr.sum()
# ❌ BAD: Using @jit without nopython=True
from numba import jit

@jit
def slow_func(x):  # May silently fall back to "object mode" (slow)
    return x + 1

# ✅ GOOD: Always ensure nopython mode (@njit == @jit(nopython=True))
@njit
def fast_func(x):
    return x + 1
# ❌ BAD: Manual loop in interpreted Python calling a JIT function per element
# for i in range(1000):
#     process_element(arr[i])  # Pays JIT call/dispatch overhead 1000 times
# ✅ GOOD: Move the loop INSIDE the @njit function — one compiled call,
# and the loop itself runs as machine code.
@njit
def process_all(arr):
    for i in range(arr.shape[0]):
        process_element(arr[i])
from numba import njit, prange

@njit(parallel=True)
def parallel_sum(A):
    """Sum A across multiple CPU threads.

    prange marks the loop for parallel execution; Numba recognizes
    `s += ...` as a reduction and safely combines per-thread partial sums.
    """
    s = 0
    for i in prange(A.shape[0]):
        s += A[i]
    return s
from numba import vectorize

# @vectorize creates a true NumPy ufunc, so broadcasting works automatically;
# target='parallel' spreads the work over CPU threads.
@vectorize(['float64(float64, float64)'], target='parallel')
def fast_add(x, y):
    return x + y

# Now you can use it on massive arrays (with full broadcasting semantics)
res = fast_add(arr1, arr2)
from numba.typed import List, Dict
from numba import njit

@njit
def use_typed_list():
    """Build and return a typed list inside nopython code.

    Plain (reflected) Python lists are deprecated inside @njit functions;
    numba.typed.List is the supported, homogeneously-typed replacement.
    """
    l = List()
    l.append(1.0)  # The first append fixes the element type (float64)
    return l
from numba import cuda

@cuda.jit
def my_kernel(io_array):
    """CUDA kernel: double every element of io_array in place."""
    # Absolute position of this thread within the 1-D launch grid
    pos = cuda.grid(1)
    # Guard: the grid may contain more threads than array elements
    if pos < io_array.size:
        io_array[pos] *= 2

# Usage
data = np.ones(256)
threadsperblock = 32
# Ceiling division so every element is covered by at least one thread
blockspergrid = (data.size + (threadsperblock - 1)) // threadsperblock
my_kernel[blockspergrid, threadsperblock](data)
import random

@njit(parallel=True)
def monte_carlo_pi(nsamples):
    """Estimate pi by sampling random points in the unit square.

    random.random() is supported inside nopython mode; with parallel=True
    Numba gives each worker thread its own RNG state, and `acc += 1` is
    recognized as a reduction.
    """
    acc = 0
    for i in prange(nsamples):
        x = random.random()
        y = random.random()
        # Point falls inside the quarter circle of radius 1
        if (x ** 2 + y ** 2) < 1.0:
            acc += 1
    return 4.0 * acc / nsamples
from numba import njit

@njit
def apply_threshold(image, threshold):
    """Binarize a 2-D image: pixels above threshold become 255, others stay 0."""
    M, N = image.shape
    result = np.zeros_like(image)
    for i in range(M):
        for j in range(N):
            if image[i, j] > threshold:
                result[i, j] = 255
    return result
@njit
def solve_laplace(u, niters):
    """Relax the interior of u toward a solution of Laplace's equation.

    Performs niters in-place sweeps over the interior points (boundaries are
    left untouched); each point is replaced by the average of its four
    neighbors. Returns the same array that was passed in.
    """
    M, N = u.shape
    for n in range(niters):
        for i in range(1, M - 1):
            for j in range(1, N - 1):
                u[i, j] = 0.25 * (u[i + 1, j] + u[i - 1, j] + u[i, j + 1] + u[i, j - 1])
    return u

# Debugging tip: fast_func.inspect_types() prints a color-coded, type-annotated
# listing of the compiled function.
# ✅ GOOD: Write results into a caller-provided output buffer instead of
# allocating a fresh array inside the jitted function.
@njit
def compute_into(out_arr, in_arr):
    for i in range(in_arr.shape[0]):
        out_arr[i] = in_arr[i] * 2
# ❌ Problem: Changing a global variable won't affect the jitted function —
# Numba freezes globals as compile-time constants at first compilation.
K = 10

@njit
def f(x):
    return x + K

K = 20
f(1)  # Result is still 11!
# ✅ Solution: pass constants as function arguments instead of reading globals.
# ✅ Solution: force nopython mode with @njit — if it raises a compilation
#    error, fix the code instead of removing the decorator.

# Keyword reference: np.random, parallel=True, random.random() vs
# np.random.random(), @jit, nopython=True, prange, cache=True,
# numba.typed.List, numba.typed.Dict, inspect_types()