Find minimum difference between two vectors with numba

I’ve tried to optimize searching for minimum value between two numpy vectors with numba. There is speed up and result is correct until I use prange and parallel=True option. I understand that the issue is in sharing variables min_val, tmp, min_val_idx_a, min_val_idx_b during parallel execution (maybe with parallel threads). Is there way to overcome the problem and use numba in with parallel=True option ? (which makes my simple code 300x faster)

import numpy as np
from numba import jit, int32, void, double, prange

@jit(void(double[:], double[:], int32), nopython=True, parallel=True)
def lowest_value_numba(a, b, n): 
    
    # initialization
    min_val_idx_a, min_val_idx_b = 0, 0
    min_val = tmp = np.abs(a[0]-b[0])
    
    for i in prange(n):
#         print(i)
        for j in prange(i, n):
            tmp = np.abs(a[i]-b[j])
            
            if(tmp < min_val):
                min_val = tmp            
                min_val_idx_a = i
                min_val_idx_b = j
    print(min_val, min_val_idx_a, min_val_idx_b)
                
n = int(1e4)
a = np.random.uniform(low=0.0, high=1.0, size=n)
b = np.random.uniform(low=0.0, high=1.0, size=n)

# setting min value by setting the same valu efor a[n-1] and b[n-1] 
a[n-1], b[n-1] = 1, 1

%timeit -n 1 -r 1 lowest_value_numba(a, b, n)

JavaScript
​x
 
import numpy as np
from numba import jit, int32, void, double, prange
​
@jit(void(double[:], double[:], int32), nopython=True, parallel=True)
def lowest_value_numba(a, b, n): 
    
    # initialization
    min_val_idx_a, min_val_idx_b = 0, 0
    min_val = tmp = np.abs(a[0]-b[0])
    
    for i in prange(n):
#         print(i)
        for j in prange(i, n):
            tmp = np.abs(a[i]-b[j])
            
            if(tmp < min_val):
                min_val = tmp            
                min_val_idx_a = i
                min_val_idx_b = j
    print(min_val, min_val_idx_a, min_val_idx_b)
                
n = int(1e4)
a = np.random.uniform(low=0.0, high=1.0, size=n)
b = np.random.uniform(low=0.0, high=1.0, size=n)
​
# setting min value by setting the same valu efor a[n-1] and b[n-1] 
a[n-1], b[n-1] = 1, 1
​
%timeit -n 1 -r 1 lowest_value_numba(a, b, n)
​

output which is incorrect (it should be 0.0 9999 9999):

0.23648058275546968 0 0
223 µs ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)

JavaScript
 
0.23648058275546968 0 0
223 µs ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
​

but for compilation with parallel=False output is correct (last values are most close to each other):

0.0 9999 9999
65 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)

JavaScript
 
0.0 9999 9999
65 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
​

Answer

You can avoid the issues commented about cross iteration dependencies if you parallelize by rows. For example:

from numba import jit, int32, double, prange
from numba.types import Tuple

@jit(Tuple([double, int32, int32])(double[:], double[:], int32),
     nopython=True, parallel=True)
def lowest_value_numba(a, b, n):

    min_dif = np.empty_like(a)
    min_j = np.empty((n,), dtype=int32)

    for i in prange(n):
        diff = np.abs(a[i] - b)
        min_j[i] = j = np.argmin(diff)
        min_dif[i] = diff[j]

    i = np.argmin(min_dif)
    j = min_j[i]
    min_val = min_dif[i]

    return min_val, i, j

JavaScript
 
from numba import jit, int32, double, prange
from numba.types import Tuple
​
@jit(Tuple([double, int32, int32])(double[:], double[:], int32),
     nopython=True, parallel=True)
def lowest_value_numba(a, b, n):
​
    min_dif = np.empty_like(a)
    min_j = np.empty((n,), dtype=int32)
​
    for i in prange(n):
        diff = np.abs(a[i] - b)
        min_j[i] = j = np.argmin(diff)
        min_dif[i] = diff[j]
​
    i = np.argmin(min_dif)
    j = min_j[i]
    min_val = min_dif[i]
​
    return min_val, i, j
​

Results are consistent with your implementation (tested with parallel=False and for j in prange(n)) and with the brute force Numpy approach:

def lowest_value_numpy(a, b):
    diff = np.abs(np.atleast_2d(a).T - np.atleast_2d(b))
    indices = np.unravel_index(diff.argmin(), diff.shape)
    return diff[indices], *indices

JavaScript
 
def lowest_value_numpy(a, b):
    diff = np.abs(np.atleast_2d(a).T - np.atleast_2d(b))
    indices = np.unravel_index(diff.argmin(), diff.shape)
    return diff[indices], *indices
​

Advertisement

Answer