I’m using numpy.random.shuffle to compute a statistic on randomized columns of a 2D array. The Python code is as follows:
import numpy as np

def timeline_sample(series, num):
    random = series.copy()
    for i in range(num):
        np.random.shuffle(random.T)
        yield random
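For clarity, np.random.shuffle shuffles along the first axis of its argument, and the first axis of random.T corresponds to the columns of random, so each pass reorders whole columns in place. A small illustration of that behaviour (my own snippet, not part of the code above):

import numpy as np

a = np.arange(12).reshape(3, 4)
np.random.shuffle(a.T)   # shuffles the rows of a.T, i.e. permutes the columns of a
print(a)                 # same columns as before, just in a random order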
The speed I get is something like this:
import numpy as np
arr = np.random.sample((50, 5000))
%%timeit
for series in timeline_sample(arr, 100):
    np.sum(series)

1 loops, best of 3: 391 ms per loop
I tried to Cythonize this function, but I wasn’t sure how to replace the call to np.random.shuffle, and the result was 3x slower. Does anyone know how to accelerate or replace this? It is currently the bottleneck in my program.
Cython code:
cimport cython

import numpy as np
cimport numpy as np


@cython.boundscheck(False)
@cython.wraparound(False)
def timeline_sample2(double[:, ::1] series, int num):
    cdef double[:, ::1] random = series.copy()
    cdef int i
    for i in range(num):
        np.random.shuffle(random.T)
        yield random
Answer
It’s likely that this will give a nice speed boost:
from timeit import Timer

import numpy as np
arr = np.random.sample((50, 5000))

def timeline_sample(series, num):
    random = series.copy()
    for i in range(num):
        np.random.shuffle(random.T)
        yield random

def timeline_sample_fast(series, num):
    random = series.T.copy()
    for i in range(num):
        np.random.shuffle(random)
        yield random.T

def timeline_sample_faster(series, num):
    length = series.shape[1]
    for i in range(num):
        yield series[:, np.random.permutation(length)]

def consume(iterable):
    for s in iterable:
        np.sum(s)

min(Timer(lambda: consume(timeline_sample(arr, 1))).repeat(10, 10))
min(Timer(lambda: consume(timeline_sample_fast(arr, 1))).repeat(10, 10))
min(Timer(lambda: consume(timeline_sample_faster(arr, 1))).repeat(10, 10))
#>>> 0.2585161680035526
#>>> 0.2416607110062614
#>>> 0.04835709399776533
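As a quick sanity check (my own snippet, not part of the timings above), the faster version still yields column permutations of the input: every row of a sample contains the same values as the corresponding row of arr, only in a different column order.

s = next(timeline_sample_faster(arr, 1))
# Sorting each row of both arrays should give identical results,
# since only the column order differs.
print(np.array_equal(np.sort(s, axis=1), np.sort(arr, axis=1)))  # True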
Forcing it to be contiguous does increase the time, but not by a ton:
def consume(iterable):
    for s in iterable:
        np.sum(np.ascontiguousarray(s))

min(Timer(lambda: consume(timeline_sample(arr, 1))).repeat(10, 10))
min(Timer(lambda: consume(timeline_sample_fast(arr, 1))).repeat(10, 10))
min(Timer(lambda: consume(timeline_sample_faster(arr, 1))).repeat(10, 10))
#>>> 0.2632228760048747
#>>> 0.25778737501241267
#>>> 0.07451769898761995
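On a sufficiently recent NumPy, the Generator API can express the same whole-column permutation directly; here is a rough, untimed equivalent of the faster version (my sketch, not from the original answer, reusing np and arr from above):

def timeline_sample_generator(series, num):
    rng = np.random.default_rng()
    for i in range(num):
        # Generator.permutation with axis=1 returns a copy of `series`
        # with its columns reordered by a single random permutation.
        yield rng.permutation(series, axis=1)

for s in timeline_sample_generator(arr, 100):
    np.sum(s)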