How to create 2d array with numpy random.choice for every rows?

Question

I'm trying to create a 2d array (which is a six column and lots of rows) with numpy random choice with unique values between 1 and 50 for every row not all of the array But this raises an error. Is it possible to make this with an one liner without a loop Edit Okey i get the answer. These

Accepted Answer

Here is a constructive approach, draw first (50 choices), second (49 choices) etc. For large sets it’s quite competitive (pp in table):# n = 10# pp 0.18564210 ms# Divakar 0.01960790 ms# James 0.20074140 ms# CK 0.17823420 ms# n = 1000# pp 0.80046050 ms# Divakar 1.31817130 ms# James 18.93511460 ms# CK 20.83670820 ms# n = 1000000# pp 655.32905590 ms# Divakar 1352.44713990 ms# James 18471.08987370 ms# CK 18369.79808050 ms# pp checking plausibility...# var (exp obs) 208.333333333 208.363840259# mean (exp obs) 25.5 25.5064865# Divakar checking plausibility...# var (exp obs) 208.333333333 208.21113972# mean (exp obs) 25.5 25.499471# James checking plausibility...# var (exp obs) 208.333333333 208.313436938# mean (exp obs) 25.5 25.4979035# CK checking plausibility...# var (exp obs) 208.333333333 208.169585249# mean (exp obs) 25.5 25.49Code including benchmarking. Algo is a bit complicated because mapping to free spots is hairy:import numpy as npimport typesfrom timeit import timeitdef f_pp(n): draw = np.empty((n, 6), dtype=int) # generating random numbers is expensive, so draw a large one and # make six out of one draw[:, 0] = np.random.randint(0, 50*49*48*47*46*45, (n,)) draw[:, 1:] = np.arange(50, 45, -1) draw = np.floor_divide.accumulate(draw, axis=-1) draw[:, :-1] -= draw[:, 1:] * np.arange(50, 45, -1) # map the shorter ranges (:49, :48, :47) to the non-occupied # positions; this amounts to incrementing for each number on the # left that is not larger. the nasty bit: if due to incrementing # new numbers on the left are "overtaken" then for them we also # need to increment. for i in range(1, 6): coll = np.sum(draw[:, :i] <= draw[:, i, None], axis=-1) collidx = np.flatnonzero(coll) if collidx.size == 0: continue coll = coll[collidx] tot = coll while True: draw[collidx, i] += coll coll = np.sum(draw[collidx, :i] <= draw[collidx, i, None], axis=-1) relidx = np.flatnonzero(coll > tot) if relidx.size == 0: break coll, tot = coll[relidx]-tot[relidx], coll[relidx] collidx = collidx[relidx] return draw + 1def check_result(draw, name): print(name[2:], ' checking plausibility...') import scipy.stats assert all(len(set(row)) == 6 for row in draw) assert len(set(draw.ravel())) == 50 print(' var (exp obs)', scipy.stats.uniform(0.5, 50).var(), draw.var()) print(' mean (exp obs)', scipy.stats.uniform(0.5, 50).mean(), draw.mean())def f_Divakar(n): return np.random.rand(n, 50).argpartition(6,axis=1)[:,:6]+1def f_James(n): return np.stack([np.random.choice(np.arange(1,51),size=6,replace=False) for i in range(n)])def f_CK(n): return np.array([np.random.choice(np.arange(1, 51), size=6, replace=False) for _ in range(n)])for n in (10, 1_000, 1_000_000): print(f'n = {n}') for name, func in list(globals().items()): if not name.startswith('f_') or not isinstance(func, types.FunctionType): continue try: print("{:16s}{:16.8f} ms".format(name[2:], timeit( 'f(n)', globals={'f':func, 'n':n}, number=10)*100)) except: print("{:16s} apparently failed".format(name[2:])) if(n >= 10000): for name, func in list(globals().items()): if name.startswith('f_') and isinstance(func, types.FunctionType): check_result(func(n), name)

Advertisement

Answer