목록 전체 글 (462)
Note
How to swap two rows of a dataframe? # Input df = pd.DataFrame(np.arange(25).reshape(5, -1)) # Solution def swap_rows(df, i1, i2): a, b = df.iloc[i1, :].copy(), df.iloc[i2, :].copy() df.iloc[i1, :], df.iloc[i2, :] = b, a return df print(swap_rows(df, 1, 2)) # output 0 1 2 3 4 0 0 1 2 3 4 1 10 11 12 13 14 2 5 6 7 8 9 3 15 16 17 18 19 4 20 21 22 23 24
How to rank items in a multidimensional array using numpy? # Input: np.random.seed(10) a = np.random.randint(20, size=[2,5]) print(a) # Solution print(a.ravel().argsort().argsort().reshape(a.shape)) # output [[ 9 4 15 0 17] [16 17 8 9 0]] [[4 2 6 0 8] [7 9 3 5 1]]
How to reshape a dataframe to the largest possible square after removing the negative values? # Input df = pd.DataFrame(np.random.randint(-20, 50, 100).reshape(10,-1)) print(df) # Solution # Step 1: remove negative values from arr arr = df[df > 0].values.flatten() arr_qualified = arr[~np.isnan(arr)] # Step 2: find side-length of largest possible square n = int(np.floor(arr_qualified.shape[0]**.5..
How to rank items in an array using numpy? np.random.seed(10) a = np.random.randint(20, size=10) print('Array: ', a) # Solution print(a.argsort().argsort()) print('Array: ', a) #> Array: [ 9 4 15 0 17 16 17 8 9 0] #> [4 2 6 0 8 7 9 3 5 1] #> Array: [ 9 4 15 0 17 16 17 8 9 0]
How to find and cap outliers from a series or dataframe column? # Input ser = pd.Series(np.logspace(-2, 2, 30)) # Solution def cap_outliers(ser, low_perc, high_perc): low, high = ser.quantile([low_perc, high_perc]) print(low_perc, '%ile: ', low, '|', high_perc, '%ile: ', high) ser[ser < low] = low ser[ser > high] = high return(ser) capped_ser = cap_outliers(ser, .05, .95) # output 0.05 %ile: 0.0..