e.g. 1 Print arrays
x1 = np.arange(10) # numbers from 0 to 9 (note 10 is NOT included)
x2 = np.arange(2,7) # numbers from 2 to 6 (note 7 is NOT included)
print("x1 = "+str(x1))
print("x2 = "+str(x2)) # Note: convert the array to a string with str() before concatenating it with "+"
#output:
x1 = [0 1 2 3 4 5 6 7 8 9]
x2 = [2 3 4 5 6]
e.g. 2 Array increments
# Increments of 2.5. To ensure it includes the value 10, we make the endpoint slightly >10.
step = 2.5
x1 = np.arange(0,10+step/2,step)
print(x1)
x2 = np.linspace(0,10,5)
print(x2)
#output:
[ 0. 2.5 5. 7.5 10. ]
[ 0. 2.5 5. 7.5 10. ]
**Note: In numpy and NCL, the order of the slicing arguments is:**
aa[i_start: i_end: slicing_step]   (numpy)
aa(i_start: i_end: slicing_step)   (NCL)
**But in MATLAB, the order is different:**
aa(i_start: slicing_step: i_end)
**Remember this difference clearly, or it will cause mistakes!**
e.g. 3 The power of range
z1 = np.array(range(2,21,2))
print("z1=" + str(z1))
z2 = np.abs(np.array(range(50, -51, -10)))
print("z2=" + str(z2))
#output:
z1=[ 2 4 6 8 10 12 14 16 18 20]
z2=[50 40 30 20 10 0 10 20 30 40 50]
Note: Arrays retain their datatype!
Unlike Matlab, numpy arrays keep their originally declared datatype, regardless of the operations performed on them. Assigning a float to an element of an integer array silently truncates it to an integer, and storing the result of a division back into an integer array likewise drops the fractional part. It is therefore better to declare the type (dtype) explicitly when creating an array.
e.g. 4
x = np.array(range(2,6)) # declare an integer array
print("x="+str(x))
r = np.random.rand(1) # generate random float
print("r="+str(r))
x[0] = r # try assigning float as element
x[1] = x[1]/2 # try dividing odd integer by two
print("x="+str(x)) # the array stays integer!
print(x.dtype)
#output
x=[2 3 4 5]
r=[0.98797611]
x=[0 1 4 5]
int32
print("r="+str(r))
x = np.array(range(2,6), dtype=float) # declare an array of floats
print("x="+str(x))
x[0] = r # try assigning float as element
x[1] = x[1]/2 # try dividing odd integer by two
print("x="+str(x)) # this works fine!
#output
r=[0.08604006]
x=[2. 3. 4. 5.]
x=[0.08604006 1.5 4. 5. ]
x = np.array([2,3,4,5.]) # this also declares an array of floats
print("x="+str(x))
x[0] = r # try assigning float as element
x[1] = x[1]/2 # try dividing odd integer by two
print("x="+str(x)) # this works fine!
#output
x=[2. 3. 4. 5.]
x=[0.08604006 1.5 4. 5. ]
e.g. 5 Indexing and slicing vectors (important!)
x = np.random.rand(10) # 10 random elements uniformly distributed in [0,1]
x1 = x[2:5] # Elements 2 thru 5 (not including 5, indexing starts at 0)
x2 = x[:4] # Elements thru 4 (not including index 4, indexing starts at 0)
x3 = x[7:] # Elements 7 to end (indexing starts at 0)
xlast = x[-1] # The last element (i.e., index=length-1)
xslice = x[0:10:2] # Slicing the array every 2 elements
xslice2 = x[0::2] # Slicing the array every 2 elements
print("x= "+np.array_str(x,precision=3)) #!!control the output array precision!
print("x[2:5]= "+np.array_str(x1,precision=3))
print("x[:4]= "+np.array_str(x2,precision=3))
print("x[7:]= "+np.array_str(x3,precision=3))
print("xlast= {0:5.3f}".format(xlast))
print("xslice = " + np.array_str(xslice,precision=3))
print("xslice2 = " + np.array_str(xslice2,precision=3))
#output
x= [0.979 0.816 0.135 0.315 0.557 0.672 0.403 0.279 0.113 0.326]
x[2:5]= [0.135 0.315 0.557]
x[:4]= [0.979 0.816 0.135 0.315]
x[7:]= [0.279 0.113 0.326]
xlast= 0.326
xslice = [0.979 0.135 0.557 0.403 0.113]
xslice2 = [0.979 0.135 0.557 0.403 0.113]
Note: Array slicing does not copy!
Slices of an array are views into the original array's data, not copies. So, if you modify the slice, you modify the original array! If you want to manipulate a slice independently of the original array, you must explicitly create a new copy of that slice (e.g. with np.array(...) or .copy()).
e.g. 6
r = np.random.rand(1)
a = np.array([1, 2, 3, 4],dtype=float)
b = a[0:2] # slice of a
b[0] = r # this will change both a and b!
print("a=" + str(a))
print("b=" + str(b))
a = np.array([1, 2, 3, 4],dtype=float)
c = np.array(a[0:2]) # this creates a new copy from the slice
c[0] = r # this only changes c
print("a="+str(a))
print("c=" + str(c))
#output
a=[0.75570136 2. 3. 4. ]
b=[0.75570136 2. ]
a=[1. 2. 3. 4.]
c=[0.75570136 2. ]
e.g. 7 Loops
mylist = ['a', 'b', 'c']
for k in mylist:
    print(k)
#access both the index into the list and the list elements:
for k, element in enumerate(mylist):
    print(k,element)
#output
a
b
c
0 a
1 b
2 c
e.g. 8 Reshape
X = np.arange(24).reshape(2,3,4) # shape = (2,3,4)
print(X)
#output
X=[[[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]]
[[12 13 14 15]
[16 17 18 19]
[20 21 22 23]]]
e.g. 9 Broadcasting: mean removal
Suppose that X is a data matrix of shape (n,p). That is, there are n data points and p features per point. Often, we have to remove the mean from each feature.
# Generate some random data
n = 100
p = 5
X = np.random.rand(n,p)
print(np.shape(X))
# Compute the mean per column using the axis command
Xm = np.mean(X,axis=0) # This is a p-dim vector, shape (p,)
print(np.shape(Xm))
print(np.shape(Xm[None,:]))
# Subtract the mean
X_demean = X - Xm[None,:]
print(np.shape(X_demean))
X_demean = X - Xm
print(np.shape(X_demean))
#output
(100, 5)
(5,)
(1, 5)
(100, 5)
(100, 5)
e.g. 9b Broadcasting: distance
Here is a more complicated example. Suppose we have a data matrix X of shape (nx,p) and a second set of points, Y of shape (ny,p). For each i and j, we want to compute the squared distances,
d[i,j] = np.sum((X[i,:] - Y[j,:])**2)
This represents the squared Euclidean distance between the vectors X[i,:] and Y[j,:]. We can do this without a for loop as follows.
# Some random data
nx = 100
ny = 10
p = 2
X = np.random.rand(nx,p)
print('X shape = ' + str(np.shape(X)))
Y = np.random.rand(ny,p)
print('Y shape = ' + str(np.shape(Y)))
# Computing the distances in two lines. No for loop!
DXY = X[:,None,:]-Y[None,:,:]
print('X[:,None,:] shape = ' + str(np.shape(X[:,None,:])))
print('Y[None,:,:] shape = ' + str(np.shape(Y[None,:,:])))
print('DXY shape = ' + str(np.shape(DXY)))
d = np.sum(DXY**2,axis=2)
print('d shape = ' + str(np.shape(d)))
#output
X shape = (100, 2)
Y shape = (10, 2)
X[:,None,:] shape = (100, 1, 2)
Y[None,:,:] shape = (1, 10, 2)
DXY shape = (100, 10, 2)
d shape = (100, 10)
e.g.10 Broadcasting: outer product
The outer product of vectors x and y is the matrix Z[i,j] = x[i]*y[j].
# Some random data
nx = 100
ny = 10
x = np.random.rand(nx)
y = np.random.rand(ny)
# Compute the outer product in one line
Z = x[:,None]*y[None,:] # x[:,None] has shape (nx, 1); y[None,:] has shape (1, ny)
print('Z shape = '+ str(np.shape(Z)))
#output
Z shape = (100, 10)
e.g. 11 The work space
whos
#output
Variable Type Data/Info
-------------------------------
a ndarray 4: 4 elems, type `float64`, 32 bytes
b ndarray 2: 2 elems, type `float64`, 16 bytes
c ndarray 2: 2 elems, type `float64`, 16 bytes
element str cranberry
f int 3
k int 2
mylist list n=3
np module <module 'numpy' from '/Us<...>kages/numpy/__init__.py'>
plt module <module 'matplotlib.pyplo<...>es/matplotlib/pyplot.py'>
r ndarray 1: 1 elems, type `float64`, 8 bytes
step float 2.5
t ndarray 100: 100 elems, type `float64`, 800 bytes
x ndarray 11: 11 elems, type `float64`, 88 bytes
x1 ndarray 3: 3 elems, type `float64`, 24 bytes
x2 ndarray 4: 4 elems, type `float64`, 32 bytes
x3 ndarray 4: 4 elems, type `float64`, 32 bytes
xlast float64 0.27141830443616755
y ndarray 100: 100 elems, type `float64`, 800 bytes
ysq ndarray 100: 100 elems, type `float64`, 800 bytes
del a, b, c #delete variables
whos
#output
Variable Type Data/Info
-------------------------------
element str c
k int 2
mylist list n=3
np module <module 'numpy' from 'D:\<...>ges\\numpy\\__init__.py'>
r ndarray 1: 1 elems, type `float64`, 8 bytes
step float 2.5
x ndarray 4: 4 elems, type `int32`, 16 bytes
x1 ndarray 3: 3 elems, type `int32`, 12 bytes
x2 ndarray 1: 1 elems, type `int32`, 4 bytes
x3 ndarray 3: 3 elems, type `int32`, 12 bytes
xlast float64 0.32608577784906123
z1 ndarray 10: 10 elems, type `int32`, 40 bytes
z2 ndarray 11: 11 elems, type `int32`, 44 bytes
e.g. 12 Initializing a numpy array of string data
A numpy string array has a fixed item length (by default, np.empty with dtype=str allocates strings of length 1), so longer strings are truncated on assignment. If the string lengths are unknown or unequal, initialize the array with dtype=object instead:
newstr_array = np.empty((N, M), dtype=object)
e.g. 13 Sort N-D numpy array based on one column (np.argsort)
e.g. sorting a 2-column array by the values in its first column with np.argsort
Z1=np.transpose(np.vstack((precp_Z1, d18Op_Z1)))
Z2=np.transpose(np.vstack((precp_Z2, d18Op_Z2)))
Z1_sort = Z1[np.argsort(Z1[:,0])]
Z2_sort = Z2[np.argsort(Z2[:,0])]
# reverse order!
x = np.random.random((5,2))
x_new = x[np.argsort(x[:,0])[::-1]]
e.g. 14 np.around, np.floor, np.ceil
rounding:
np.around(a, decimals=0, out=None)
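decimals: number of decimal places to round to. The default is 0. If negative, it specifies the number of positions to the left of the decimal point to round to.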
data = np.array([-0.732, 4.6, 9.4, 7.449, 10.455, 11.555])
around1 = np.around(data)
print(around1) # [ -1. 5. 9. 7. 10. 12.]
around2 = np.around(data, decimals=1)
print(around2) # [ -0.7 4.6 9.4 7.4 10.5 11.6]
around3 = np.around(data, decimals=-1)
print(around3) # [ -0. 0. 10. 10. 10. 10.]
flooring: round each element down to the nearest integer (toward negative infinity)
np.floor(a)
ceiling: round each element up to the nearest integer (toward positive infinity)
np.ceil(a)
data = np.array([-2.8, 2.1, 3.])
np.floor(data) # [-3., 2., 3.]
np.ceil(data) # [-2., 3., 3.]
e.g. 15 read and save txt file in numpy
e.g.
Andes_loc = np.zeros((len(Andes_lat),2), dtype='int')
Andes_loc[:,0] = Andes_lon
Andes_loc[:,1] = Andes_lat
# output index
np.savetxt('Andes_locidx.txt', Andes_loc, fmt='%d')
# read data into an array
locIdx = np.loadtxt('Andes_locidx.txt', dtype=int)
e.g. 16 replicate arrays
e.g.
# For one dimensional array:
aa = [1,2,3,4]
bb = np.tile(aa,3) # [1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4]
cc=np.repeat(aa,3) # [1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4]
# For N dimensional array: [time, lat, lon].
monClim_d18Ov = np.tile(monClim_d18Ov0, (20,1,1,1)) # repeat/tile seasonal climatology of each month for 20 times: Jan, Feb,..., Nov, Dec, Jan, Feb, ...
yrClim_d18Ov = np.repeat(yrClim_d18Ov0, 12, axis=0) # repeat annual mean for 12 times, so all 12 month for each year will have the same value
e.g. 17 Check if elements of an array exist in another array: np.isin()
e.g.
time_TES_monlist = [str(Tm[6:].decode("utf-8")) for Tm in time_TES_all] # extract months
# calculate monthly climatology
monList = ['01','02','03','04','05','06','07','08','09','10','11','12']
Isinmon = []
for im in monList:
    Isinmon.append(np.isin(time_TES_monlist, im))
e.g. 18 Sets processing
e.g.
aa = [1,2,3,4]
bb = [3,4,5,6]
print(set(aa)&set(bb)) # intersection {3,4}
print(set(aa)^set(bb)) # symmetric difference {1, 2, 5, 6}
Last update: 11/04/2022