听说 Python 科学计算的 NumPy
和 pandas
特别厉害,于是想尝试一下,搜索知乎,发现了这篇问答:如何系统地学习Python 中 matplotlib, numpy, scipy, pandas?,顺藤摸瓜,发现了几个学习资料,暂且收集在这里。
ok,资料齐全,接下来开始学习。
NumPy 基础:数组和矢量计算
import numpy as np
import pandas as pd
print 'hello world'
hello world
创建ndarray
data1 = [6,7.5,8,0,1]
arr1 = np.array(data1)
arr1
array([ 6. , 7.5, 8. , 0. , 1. ])
data2 = [[1,2,3,4],[5,6,7,8]]
arr2 = np.array(data2)
arr2
array([[1, 2, 3, 4],
[5, 6, 7, 8]])
arr2.ndim
2
arr2.shape
(2L, 4L)
arr2.dtype
dtype('int32')
np.zeros(10)
array([ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
np.zeros((3,6))
array([[ 0., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 0., 0., 0.]])
np.ones((3,4))
array([[ 1., 1., 1., 1.],
[ 1., 1., 1., 1.],
[ 1., 1., 1., 1.]])
np.empty((2,3,4))
array([[[ 1.82577335e-316, 8.73107750e-316, 1.81818490e-316,
3.13307342e-316],
[ 2.08418905e-316, 8.73107987e-316, 1.81818490e-316,
8.72472026e-316],
[ 2.08418905e-316, 8.73108224e-316, 1.81818490e-316,
8.72315545e-316]],
[[ 2.08418905e-316, 8.73108462e-316, 1.81818490e-316,
8.72802101e-316],
[ 2.08418905e-316, 8.73108699e-316, 1.81818490e-316,
8.72462540e-316],
[ 2.08418905e-316, 8.72808228e-316, 1.81818490e-316,
8.73047909e-316]]])
np.arange(15)
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
np.eye(10)
array([[ 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[ 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
[ 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
[ 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
[ 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
[ 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
[ 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]])
np.identity(10)
array([[ 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[ 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
[ 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
[ 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
[ 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
[ 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
[ 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]])
b = np.array([(1.5,2,3),(4,5,6)])
b
array([[ 1.5, 2. , 3. ],
[ 4. , 5. , 6. ]])
c = np.array([[1,2],[3,4]],dtype = complex)
c
array([[ 1.+0.j, 2.+0.j],
[ 3.+0.j, 4.+0.j]])
a = np.arange(10)
print a
[0 1 2 3 4 5 6 7 8 9]
b = np.arange(12).reshape(4,3)
print b
[[ 0 1 2]
[ 3 4 5]
[ 6 7 8]
[ 9 10 11]]
c = np.arange(24).reshape(2,3,4)
print c
[[[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]]
[[12 13 14 15]
[16 17 18 19]
[20 21 22 23]]]
数组类型
arr = np.array([1,2,3,4,5])
arr.dtype
dtype('int32')
float_arr = arr.astype(np.float32)
arr.dtype
dtype('int32')
arr = np.array([3.7,-1.2,-2.6,0.5,12.9,10.1])
arr.dtype
dtype('float64')
arr.astype(np.int32)
array([ 3, -1, -2, 0, 12, 10])
numeric_strings = np.array(['1.25','-9.6','42'],dtype = np.string_)
numeric_strings.dtype
dtype('S4')
numeric_strings.astype(float)
array([ 1.25, -9.6 , 42. ])
arr = np.arange(10)
arr
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
索引和切片
arr[5]
5
arr[5:8]
array([5, 6, 7])
arr[5:8] =12
arr
array([ 0, 1, 2, 3, 4, 12, 12, 12, 8, 9])
arr_slice = arr[5:8]
arr_slice[1] = 12345
arr_slice
array([ 12, 12345, 12])
arr
array([ 0, 1, 2, 3, 4, 12, 12345, 12, 8, 9])
arr_slice[:] = 64
arr_slice
arr
array([ 0, 1, 2, 3, 4, 64, 64, 64, 8, 9])
a = np.arange(10)
a[::-1]
array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])
a[2:9:3]#[start:end:step]
array([2, 5, 8])
基本运算
a = np.array([20,30,40,50])
b = np.arange(4)
b
array([0, 1, 2, 3])
c = a-b
c
array([20, 29, 38, 47])
NumPy中的乘法运算符 * 指示按元素计算,矩阵乘法可以使用 dot 函数或创建矩阵对象实现
A = np.array([[1,1],[0,1]])
B = np.array([[2,0],[3,4]])
A*B
array([[2, 0],
[0, 4]])
np.dot(A,B)
array([[5, 4],
[3, 4]])
可视化
import matplotlib.pyplot as plt
%matplotlib inline
x = np.linspace(0,3,20)
y = np.linspace(0,9,20)
plt.plot(x,y)
plt.show()
image = np.random.rand(30,30)
plt.imshow(image,cmap=plt.cm.hot)
plt.colorbar()
<matplotlib.colorbar.Colorbar at 0xc31ccf8>
Worked Example: data statistics
data = np.loadtxt('C:\Users\zluck\Documents\Python\Python materials\scipy-lecture-notes-master\data\populations.txt')
year,hares,lynxes,carrots = data.T
plt.axes([0.2,0.1,0.5,0.8])
plt.plot(year,hares,year,lynxes,year,carrots)
plt.legend(('Hare','Lynx','Carrot'),loc = (1.05,0.5))
<matplotlib.legend.Legend at 0xd0c2b70>
populations = data[:,1:]
populations.mean(axis=0)
array([ 34080.95238095, 20166.66666667, 42400. ])
#Which species has the highest population each year
np.argmax(populations,axis=1)
array([2, 2, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 0, 0, 0, 1, 2, 2, 2, 2, 2], dtype=int64)
Broadcasting
A lot of grid-based or network-based problems can also use broadcasting. For instance, if we want to compute the distance from the origin of points on a 5x5 grid, we can do
x,y = np.arange(5),np.arange(5)[:,np.newaxis]
distance = np.sqrt(x**2+y**2)
distance
array([[ 0. , 1. , 2. , 3. , 4. ],
[ 1. , 1.41421356, 2.23606798, 3.16227766, 4.12310563],
[ 2. , 2.23606798, 2.82842712, 3.60555128, 4.47213595],
[ 3. , 3.16227766, 3.60555128, 4.24264069, 5. ],
[ 4. , 4.12310563, 4.47213595, 5. , 5.65685425]])
plt.pcolor(distance)
plt.colorbar()
<matplotlib.colorbar.Colorbar at 0xd390cf8>
x,y = np.ogrid[0:5,0:5]
x,y
(array([[0],
[1],
[2],
[3],
[4]]), array([[0, 1, 2, 3, 4]]))
x.shape,y.shape
((5L, 1L), (1L, 5L))
distance = np.sqrt(x**2+y**2)
distance
array([[ 0. , 1. , 2. , 3. , 4. ],
[ 1. , 1.41421356, 2.23606798, 3.16227766, 4.12310563],
[ 2. , 2.23606798, 2.82842712, 3.60555128, 4.47213595],
[ 3. , 3.16227766, 3.60555128, 4.24264069, 5. ],
[ 4. , 4.12310563, 4.47213595, 5. , 5.65685425]])
x,y = np.mgrid[0:4,0:4]
x
array([[0, 0, 0, 0],
[1, 1, 1, 1],
[2, 2, 2, 2],
[3, 3, 3, 3]])
y
array([[0, 1, 2, 3],
[0, 1, 2, 3],
[0, 1, 2, 3],
[0, 1, 2, 3]])
Array shape manipulation
Flattening
a = np.array([[1,2,3],[4,5,6]])
a.ravel()
array([1, 2, 3, 4, 5, 6])
a.T
array([[1, 4],
[2, 5],
[3, 6]])
a.T.ravel()
array([1, 4, 2, 5, 3, 6])
Reshaping
a.shape
(2L, 3L)
b = a.ravel()
b = b.reshape((2,3))
b
array([[1, 2, 3],
[4, 5, 6]])
b[0,0] = 99
a
array([[99, 2, 3],
[ 4, 5, 6]])
a = np.zeros((3,2))
b = a.T.reshape((3*2))
b
array([ 0., 0., 0., 0., 0., 0.])
b[0]=9
a
array([[ 0., 0.],
[ 0., 0.],
[ 0., 0.]])
Dimension shuffling
a = np.arange(4*3*2).reshape(4,3,2)
a.shape
(4L, 3L, 2L)
a[0,2,1]
5
b = a.transpose(1,2,0)
b.shape
(3L, 2L, 4L)
b[2,1,0]
5
b[2,1,0] = -1
a[0,2,1]
-1
Sorting Data
a = np.array([[4,3,5],[1,2,1]])
b = np.sort(a,axis = 1)
b
array([[3, 4, 5],
[1, 1, 2]])
a.sort(axis = 1)
a
array([[3, 4, 5],
[1, 1, 2]])
a = np.array([4,3,1,2])
j = np.argsort(a)
j
array([2, 3, 1, 0], dtype=int64)
a[j]
array([1, 2, 3, 4])
a.sort()
a
array([1, 2, 3, 4])
a = np.array([4,3,1,2])
j_max = np.argmax(a)
j_min = np.argmin(a)
j_max, j_min
(0, 2)
samples = np.zeros((6,),dtype = [('sensor_code','S4'),('position',float),('value',float)])
samples
array([('', 0.0, 0.0), ('', 0.0, 0.0), ('', 0.0, 0.0), ('', 0.0, 0.0),
('', 0.0, 0.0), ('', 0.0, 0.0)],
dtype=[('sensor_code', 'S4'), ('position', '<f8'), ('value', '<f8')])
samples.ndim
1
samples.shape
(6L,)
samples.dtype.names
('sensor_code', 'position', 'value')
samples[:] = [('ALFA', 1, 0.37), ('BETA', 1, 0.11), ('TAU', 1, 0.13),
('ALFA', 1.5, 0.37), ('ALFA', 3, 0.11), ('TAU', 1.2, 0.13)]
samples
array([('ALFA', 1.0, 0.37), ('BETA', 1.0, 0.11), ('TAU', 1.0, 0.13),
('ALFA', 1.5, 0.37), ('ALFA', 3.0, 0.11), ('TAU', 1.2, 0.13)],
dtype=[('sensor_code', 'S4'), ('position', '<f8'), ('value', '<f8')])
samples['sensor_code']
array(['ALFA', 'BETA', 'TAU', 'ALFA', 'ALFA', 'TAU'],
dtype='|S4')
samples['value']
array([ 0.37, 0.11, 0.13, 0.37, 0.11, 0.13])
samples[0]
('ALFA', 1.0, 0.37)
samples[['position','value']]
array([(1.0, 0.37), (1.0, 0.11), (1.0, 0.13), (1.5, 0.37), (3.0, 0.11),
(1.2, 0.13)],
dtype=[('position', '<f8'), ('value', '<f8')])
samples[samples['sensor_code']=='ALFA']
array([('ALFA', 1.0, 0.37), ('ALFA', 1.5, 0.37), ('ALFA', 3.0, 0.11)],
dtype=[('sensor_code', 'S4'), ('position', '<f8'), ('value', '<f8')])
x = np.arange(10)
y = np.array([2,7,13])
np.in1d(x,y) #similar to %in% of R
array([False, False, True, False, False, False, False, True, False, False], dtype=bool)
maskedarray: dealing with (propagation of) missing data
For floats one could use NaN’s, but masks work for all types:
x = np.ma.array([1,2,3,4],mask = [0,1,0,1])
x
masked_array(data = [1 -- 3 --],
mask = [False True False True],
fill_value = 999999)
y = np.ma.array([1,2,3,4],mask = [0,1,1,1])
x+y
masked_array(data = [2 -- -- --],
mask = [False True True True],
fill_value = 999999)
np.ma.sqrt([1,-1,2,-2])
masked_array(data = [1.0 -- 1.4142135623730951 --],
mask = [False True False True],
fill_value = 1e+20)
Advanced operations
Polynomials
for example, $3x^2 + 2x - 1$:
p = np.poly1d([3,2,-1])
p(0)
-1
p.roots
array([-1. , 0.33333333])
p.order
2
x = np.linspace(0,1,20)
y = np.cos(x) + 0.3*np.random.rand(20)
p = np.poly1d(np.polyfit(x,y,3))
t = np.linspace(0,1,200)
plt.plot(x,y,'o',t,p(t),'-')
[<matplotlib.lines.Line2D at 0xcb74e80>,
<matplotlib.lines.Line2D at 0xcb74f60>]
Loading data files
pwd
u'C:\\Users\\zluck\\Documents\\Python'
cd C:\Users\zluck\Documents\Python\Python materials\scipy-lecture-notes-master\data
C:\Users\zluck\Documents\Python\Python materials\scipy-lecture-notes-master\data
data = np.loadtxt('populations.txt')
data
array([[ 1900., 30000., 4000., 48300.],
[ 1901., 47200., 6100., 48200.],
[ 1902., 70200., 9800., 41500.],
[ 1903., 77400., 35200., 38200.],
[ 1904., 36300., 59400., 40600.],
[ 1905., 20600., 41700., 39800.],
[ 1906., 18100., 19000., 38600.],
[ 1907., 21400., 13000., 42300.],
[ 1908., 22000., 8300., 44500.],
[ 1909., 25400., 9100., 42100.],
[ 1910., 27100., 7400., 46000.],
[ 1911., 40300., 8000., 46800.],
[ 1912., 57000., 12300., 43800.],
[ 1913., 76600., 19500., 40900.],
[ 1914., 52300., 45700., 39400.],
[ 1915., 19500., 51100., 39000.],
[ 1916., 11200., 29700., 36700.],
[ 1917., 7600., 15800., 41800.],
[ 1918., 14600., 9700., 43300.],
[ 1919., 16200., 10100., 41300.],
[ 1920., 24700., 8600., 47300.]])
np.savetxt('pop2.txt',data)
data2 = np.loadtxt('pop2.txt')
img = plt.imread('elephant.png')
img.shape,img.dtype
((200L, 300L, 3L), dtype('float32'))
plt.imshow(img)
<matplotlib.image.AxesImage at 0xc422cc0>
plt.savefig('plot.png')
<matplotlib.figure.Figure at 0xc3287b8>
plt.imsave('red_elephant.png',img[:,:,0],cmap = plt.cm.gray)
plt.imshow(plt.imread('red_elephant.png'))
<matplotlib.image.AxesImage at 0xcea10f0>