#!/usr/bin/env python
# coding: utf-8

# # Numpy
#
# The Numpy package provides data types and arrays that are the de facto
# standard for scientific computing in Python. This is the case because
# standard Python ints have arbitrary precision and every Python number is a
# full object, which is extremely inefficient for heavy number crunching.
#
# NOTE: this script is laid out as notebook-style cells (`# In[n]:`). A bare
# expression at the end of a cell is what a notebook would display; when the
# file is run as a plain script those expressions are evaluated and discarded.
#
# Let's start with a simple array:

# In[1]:


import numpy as np

your_first_array = np.array([4.3, 2.3, 2.3])
your_first_array[1] = 34.12
your_first_array[0]
your_first_array


# As you can see, arrays work in a very similar fashion to the built-in list;
# however, you can also do mathematical operations element-wise, like this:

# In[2]:


your_first_array * 3.2


# It also has built-in support for multi-dimensional arrays:

# In[3]:


md_array = np.array([[1.3, 2.3, 3.3], [4.2, 5.0, 6.0]])
md_array[1, 2] = 21.4
md_array


# Take a look at md_array: you can get its dimensions through the attribute
# `shape`, which is a tuple. Try it: md_array.shape. You can also easily
# change the shape of the array using reshape:

# In[4]:


md_array_reshaped = md_array.reshape((3, 2))
print(md_array.shape)
print(md_array_reshaped.shape)


# Here are some useful functions for generating arrays:

# In[5]:


array1 = np.arange(10)
array2 = np.arange(4, 10)
array3 = np.arange(5, 15).reshape((2, 5))
array4 = np.arange(5, 15).reshape((5, 2))
array5 = np.zeros((3, 4))
array6 = np.ones((2, 6))


# There's also an important one:

# In[6]:


array7 = np.empty((5, 3))


# This function does not initialize the array: its contents are whatever
# happened to be in the memory that was allocated for it. That makes it
# slightly cheaper (for the computer to process) than np.zeros or np.ones,
# but the values must be overwritten before they are used.
#
# One can also fill the array with random numbers (with uniform distribution
# between 0 and 1):

# In[7]:


array8 = np.random.random((5, 3))


# ## Array copy
#
# Consider the following code:

# In[8]:


some_array = np.array([[1.3, 2.3, 3.3], [4.2, 5.0, 6.0]])
some_other_array = some_array.reshape((3, 2))
some_array[0, 0] = 32.4
some_array
some_other_array


# As you can see, when we changed the value of some_array[0, 0], we also
# changed the value of some_other_array[0, 0]. This is because of the way
# some_other_array was "constructed": here the reshape method returns an array
# that points to the same data in memory (this is called a **view** of an
# array), only with a different shape from the programmer's point of view.
#
# One advantage of this is that it avoids requiring the computer to copy data
# from one array to the other, which can be expensive for large arrays and/or
# when done repeatedly. So, in order to obtain a reshaped copy of some_array
# you can use:

# In[9]:


some_array = np.array([[1.3, 2.3, 3.3], [4.2, 5.0, 6.0]])
some_array_copy = some_array.reshape((3, 2)).copy()
some_array[0, 0] = 221.4
some_array
some_array_copy


# ## Slicing
#
# Numpy arrays support advanced slicing; here are some examples to play with:

# In[10]:


some_array = np.arange(20).reshape((4, 5))
some_array[:, 2]
some_array[:, 0:3]
some_array[:, :2]
some_array[:, 1:]
some_array[2]
some_array[2, :]
some_array[1:]
some_array[1:, :]
some_array[1:3, 1:]
some_array[1:, :2]
some_array[1:, 2]
some_array[[2, 3]]
some_array[[2, 3], :]
some_array[[2, 3], [0, 2]]
some_array[[2, 3], 1:]


# It's also possible to use negative indices, where -1 means the last element,
# -2 the penultimate, and so on:

# In[11]:


some_array[-1, 2]
some_array[-3:-1, 2]


# It's also possible to jump through the array in steps, like this:

# In[12]:


some_array = np.arange(20)
some_array[::2]
some_array[::3]


# And of course, something more advanced combining all this:

# In[13]:


some_array = np.arange(20).reshape((4, 5))
some_array[-1, ::3]
some_array[::2, 0:3]
some_array[:, 1:4:2]


# ## Operations
#
# Numpy provides a bunch of operations of arrays with arrays:

# In[14]:


a1 = np.arange(20).reshape((4, 5))
a2 = np.arange(20).reshape((4, 5))
a3 = a1 + a2
a4 = a1 + a2 * 3

# Raise each element of an array to the power of 2
a1 ** 2

# Divide each element of an array by 2
a1 / 2

# This also works as expected
a3 += a4

a5 = np.arange(1, 21).reshape((5, 4))
# Matrix product: (4, 5) times (5, 4) gives a (4, 4) result
np.dot(a4, a5)

# Exponential and log of each element of the matrix
np.exp(a4)
np.log(a5)


# ## Data types
# As said earlier, the reason the numpy array is the standard in scientific
# computing is its fixed-size floating-point and integer types. Let's take a
# better look at this now:

# In[15]:


x = np.arange(10)
x.dtype


# In the example above, each element of `x` is typically a 64-bit integer (the
# default integer size depends on the platform). We could make it use 32-bit
# integers instead:

# In[16]:


x = np.arange(10, dtype=np.int32)
x.dtype


# Or even floating-point numbers:

# In[17]:


x = np.arange(10, dtype=np.float32)
# or equivalently:
x = np.arange(10, dtype='f4')
x.dtype


# Note that every element of a Numpy array always has the same type, so, for
# instance, if your array dtype is `int64`, you cannot have a float in it (it
# will be converted, i.e. truncated to an integer):

# In[18]:


x = np.arange(5, dtype=np.int64)
x[2] = 7.4
x


# But you can "convert" the array to one with `float64` as data type:

# In[19]:


x = np.arange(5, dtype=np.int64)
x = np.array(x, dtype=np.float64)
x[2] = 7.4
x


# ## Broadcasting
# When the shapes of two arrays differ, numpy will automatically attempt to
# broadcast the arrays in order to allow the operation to succeed. A simple
# example:
#
# * `x` has shape (10,) and `y` has shape (4, 10), then
# * `x` will be treated as shape (1, 10) and stretched along the first axis
#   to match `y`.
# * The operation will succeed.

# In[20]:


x = np.ones(10)
print(x.shape)
y = np.arange(40).reshape((4, 10))
print(y.shape)
x + y


# However, the broadcast operation of creating new dimensions on the array
# only works to the left, so the following code won't work:
#
# ```
# x = np.ones(10)
# y = np.arange(40).reshape((10, 4))
# x + y
# ```

# ## Further reading
#
# [https://numpy.org/doc/stable/user/quickstart.html](https://numpy.org/doc/stable/user/quickstart.html)