NumPy
Official Site: https://numpy.org
Importing NumPy
import numpy as np
Array Creation
Creating Arrays
# A flat list of numbers produces a one-dimensional array.
flat_values = [1, 2, 3]
a = np.array(flat_values)
print(a)
print(a.ndim)  # ndim is the number of axes
# Prints:
# [1 2 3]
# 1

# A list of equal-length rows produces a two-dimensional array.
nested_rows = [[1, 2, 3], [4, 5, 6]]
b = np.array(nested_rows)
print(b)
# Prints:
# [[1 2 3]
#  [4 5 6]]
Array Attributes
# The two bare expressions below only echo a value in an interactive session
# (REPL / notebook); run as a script they display nothing.
# Length of each axis, as a tuple.
b.shape  # (2, 3)
# Element type of the array.
a.dtype  # dtype('int64') in the original run; the default integer width may differ by platform

# One float among the inputs is enough to make every element a float.
mixed_numbers = [2.2, 5, 1.1]
c = np.array(mixed_numbers)
dtype_name = c.dtype.name
print(dtype_name)  # float64
print(c)  # [2.2 5.  1.1]
Creating Arrays with Initial Placeholders
# Both constructors take the desired shape as a (rows, columns) tuple.
grid_shape = (2, 3)

# Every element initialised to 0.0
d = np.zeros(grid_shape)
print(d)

# Every element initialised to 1.0
e = np.ones(grid_shape)
print(e)
# Prints:
# [[0. 0. 0.]
#  [0. 0. 0.]]
# [[1. 1. 1.]
#  [1. 1. 1.]]

# Random samples; unlike zeros/ones, rand takes the dimensions as separate arguments.
random_grid = np.random.rand(*grid_shape)
print(random_grid)
# Example output (the values differ on every run):
# [[0.14944211 0.58029371 0.16199396]
#  [0.24046702 0.32131138 0.56135856]]
Creating Sequences
# Integers from 10 up to (but not including) 50, advancing by 2.
first, limit, stride = 10, 50, 2
f = np.arange(first, limit, stride)
print(f)
# Prints:
# [10 12 14 16 18 20 22 24 26 28 30 32 34 36 38 40 42 44 46 48]

# 15 evenly spaced floats from 0 to 2, both endpoints included.
evenly_spaced = np.linspace(0, 2, num=15)
print(evenly_spaced)
# Prints:
# [0.         0.14285714 0.28571429 0.42857143 0.57142857 0.71428571
#  0.85714286 1.         1.14285714 1.28571429 1.42857143 1.57142857
#  1.71428571 1.85714286 2.        ]
Array Operations
Arithmetic Operations
# Two equally sized arrays to combine.
a = np.array([10, 20, 30, 40])
b = np.array([1, 2, 3, 4])

# Arithmetic operators are applied element by element.
print(a - b)  # Output: [ 9 18 27 36]
print(a * b)  # Output: [ 10  40  90 160]

# Converting Fahrenheit to Celsius: C = (F - 32) * 5/9, applied to the whole
# array in one expression.
farenheit = np.array([0, -10, -5, -15, 0])
celcius = (farenheit - 32) * (5 / 9)
print(celcius)
# Output:
# [-17.77777778 -23.33333333 -20.55555556 -26.11111111 -17.77777778]
Boolean Arrays
# A comparison on an array is evaluated per element and yields a boolean array.
# Outputs below are for celcius computed from the Fahrenheit values
# [0, -10, -5, -15, 0].
above_minus_20 = celcius > -20
print(above_minus_20)  # Output: [ True False False False  True]

# None of the converted temperatures is an exact even number, so the
# modulo test is False for every element.
is_even = celcius % 2 == 0
print(is_even)  # Output: [False False False False False]
Matrix Operations
# Two 2x2 operands.
A = np.array([
    [1, 1],
    [0, 1],
])
B = np.array([
    [2, 0],
    [3, 4],
])

# `*` multiplies matching entries (elementwise product).
elementwise = A * B
print(elementwise)
# Prints:
# [[2 0]
#  [0 4]]

# `@` is the matrix product (rows of A against columns of B).
matrix_product = A @ B
print(matrix_product)
# Prints:
# [[5 4]
#  [3 4]]
Upcasting
# Integer-valued operand.
array1 = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
# Float-valued operand with the same shape.
array2 = np.array([
    [7.1, 8.2, 9.1],
    [10.4, 11.2, 12.3],
])

# Adding an integer array to a float array upcasts the result to float.
array3 = array1 + array2
print(array3)
# Prints:
# [[ 8.1 10.2 12.1]
#  [14.4 16.2 18.3]]
print(array3.dtype)  # float64
Aggregation Functions
# Whole-array reductions: each call collapses all elements into one scalar.
total = array3.sum()
largest = array3.max()
smallest = array3.min()
average = array3.mean()
print(total)  # Output: 79.3
print(largest)  # Output: 18.3
print(smallest)  # Output: 8.1
print(average)  # Output: 13.216666666666667

# A 3x5 grid holding 1..15, built by reshaping a flat range.
b = np.arange(1, 16).reshape(3, 5)
print(b)
# Prints:
# [[ 1  2  3  4  5]
#  [ 6  7  8  9 10]
#  [11 12 13 14 15]]
Indexing, Slicing, and Iterating
Indexing
# One-dimensional: a single integer picks one element (positions start at 0).
a = np.array([1, 3, 5, 7])
third = a[2]
print(third)  # Output: 5

# Two-dimensional: give a row index and a column index.
a = np.array([
    [1, 2],
    [3, 4],
    [5, 6],
])
print(a[1, 1])  # Output: 4

# Picking several (row, column) positions one at a time and re-packing them ...
row_picks = [0, 1, 2]
col_picks = [0, 1, 1]
picked_one_by_one = np.array([a[r, c] for r, c in zip(row_picks, col_picks)])
print(picked_one_by_one)  # Output: [1 4 6]

# ... is the same as passing the row list and the column list in a single index.
print(a[row_picks, col_picks])  # Output: [1 4 6]
Boolean Indexing
# Comparing an array with a scalar gives a same-shaped boolean mask.
mask = a > 5
print(mask)
# Prints:
# [[False False]
#  [False False]
#  [False  True]]

# Indexing with the mask keeps only the elements where it is True.
print(a[mask])  # Output: [6]
Slicing
# One-dimensional: start:stop selects from start up to, but excluding, stop.
a = np.array([0, 1, 2, 3, 4, 5])
head = a[:3]
middle = a[2:4]
print(head)  # Output: [0 1 2]
print(middle)  # Output: [2 3]

# Two-dimensional: the first slice selects rows, the second selects columns.
a = np.array([
    [1, 2, 3, 4],
    [5, 6, 7, 8],
    [9, 10, 11, 12],
])
first_two_rows = a[:2]
print(first_two_rows)
# Prints:
# [[1 2 3 4]
#  [5 6 7 8]]
inner_window = a[:2, 1:3]
print(inner_window)
# Prints:
# [[2 3]
#  [6 7]]
Passing by Reference
# A slice is a window onto the original array's data, so writing through the
# slice also changes the array it was taken from.
row_window = slice(None, 2)
col_window = slice(1, 3)
sub_array = a[row_window, col_window]
sub_array[0, 0] = 50

changed_in_slice = sub_array[0, 0]
changed_in_source = a[0, 1]
print(changed_in_slice)  # Output: 50
print(changed_in_source)  # Output: 50
Working with Datasets
Loading a Dataset
# Read the semicolon-delimited CSV, skipping its single header line.
wines = np.genfromtxt(
    "datasets/winequality-red.csv",
    delimiter=";",
    skip_header=1,
)
print(wines)
Indexing and Slicing with Datasets
# First column, selected two ways: an integer index drops the column axis
# (1-D result), while a one-wide slice keeps it (single-column 2-D result).
first_column_flat = wines[:, 0]
first_column_2d = wines[:, 0:1]
print(first_column_flat)
print(first_column_2d)

# A contiguous range of columns: the first three.
leading_three = wines[:, 0:3]
print(leading_three)

# Columns that are not adjacent: pass their positions as a list.
picked_columns = [0, 2, 4]
print(wines[:, picked_columns])
Aggregation on Dataset
# Average quality of red wine: the mean over every row of the last column.
last_column = wines[:, -1]
print(last_column.mean())  # Output: 5.6360225140712945
Graduate School Admissions Dataset
# Load the admissions CSV as a structured array: one record per row, with each
# column reachable by field name. dtype=None lets genfromtxt infer each
# column's type.
# genfromtxt replaces spaces in field names with underscores by default, so the
# names are declared here in exactly the form used for the lookups below.
graduate_admission = np.genfromtxt(
    'datasets/Admission_Predict.csv',
    dtype=None,
    delimiter=',',
    skip_header=1,
    names=(
        'Serial_No', 'GRE_Score', 'TOEFL_Score',
        'University_Rating', 'SOP',
        'LOR', 'CGPA', 'Research', 'Chance_of_Admit',
    ),
)
print(graduate_admission.shape)  # Output: (400,)
print(graduate_admission['CGPA'][0:5])  # Output: [9.65 8.87 8.   8.67 8.21]

# Convert GPA to scale of 4 (divide by 10, multiply by 4), writing the result
# back into the same field.
graduate_admission['CGPA'] = graduate_admission['CGPA'] / 10 * 4
print(graduate_admission['CGPA'][0:20])  # Output: first 20 values

# Students with research experience: count the records whose Research field is 1.
print(len(graduate_admission[graduate_admission['Research'] == 1]))  # Output: 219

# GRE score comparison based on chance of admission: mean GRE score of the
# records above 0.8, then of the records below 0.4.
print(graduate_admission[graduate_admission['Chance_of_Admit'] > 0.8]['GRE_Score'].mean())
print(graduate_admission[graduate_admission['Chance_of_Admit'] < 0.4]['GRE_Score'].mean())