Land cover classification using a Naives Bayes algorithm with Python - Tutorial
/Machine learning can be applied to many fields as land cover classification from remote sensing imagery. The performance and accuracy of classification will depend on the number of raster bands, the image resolution, the land cover type and the algorithm used. This tutorial will perform an applied case of land cover classification from a panchromatic image in Python using the Naives Bayes algorithm implemented on the Scikit Learn package. The classification will cover four categories as: rivers, river beaches, woods and pastures; coding is performed under a Jupyter Notebook with Python running from a geospatial Conda environment. Some graphics and statistics about the classification precision are also included on the tutorial.
IMPORTANT: You will need a conda environment with geospatial tools for the tutorial. Create the environment following this link: hatarilabs.com/ih-en/how-to-install-python-geopandas-in-windows-on-a-conda-environment-tutorial
Tutorial
Code
Part 1 Data processing
import rasterio
from rasterio.plot import show
classRst = rasterio.open('../Data/Class/LandCoverClassified_v2.tif')
show(classRst)
classBand = classRst.read(1)
classBand[:5,:5]
array([[40., 40., 40., 40., 40.],
[40., 40., 40., 40., 40.],
[40., 40., 40., 40., 40.],
[40., 40., 40., 40., 40.],
[40., 40., 40., 40., 40.]], dtype=float32)
nrow, ncol = classBand.shape
print(nrow,ncol)
42 56
# get a sample pixel coordinates
for row in range(nrow)[:3]:
for col in range(ncol)[:3]:
print(classRst.xy(row,col))
(-73.163875, -3.916002123)
(-73.16362500000001, -3.916002123)
(-73.163375, -3.916002123)
(-73.163875, -3.916252123)
(-73.16362500000001, -3.916252123)
(-73.163375, -3.916252123)
(-73.163875, -3.9165021230000003)
(-73.16362500000001, -3.9165021230000003)
(-73.163375, -3.9165021230000003)
# get pixel value for a give coordinate
imageRst = rasterio.open('../Data/Rst/riverImage2.tif')
imageBand = imageRst.read(1)
row, col = imageRst.index(-73.16275, -3.91712)
bandValue = imageBand[row,col]
print(bandValue)
15
# open list of image bands
redBand = imageRst.read(1)
greenBand = imageRst.read(2)
blueBand = imageRst.read(3)
bandList = [redBand, greenBand, blueBand]
#define a function that returns a band value
def rasterValue(imageRst,imageBand,x,y):
row, col = imageRst.index(x,y)
bandValue = imageBand[row,col]
return(bandValue)
rasterValue(imageRst,blueBand,-73.16275, -3.91712)
45
#create complex function for all rows, cols and bands
import sys
classList = []
# get a sample pixel coordinates
for row in range(nrow):
for col in range(ncol):
sys.stdout.write('\r'+'Working with row, col: %d %d'%(row,col))
partialLine = []
x,y = classRst.xy(row,col)
for band in bandList:
value = rasterValue(imageRst,band,x,y)
partialLine.append(value)
partialLine.append(classBand[row,col])
classList.append(partialLine)
Working with row, col: 41 55
import numpy as np
classArray = np.array(classList)
classArray[:5]
array([[15., 56., 40., 40.],
[10., 51., 35., 40.],
[13., 63., 42., 40.],
[13., 65., 42., 40.],
[18., 70., 47., 40.]], dtype=float32)
np.save('../Data/Txt/classArray',classArray)
Part 2 Naives Bayes
import numpy as np
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
classArray = np.load('../Data/Txt/classArray.npy')
classArray[:5]
array([[15., 56., 40., 40.],
[10., 51., 35., 40.],
[13., 63., 42., 40.],
[13., 65., 42., 40.],
[18., 70., 47., 40.]], dtype=float32)
X = classArray[:,:-1]
X[:2]
array([[15., 56., 40.],
[10., 51., 35.]], dtype=float32)
y = classArray[:,-1]
y[:2]
array([40., 40.], dtype=float32)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
clf = GaussianNB()
clf.fit(X_train, y_train)
GaussianNB()
y_predict = clf.predict(X_test)
import matplotlib.pyplot as plt
fig= plt.figure(figsize=(12,6))
plt.scatter(y_test,y_predict,alpha=0.01,s=100)
<matplotlib.collections.PathCollection at 0x2b37a32db20>
from sklearn.metrics import confusion_matrix
confMatrix = confusion_matrix(y_test,y_predict)
confMatrix
array([[238, 0, 12, 1],
[ 10, 111, 3, 0],
[ 2, 6, 45, 8],
[ 0, 0, 39, 302]], dtype=int64)
#!pip install seaborn
import seaborn as sns; sns.set_theme()
fig= plt.figure(figsize=(12,6))
ax = sns.heatmap(confMatrix)
# generation of output raster
import rasterio
res = 0.0005
x0,y0 = -73.2870, -3.9635
xLL, yLL = x0 - res/2, y0 - res/2
nrow, ncol = 60, 100
#definition of the raster transform array
from rasterio.transform import Affine
transform = Affine.translation(xLL, yLL)*Affine.scale(res,res)
transform
Affine(0.0005, 0.0, -73.28725,
0.0, 0.0005, -3.9637499999999997)
#get crs as wkt
from rasterio.crs import CRS
rasterCrs = CRS.from_epsg(4326)
rasterCrs.data
{'init': 'epsg:4326'}
imageRst = rasterio.open('../Data/Rst/riverImage2.tif')
# open list of image bands
redBand = imageRst.read(1)
greenBand = imageRst.read(2)
blueBand = imageRst.read(3)
bandList = [redBand, greenBand, blueBand]
import rasterio
# empty array
classArray = np.zeros([nrow,ncol])
### From Part 1
#define a function that returns a band value
def rasterValue(imageRst,imageBand,x,y):
row, col = imageRst.index(x,y)
bandValue = imageBand[row,col]
return(bandValue)
#create complex function for all rows, cols and bands
import sys
classList = []
# get a sample pixel coordinates
for row in range(nrow):
for col in range(ncol):
sys.stdout.write('\r'+'Working with row, col: %d %d'%(row,col))
partialLine = []
#x,y = classRst.xy(row,col)
x = x0 + col*res #<--new
y = y0 + row*res #<--new
for band in bandList:
value = rasterValue(imageRst,band,x,y)
partialLine.append(value)
y_predict = clf.predict([np.array(partialLine)]) #<--new
classArray[row,col] = y_predict #<--new
#partialLine.append(classBand[row,col])
#classList.append(partialLine)
Working with row, col: 59 99
#show class array
classArray
array([[40., 40., 40., ..., 40., 40., 40.],
[40., 40., 40., ..., 40., 40., 40.],
[40., 40., 40., ..., 40., 40., 40.],
...,
[10., 30., 40., ..., 40., 40., 40.],
[30., 10., 30., ..., 40., 40., 40.],
[40., 40., 30., ..., 40., 40., 40.]])
#plot array
plt.imshow(classArray)
<matplotlib.image.AxesImage at 0x2b37df03400>
#definition, register and close of interpolated raster
interpRaster = rasterio.open('../Data/Class/predictedLandCover.tif',
'w',
driver='GTiff',
height=nrow,
width=ncol,
count=1,
dtype=np.float32,
crs=rasterCrs,
transform=transform,
)
interpRaster.write(classArray,1)
interpRaster.close()
Input data
You can download the input data from this link.