Supervised Classification

The Classifier package handles supervised classification by traditional ML algorithms running in Earth Engine. These classifiers include CART, RandomForest, NaiveBayes and SVM. The general workflow for classification is:

  1. Collect training data. Assemble features which have a property that stores the known class label and properties storing numeric values for the predictors.
  2. Instantiate a classifier. Set its parameters if necessary.
  3. Train the classifier using the training data.
  4. Classify an image or feature collection.
  5. Estimate classification error with independent validation data.

The training data is a FeatureCollection with a property storing the class label and properties storing predictor variables. Class labels should be consecutive, integers starting from 0. If necessary, use remap() to convert class values to consecutive integers. The predictors should be numeric.

Training and/or validation data can come from a variety of sources. To collect training data interactively in Earth Engine, you can use the geometry drawing tools (see the geometry tools section of the Code Editor page). Alternatively, you can import predefined training data from an Earth Engine table asset (see the Importing Table Data page for details). Get a classifier from one of the constructors in ee.Classifier. Train the classifier using classifier.train(). Classify an Image or FeatureCollection using classify(). The following example uses a Classification and Regression Trees (CART) classifier (Breiman et al. 1984) to predict three simple classes:

Code Editor (JavaScript)

// Define a function that scales and masks Landsat 8 surface reflectance images.
function prepSrL8(image) {
  // Develop masks for unwanted pixels (fill, cloud, cloud shadow).
  var qaMask = image.select('QA_PIXEL').bitwiseAnd(parseInt('11111', 2)).eq(0);
  var saturationMask = image.select('QA_RADSAT').eq(0);

  // Apply the scaling factors to the appropriate bands.
  var getFactorImg = function(factorNames) {
    var factorList = image.toDictionary().select(factorNames).values();
    return ee.Image.constant(factorList);
  };
  var scaleImg = getFactorImg([
    'REFLECTANCE_MULT_BAND_.|TEMPERATURE_MULT_BAND_ST_B10']);
  var offsetImg = getFactorImg([
    'REFLECTANCE_ADD_BAND_.|TEMPERATURE_ADD_BAND_ST_B10']);
  var scaled = image.select('SR_B.|ST_B10').multiply(scaleImg).add(offsetImg);

  // Replace original bands with scaled bands and apply masks.
  return image.addBands(scaled, null, true)
    .updateMask(qaMask).updateMask(saturationMask);
}

// Make a cloud-free Landsat 8 surface reflectance composite.
var image = ee.ImageCollection('LANDSAT/LC08/C02/T1_L2')
  .filterDate('2021-03-01', '2021-07-01')
  .map(prepSrL8)
  .median();

// Use these bands for prediction.
var bands = ['SR_B2', 'SR_B3', 'SR_B4', 'SR_B5',
             'SR_B6', 'SR_B7', 'ST_B10'];

// Load training points. The numeric property 'class' stores known labels.
var points = ee.FeatureCollection('GOOGLE/EE/DEMOS/demo_landcover_labels');

// This property stores the land cover labels as consecutive
// integers starting from zero.
var label = 'landcover';

// Overlay the points on the imagery to get training.
var training = image.select(bands).sampleRegions({
  collection: points,
  properties: [label],
  scale: 30
});

// Train a CART classifier with default parameters.
var trained = ee.Classifier.smileCart().train(training, label, bands);

// Classify the image with the same bands used for training.
var classified = image.select(bands).classify(trained);

// Display the inputs and the results.
Map.setCenter(-122.0877, 37.7880, 11);
Map.addLayer(image,
             {bands: ['SR_B4', 'SR_B3', 'SR_B2'], min: 0, max: 0.25},
             'image');
Map.addLayer(classified,
             {min: 0, max: 2, palette: ['orange', 'green', 'blue']},
             'classification');

Python setup

See the Python Environment page for information on the Python API and using geemap for interactive development.

import ee
import geemap.core as geemap

Colab (Python)

# Define a function that scales and masks Landsat 8 surface reflectance images.
def prep_sr_l8(image):
  """Scales and masks Landsat 8 surface reflectance images."""
  # Develop masks for unwanted pixels (fill, cloud, cloud shadow).
  qa_mask = image.select('QA_PIXEL').bitwiseAnd(0b11111).eq(0)
  saturation_mask = image.select('QA_RADSAT').eq(0)

  # Apply the scaling factors to the appropriate bands.
  def _get_factor_img(factor_names):
    factor_list = image.toDictionary().select(factor_names).values()
    return ee.Image.constant(factor_list)

  scale_img = _get_factor_img([
      'REFLECTANCE_MULT_BAND_.|TEMPERATURE_MULT_BAND_ST_B10'])
  offset_img = _get_factor_img([
      'REFLECTANCE_ADD_BAND_.|TEMPERATURE_ADD_BAND_ST_B10'])
  scaled = image.select('SR_B.|ST_B10').multiply(scale_img).add(offset_img)

  # Replace original bands with scaled bands and apply masks.
  return image.addBands(scaled, None, True).updateMask(
      qa_mask).updateMask(saturation_mask)


# Make a cloud-free Landsat 8 surface reflectance composite.
l8_image = (
    ee.ImageCollection('LANDSAT/LC08/C02/T1_L2')
    .filterDate('2021-03-01', '2021-07-01')
    .map(prep_sr_l8)
    .median())

# Use these bands for prediction.
bands = ['SR_B2', 'SR_B3', 'SR_B4', 'SR_B5', 'SR_B6', 'SR_B7', 'ST_B10']

# Load training points. The numeric property 'class' stores known labels.
points = ee.FeatureCollection('GOOGLE/EE/DEMOS/demo_landcover_labels')

# This property stores the land cover labels as consecutive
# integers starting from zero.
label = 'landcover'

# Overlay the points on the imagery to get training.
training = l8_image.select(bands).sampleRegions({
    'collection': points,
    'properties': [label],
    'scale': 30
})

# Train a CART classifier with default parameters.
trained = ee.Classifier.smileCart().train(training, label, bands)

# Classify the image with the same bands used for training.
classified = l8_image.select(bands).classify(trained)

# Display the inputs and the results.
Map = geemap.core.Map()
Map.setCenter(-122.0877, 37.7880, 11)
Map.addLayer(l8_image,
             {'bands': ['SR_B4', 'SR_B3', 'SR_B2'], 'min': 0, 'max': 0.25},
             'image')
Map.addLayer(classified,
             {'min': 0, 'max': 2, 'palette': ['orange', 'green', 'blue']},
             'classification')

In this example, the training points in the table store only the class label. Note that the training property ('landcover') stores consecutive integers starting at 0 (Use remap() on your table to turn your class labels into consecutive integers starting at zero if necessary). Also note the use of image.sampleRegions() to get the predictors into the table and create a training dataset. To train the classifier, specify the name of the class label property and a list of properties in the training table which the classifier should use for predictors. The number and order of the bands in the image to be classified must exactly match the order of the properties list provided to classifier.train(). Use image.select() to ensure that the classifier schema matches the image.

If the training data are polygons representing homogeneous regions, every pixel in each polygon is a training point. You can use polygons to train as illustrated in the following example:

Code Editor (JavaScript)

// Define a function that scales and masks Landsat 8 surface reflectance images.
function prepSrL8(image) {
  // Develop masks for unwanted pixels (fill, cloud, cloud shadow).
  var qaMask = image.select('QA_PIXEL').bitwiseAnd(parseInt('11111', 2)).eq(0);
  var saturationMask = image.select('QA_RADSAT').eq(0);

  // Apply the scaling factors to the appropriate bands.
  var getFactorImg = function(factorNames) {
    var factorList = image.toDictionary().select(factorNames).values();
    return ee.Image.constant(factorList);
  };
  var scaleImg = getFactorImg([
    'REFLECTANCE_MULT_BAND_.|TEMPERATURE_MULT_BAND_ST_B10']);
  var offsetImg = getFactorImg([
    'REFLECTANCE_ADD_BAND_.|TEMPERATURE_ADD_BAND_ST_B10']);
  var scaled = image.select('SR_B.|ST_B10').multiply(scaleImg).add(offsetImg);

  // Replace original bands with scaled bands and apply masks.
  return image.addBands(scaled, null, true)
    .updateMask(qaMask).updateMask(saturationMask);
}

// Make a cloud-free Landsat 8 surface reflectance composite.
var image = ee.ImageCollection('LANDSAT/LC08/C02/T1_L2')
  .filterDate('2018-01-01', '2019-01-01')
  .map(prepSrL8)
  .median();

// Use these bands for prediction.
var bands = ['SR_B2', 'SR_B3', 'SR_B4', 'SR_B5',
             'SR_B6', 'SR_B7'];

// Manually created polygons.
var forest1 = ee.Geometry.Rectangle(-63.0187, -9.3958, -62.9793, -9.3443);
var forest2 = ee.Geometry.Rectangle(-62.8145, -9.206, -62.7688, -9.1735);
var nonForest1 = ee.Geometry.Rectangle(-62.8161, -9.5001, -62.7921, -9.4486);
var nonForest2 = ee.Geometry.Rectangle(-62.6788, -9.044, -62.6459, -8.9986);

// Make a FeatureCollection from the hand-made geometries.
var polygons = ee.FeatureCollection([
  ee.Feature(nonForest1, {'class': 0}),
  ee.Feature(nonForest2, {'class': 0}),
  ee.Feature(forest1, {'class': 1}),
  ee.Feature(forest2, {'class': 1}),
]);

// Get the values for all pixels in each polygon in the training.
var training = image.sampleRegions({
  // Get the sample from the polygons FeatureCollection.
  collection: polygons,
  // Keep this list of properties from the polygons.
  properties: ['class'],
  // Set the scale to get Landsat pixels in the polygons.
  scale: 30
});

// Create an SVM classifier with custom parameters.
var classifier = ee.Classifier.libsvm({
  kernelType: 'RBF',
  gamma: 0.5,
  cost: 10
});

// Train the classifier.
var trained = classifier.train(training, 'class', bands);

// Classify the image.
var classified = image.classify(trained);

// Display the classification result and the input image.
Map.setCenter(-62.836, -9.2399, 9);
Map.addLayer(image,
             {bands: ['SR_B4', 'SR_B3', 'SR_B2'], min: 0, max: 0.25},
             'image');
Map.addLayer(polygons, {color: 'yellow'}, 'training polygons');
Map.addLayer(classified,
             {min: 0, max: 1, palette: ['orange', 'green']},
             'deforestation');

Python setup

See the Python Environment page for information on the Python API and using geemap for interactive development.

import ee
import geemap.core as geemap

Colab (Python)

# Define a function that scales and masks Landsat 8 surface reflectance images.
def prep_sr_l8(image):
  # Develop masks for unwanted pixels (fill, cloud, cloud shadow).
  qa_mask = image.select('QA_PIXEL').bitwiseAnd(0b11111).eq(0)
  saturation_mask = image.select('QA_RADSAT').eq(0)

  # Apply the scaling factors to the appropriate bands.
  def _get_factor_img(factor_names):
    factor_list = image.toDictionary().select(factor_names).values()
    return ee.Image.constant(factor_list)
  scale_img = _get_factor_img([
      'REFLECTANCE_MULT_BAND_.|TEMPERATURE_MULT_BAND_ST_B10'])
  offset_img = _get_factor_img([
      'REFLECTANCE_ADD_BAND_.|TEMPERATURE_ADD_BAND_ST_B10'])
  scaled = image.select('SR_B.|ST_B10').multiply(scale_img).add(offset_img)

  # Replace original bands with scaled bands and apply masks.
  return image.addBands(scaled, None, True).updateMask(
      qa_mask).updateMask(saturation_mask)


# Make a cloud-free Landsat 8 surface reflectance composite.
l8_image = (
    ee.ImageCollection('LANDSAT/LC08/C02/T1_L2')
    .filterDate('2018-01-01', '2019-01-01')
    .map(prep_sr_l8)
    .median())

# Use these bands for prediction.
bands = ['SR_B2', 'SR_B3', 'SR_B4', 'SR_B5', 'SR_B6', 'SR_B7']

# Manually created polygons.
forest1 = ee.Geometry.Rectangle(-63.0187, -9.3958, -62.9793, -9.3443)
forest2 = ee.Geometry.Rectangle(-62.8145, -9.206, -62.7688, -9.1735)
nonForest1 = ee.Geometry.Rectangle(-62.8161, -9.5001, -62.7921, -9.4486)
nonForest2 = ee.Geometry.Rectangle(-62.6788, -9.044, -62.6459, -8.9986)

# Make a FeatureCollection from the hand-made geometries.
polygons = ee.FeatureCollection([
    ee.Feature(nonForest1, {'class': 0}),
    ee.Feature(nonForest2, {'class': 0}),
    ee.Feature(forest1, {'class': 1}),
    ee.Feature(forest2, {'class': 1}),
])

# Get the values for all pixels in each polygon in the training.
training = l8_image.sampleRegions({
    # Get the sample from the polygons FeatureCollection.
    'collection': polygons,
    # Keep this list of properties from the polygons.
    'properties': ['class'],
    # Set the scale to get Landsat pixels in the polygons.
    'scale': 30
})

# Create an SVM classifier with custom parameters.
classifier = ee.Classifier.libsvm({
    'kernelType': 'RBF',
    'gamma': 0.5,
    'cost': 10
})

# Train the classifier.
trained = classifier.train(training, 'class', bands)

# Classify the image.
classified = l8_image.classify(trained)

# Display the classification result and the input image.
Map = geemap.core.Map()
Map.setCenter(-62.836, -9.2399, 9)
Map.addLayer(l8_image,
             {'bands': ['SR_B4', 'SR_B3', 'SR_B2'], 'min': 0, 'max': 0.25},
             'image')
Map.addLayer(polygons, {'color': 'yellow'}, 'training polygons')
Map.addLayer(classified,
             {'min': 0, 'max': 1, 'palette': ['orange', 'green']},
             'deforestation')

This example uses a Support Vector Machine (SVM) classifier (Burges 1998). Note that the SVM is specified with a set of custom parameters. Without a priori information about the physical nature of the prediction problem, optimal parameters are unknown. See Hsu et al. (2003) for a rough guide to choosing parameters for an SVM.

Accuracy Assessment

To assess the accuracy of a classifier, use a ConfusionMatrix (Stehman 1997). The following example uses sample() to generate training and validation data from a MODIS reference image and compares confusion matrices representing training and validation accuracy:

Code Editor (JavaScript)

// Define a region of interest.
var roi = ee.Geometry.BBox(-122.93, 36.99, -121.20, 38.16);

// Define a function that scales and masks Landsat 8 surface reflectance images.
function prepSrL8(image) {
  // Develop masks for unwanted pixels (fill, cloud, cloud shadow).
  var qaMask = image.select('QA_PIXEL').bitwiseAnd(parseInt('11111', 2)).eq(0);
  var saturationMask = image.select('QA_RADSAT').eq(0);

  // Apply the scaling factors to the appropriate bands.
  var getFactorImg = function(factorNames) {
    var factorList = image.toDictionary().select(factorNames).values();
    return ee.Image.constant(factorList);
  };
  var scaleImg = getFactorImg([
    'REFLECTANCE_MULT_BAND_.|TEMPERATURE_MULT_BAND_ST_B10']);
  var offsetImg = getFactorImg([
    'REFLECTANCE_ADD_BAND_.|TEMPERATURE_ADD_BAND_ST_B10']);
  var scaled = image.select('SR_B.|ST_B10').multiply(scaleImg).add(offsetImg);

  // Replace original bands with scaled bands and apply masks.
  return image.addBands(scaled, null, true)
    .updateMask(qaMask).updateMask(saturationMask);
}

// Make a cloud-free Landsat 8 surface reflectance composite.
var input = ee.ImageCollection('LANDSAT/LC08/C02/T1_L2')
    .filterBounds(roi)
    .filterDate('2020-03-01', '2020-07-01')
    .map(prepSrL8)
    .median()
    .setDefaultProjection('EPSG:4326', null, 30)
    .select(['SR_B2', 'SR_B3', 'SR_B4', 'SR_B5', 'SR_B6', 'SR_B7']);

// Use MODIS land cover, IGBP classification, for training.
var modis = ee.Image('MODIS/006/MCD12Q1/2020_01_01')
    .select('LC_Type1');

// Sample the input imagery to get a FeatureCollection of training data.
var training = input.addBands(modis).sample({
  region: roi,
  numPixels: 5000,
  seed: 0
});

// Make a Random Forest classifier and train it.
var classifier = ee.Classifier.smileRandomForest(10)
    .train({
      features: training,
      classProperty: 'LC_Type1',
      inputProperties: ['SR_B2', 'SR_B3', 'SR_B4', 'SR_B5', 'SR_B6', 'SR_B7']
    });

// Classify the input imagery.
var classified = input.classify(classifier);

// Get a confusion matrix representing resubstitution accuracy.
var trainAccuracy = classifier.confusionMatrix();
print('Resubstitution error matrix: ', trainAccuracy);
print('Training overall accuracy: ', trainAccuracy.accuracy());

// Sample the input with a different random seed to get validation data.
var validation = input.addBands(modis).sample({
  region: roi,
  numPixels: 5000,
  seed: 1
  // Filter the result to get rid of any null pixels.
}).filter(ee.Filter.notNull(input.bandNames()));

// Classify the validation data.
var validated = validation.classify(classifier);

// Get a confusion matrix representing expected accuracy.
var testAccuracy = validated.errorMatrix('LC_Type1', 'classification');
print('Validation error matrix: ', testAccuracy);
print('Validation overall accuracy: ', testAccuracy.accuracy());

// Define a palette for the IGBP classification.
var igbpPalette = [
  'aec3d4', // water
  '152106', '225129', '369b47', '30eb5b', '387242', // forest
  '6a2325', 'c3aa69', 'b76031', 'd9903d', '91af40',  // shrub, grass
  '111149', // wetlands
  'cdb33b', // croplands
  'cc0013', // urban
  '33280d', // crop mosaic
  'd7cdcc', // snow and ice
  'f7e084', // barren
  '6f6f6f'  // tundra
];

// Display the input and the classification.
Map.centerObject(roi, 10);
Map.addLayer(input.clip(roi),
             {bands: ['SR_B4', 'SR_B3', 'SR_B2'], min: 0, max: 0.25},
             'landsat');
Map.addLayer(classified.clip(roi),
             {palette: igbpPalette, min: 0, max: 17},
             'classification');

Python setup

See the Python Environment page for information on the Python API and using geemap for interactive development.

import ee
import geemap.core as geemap

Colab (Python)

# Define a region of interest.
roi = ee.Geometry.BBox(-122.93, 36.99, -121.20, 38.16)


# Define a function that scales and masks Landsat 8 surface reflectance images.
def prep_sr_l8(image):
  """Scales and masks Landsat 8 surface reflectance images."""
  # Develop masks for unwanted pixels (fill, cloud, cloud shadow).
  qa_mask = image.select('QA_PIXEL').bitwiseAnd(0b1111).eq(0)
  saturation_mask = image.select('QA_RADSAT').eq(0)

  # Apply the scaling factors to the appropriate bands.
  def _get_factor_img(factor_names):
    factor_list = image.toDictionary().select(factor_names).values()
    return ee.Image.constant(factor_list)

  scale_img = _get_factor_img([
      'REFLECTANCE_MULT_BAND_.|TEMPERATURE_MULT_BAND_ST_B10'])
  offset_img = _get_factor_img([
      'REFLECTANCE_ADD_BAND_.|TEMPERATURE_ADD_BAND_ST_B10'])
  scaled = image.select('SR_B.|ST_B10').multiply(scale_img).add(offset_img)

  # Replace original bands with scaled bands and apply masks.
  return image.addBands(scaled, None, True).updateMask(
      qa_mask).updateMask(saturation_mask)


# Make a cloud-free Landsat 8 surface reflectance composite.
inputImage = (
    ee.ImageCollection('LANDSAT/LC08/C02/T1_L2')
    .filterBounds(roi)
    .filterDate('2020-03-01', '2020-07-01')
    .map(prep_sr_l8)
    .median()
    .setDefaultProjection('EPSG:4326', None, 30)
    .select(['SR_B2', 'SR_B3', 'SR_B4', 'SR_B5', 'SR_B6', 'SR_B7']))

# Use MODIS land cover, IGBP classification, for training.
modis = ee.Image('MODIS/006/MCD12Q1/2020_01_01').select('LC_Type1')

# Sample the input imagery to get a FeatureCollection of training data.
training = inputImage.addBands(modis).sample({
    'region': roi,
    'numPixels': 5000,
    'seed': 0
})

# Make a Random Forest classifier and train it.
classifier = ee.Classifier.smileRandomForest(10).train({
    'features': training,
    'classProperty': 'LC_Type1',
    'inputProperties': [
        'SR_B2', 'SR_B3', 'SR_B4', 'SR_B5', 'SR_B6', 'SR_B7']})

# Classify the input imagery.
classified = inputImage.classify(classifier)

# Get a confusion matrix representing resubstitution accuracy.
train_accuracy = classifier.confusionMatrix()
print('Resubstitution error matrix: ', train_accuracy)
print('Training overall accuracy: ', train_accuracy.accuracy())

# Sample the input with a different random seed to get validation data.
validation = inputImage.addBands(modis).sample({
    'region': roi,
    'numPixels': 5000,
    'seed': 1
            # Filter the result to get rid of any null pixels.
    }).filter(ee.Filter.notNull(inputImage.bandNames()))

# Classify the validation data.
validated = validation.classify(classifier)

# Get a confusion matrix representing expected accuracy.
testAccuracy = validated.errorMatrix('LC_Type1', 'classification')
print('Validation error matrix: ', testAccuracy)
print('Validation overall accuracy: ', testAccuracy.accuracy())

# Define a palette for the IGBP classification.
igbp_palette = [
    'aec3d4',  # water
    '152106', '225129', '369b47', '30eb5b', '387242',  # forest
    '6a2325', 'c3aa69', 'b76031', 'd9903d', '91af40',  # shrub, grass
    '111149',  # wetlands
    'cdb33b',  # croplands
    'cc0013',  # urban
    '33280d',  # crop mosaic
    'd7cdcc',  # snow and ice
    'f7e084',  # barren
    '6f6f6f'   # tundra
]

# Display the input and the classification with geemap in a notebook.
Map = geemap.Map
Map.centerObject(roi, 10)
Map.addLayer(inputImage.clip(roi),
             {'bands': ['SR_B4', 'SR_B3', 'SR_B2'], 'min': 0, 'max': 0.25},
             'landsat')
Map.addLayer(classified.clip(roi),
             {'palette': igbp_palette, 'min': 0, 'max': 17},
             'classification')

This example uses a random forest (Breiman 2001) classifier with 10 trees to downscale MODIS data to Landsat resolution. The sample() method generates two random samples from the MODIS data: one for training and one for validation. The training sample is used to train the classifier. You can get resubstitution accuracy on the training data from classifier.confusionMatrix(). To get validation accuracy, classify the validation data. This adds a classification property to the validation FeatureCollection. Call errorMatrix() on the classified FeatureCollection to get a confusion matrix representing validation (expected) accuracy.

Inspect the output to see that the overall accuracy estimated from the training data is much higher than the validation data. The accuracy estimated from training data is an overestimate because the random forest is “fit” to the training data. The expected accuracy on unknown data is lower, as indicated by the estimate from the validation data.

You can also take a single sample and partition it with the randomColumn() method on feature collections. Continuing the previous example:

Code Editor (JavaScript)

var sample = input.addBands(modis).sample({
  numPixels: 5000,
  seed: 0
});

// The randomColumn() method will add a column of uniform random
// numbers in a column named 'random' by default.
sample = sample.randomColumn();

var split = 0.7;  // Roughly 70% training, 30% testing.
var training = sample.filter(ee.Filter.lt('random', split));
var validation = sample.filter(ee.Filter.gte('random', split));

Python setup

See the Python Environment page for information on the Python API and using geemap for interactive development.

import ee
import geemap.core as geemap

Colab (Python)

sample = inputImage.addBands(modis).sample({
    'numPixels': 5000,
    'seed': 0
})

# The randomColumn() method will add a column of uniform random
# numbers in a column named 'random' by default.
sample = sample.randomColumn()

split = 0.7  # Roughly 70% training, 30% testing.
training = sample.filter(ee.Filter.lt('random', split))
validation = sample.filter(ee.Filter.gte('random', split))

You may also want to ensure that the training samples are uncorrelated with the evaluation samples. This might result from spatial autocorrelation of the phenomenon being predicted. One way to exclude samples that might be correlated in this manner is to remove samples that are within some distance to any other sample(s). This can be accomplished with a spatial join:

Code Editor (JavaScript)

// Sample the input imagery to get a FeatureCollection of training data.
var sample = input.addBands(modis).sample({
  region: roi,
  numPixels: 5000,
  seed: 0,
  geometries: true,
  tileScale: 16
});

// The randomColumn() method will add a column of uniform random
// numbers in a column named 'random' by default.
sample = sample.randomColumn();

var split = 0.7;  // Roughly 70% training, 30% testing.
var training = sample.filter(ee.Filter.lt('random', split));
print(training.size());
var validation = sample.filter(ee.Filter.gte('random', split));

// Spatial join.
var distFilter = ee.Filter.withinDistance({
  distance: 1000,
  leftField: '.geo',
  rightField: '.geo',
  maxError: 10
});

var join = ee.Join.inverted();

// Apply the join.
training = join.apply(training, validation, distFilter);
print(training.size());

Python setup

See the Python Environment page for information on the Python API and using geemap for interactive development.

import ee
import geemap.core as geemap

Colab (Python)

# Sample the input imagery to get a FeatureCollection of training data.
sample = inputImage.addBands(modis).sample({
    'region': roi,
    'numPixels': 5000,
    'seed': 0,
    'geometries': True,
    'tileScale': 16
})

# The randomColumn() method will add a column of uniform random
# numbers in a column named 'random' by default.
sample = sample.randomColumn()

split = 0.7  # Roughly 70% training, 30% testing.
training = sample.filter(ee.Filter.lt('random', split))
print(training.size())
validation = sample.filter(ee.Filter.gte('random', split))

# Spatial join.
dist_filter = ee.Filter.withinDistance({
    'distance': 1000,
    'leftField': '.geo',
    'rightField': '.geo',
    'maxError': 10
})

join = ee.Join.inverted()

# Apply the join.
training = join.apply(training, validation, dist_filter)
print(training.size())

In the previous snippet, note that geometries is set to true in sample(). This is to retain the spatial information of the sample points needed for a spatial join. Also note that tileScale is set to 16. This is to avoid the "User memory limit exceeded" error.

Saving Classifiers

Training a classifier on a large amount of input data may not be possible interactively because the input is too large (>99 MB) or because training takes too long (5 minutes). Use Export.classifier.toAsset to run the classifier training as a batch job, where it can run for longer with more memory. Expensive-to-train classifiers can be exported and reloaded to avoid the need to retrain.

Code Editor (JavaScript)

// Using the random forest classifier defined earlier, export the random 
// forest classifier as an Earth Engine asset.
Export.classifier.toAsset(
  classifier,
  "Saved-random-forest-IGBP-classification",
  "upscaled_MCD12Q1_random_forest"
);

Python setup

See the Python Environment page for information on the Python API and using geemap for interactive development.

import ee
import geemap.core as geemap

Colab (Python)

# Using the random forest classifier defined earlier, export the random
# forest classifier as an Earth Engine asset.
classifier_asset_id = '<asset_prefix>/upscaled_MCD12Q1_random_forest'
task = ee.batch.Export.classifier.toAsset(
    classifier, 'Saved-random-forest-IGBP-classification', classifier_asset_id
)
task.start()

To load the saved classifier, use the algorithm `ee.Classifier.load`, specify the exported classifier ID and use it just like any other trained classifier.

Code Editor (JavaScript)

// Once the classifier export finishes, we can load our saved classifier.
var classifierAssetId = "<asset_prefix>/upscaled_MCD12Q1_random_forest";
var savedClassifier = ee.Classifier.load(classifierAssetId);
// We can perform classification just as before with the saved classifier now.
Map.addLayer(input.classify(savedClassifier).clip(roi),
             {palette: igbpPalette, min: 0, max: 17},
             'classification');

Python setup

See the Python Environment page for information on the Python API and using geemap for interactive development.

import ee
import geemap.core as geemap

Colab (Python)

# Once the classifier export finishes, we can load our saved classifier.
saved_classifier = ee.Classifier.load(classifier_asset_id)
# We can perform classification just as before with the saved classifier now.
Map.addLayer(inputImage.classify(saved_classifier).clip(roi),
             {'palette': igbp_palette, 'min': 0, 'max': 17},
             'classification')