Compound Indexes

{
    wget https://www.dropbox.com/s/xso05aiimuhnytj/customers-test-data.json

    mongoimport --db test --collection Customers --jsonArray customers-test-data.json
}
2021-04-08T08:58:23.953+0000    connected to: localhost
2021-04-08T08:58:26.952+0000    [################........] test.Customers       60.1MB/85.7MB (70.2%)
2021-04-08T08:58:28.248+0000    [########################] test.Customers       85.7MB/85.7MB (100.0%)
2021-04-08T08:58:28.248+0000    imported 70000 documents

Speeding up a very simple query

We will now execute a simple query against our newly-created database to find all customers whose surname is ‘Johnston’.

mongo
use test;
switched to db test
db.Customers.find({
    "Name.Last Name" : "Johnston"
}, {
    "_id" : NumberInt(0),
    "Name.First Name" : NumberInt(1),
    "Name.Last Name" : NumberInt(1)
}).sort({
    "Name.Last Name" : NumberInt(1)
});
{ "Name" : { "First Name" : "Carole", "Last Name" : "Johnston" } }
{ "Name" : { "First Name" : "Audrey", "Last Name" : "Johnston" } }
{ "Name" : { "First Name" : "Shayne", "Last Name" : "Johnston" } }
{ "Name" : { "First Name" : "Ty", "Last Name" : "Johnston" } }
{ "Name" : { "First Name" : "Wilson", "Last Name" : "Johnston" } }
{ "Name" : { "First Name" : "Rosalinda", "Last Name" : "Johnston" } }
{ "Name" : { "First Name" : "Darin", "Last Name" : "Johnston" } }
{ "Name" : { "First Name" : "Chastity", "Last Name" : "Johnston" } }
{ "Name" : { "First Name" : "Alison", "Last Name" : "Johnston" } }
{ "Name" : { "First Name" : "Gerardo", "Last Name" : "Johnston" } }
{ "Name" : { "First Name" : "Jonathon", "Last Name" : "Johnston" } }
{ "Name" : { "First Name" : "Renae", "Last Name" : "Johnston" } }
{ "Name" : { "First Name" : "Gus", "Last Name" : "Johnston" } }
{ "Name" : { "First Name" : "Jasen", "Last Name" : "Johnston" } }
{ "Name" : { "First Name" : "Anissa", "Last Name" : "Johnston" } }
{ "Name" : { "First Name" : "Mayra", "Last Name" : "Johnston" } }
{ "Name" : { "First Name" : "Kasey", "Last Name" : "Johnston" } }
{ "Name" : { "First Name" : "Karrie", "Last Name" : "Johnston" } }
{ "Name" : { "First Name" : "Daryl", "Last Name" : "Johnston" } }
{ "Name" : { "First Name" : "Misti", "Last Name" : "Johnston" } }
Type "it" for more

Once we are happy that the query is returning the correct result, we can modify it to return the execution stats.

db.Customers.find({
    "Name.Last Name" : "Johnston"
}, {
    "_id" : NumberInt(0),
    "Name.First Name" : NumberInt(1),
    "Name.Last Name" : NumberInt(1)
}).sort({
    "Name.Last Name" : NumberInt(1)
}).explain("executionStats");
{
  "queryPlanner": {
    "plannerVersion": 1,
    "namespace": "test.Customers",
    "indexFilterSet": false,
    "parsedQuery": {
      "Name.Last Name": {
        "$eq": "Johnston"
      }
    },
    "winningPlan": {
      "stage": "PROJECTION",
      "transformBy": {
        "_id": 0,
        "Name.First Name": 1,
        "Name.Last Name": 1
      },
      "inputStage": {
        "stage": "SORT",
        "sortPattern": {
          "Name.Last Name": 1
        },
        "inputStage": {
          "stage": "SORT_KEY_GENERATOR",
          "inputStage": {
            "stage": "COLLSCAN",
            "filter": {
              "Name.Last Name": {
                "$eq": "Johnston"
              }
            },
            "direction": "forward"
          }
        }
      }
    },
    "rejectedPlans": []
  },
  "executionStats": {
    "executionSuccess": true,
    "nReturned": 68,
    "executionTimeMillis": 51,
    "totalKeysExamined": 0,
    "totalDocsExamined": 70000,
    "executionStages": {
      "stage": "PROJECTION",
      "nReturned": 68,
      "executionTimeMillisEstimate": 0,
      "works": 70072,
      "advanced": 68,
      "needTime": 70003,
      "needYield": 0,
      "saveState": 547,
      "restoreState": 547,
      "isEOF": 1,
      "invalidates": 0,
      "transformBy": {
        "_id": 0,
        "Name.First Name": 1,
        "Name.Last Name": 1
      },
      "inputStage": {
        "stage": "SORT",
        "nReturned": 68,
        "executionTimeMillisEstimate": 0,
        "works": 70072,
        "advanced": 68,
        "needTime": 70003,
        "needYield": 0,
        "saveState": 547,
        "restoreState": 547,
        "isEOF": 1,
        "invalidates": 0,
        "sortPattern": {
          "Name.Last Name": 1
        },
        "memUsage": 88873,
        "memLimit": 33554432,
        "inputStage": {
          "stage": "SORT_KEY_GENERATOR",
          "nReturned": 68,
          "executionTimeMillisEstimate": 0,
          "works": 70003,
          "advanced": 68,
          "needTime": 69934,
          "needYield": 0,
          "saveState": 547,
          "restoreState": 547,
          "isEOF": 1,
          "invalidates": 0,
          "inputStage": {
            "stage": "COLLSCAN",
            "filter": {
              "Name.Last Name": {
                "$eq": "Johnston"
              }
            },
            "nReturned": 68,
            "executionTimeMillisEstimate": 0,
            "works": 70002,
            "advanced": 68,
            "needTime": 69933,
            "needYield": 0,
            "saveState": 547,
            "restoreState": 547,
            "isEOF": 1,
            "invalidates": 0,
            "direction": "forward",
            "docsExamined": 70000
          }
        }
      }
    }
  },
  "serverInfo": {
    "host": "admatic-instance-1",
    "port": 27017,
    "version": "4.0.23",
    "gitVersion": "07c6611b38d2aacbdb1846b688db70b3273170fb"
  },
  "ok": 1
}

According to the execution stats of explain, this takes 51 ms on my machine (ExecutionTimeMillis). This involves a COLLSCAN, meaning that there is no index available, so mongo must scan the entire collection.

This isn’t necessarily a bad thing with a reasonably small collection, but as the size increases and more users access the data, the collection is less likely to fit in paged memory, and disk activity will increase.

The database won’t scale well if it is forced to do a large percentage of COLLSCANs. It is a good idea to minimize the resources used by frequently-run queries.

Well, it is obvious that if an index is going to reduce the time taken, it is likely to involve Name.Last Name.

Let’s start with that then, making it an ascending index as we want the sort to be ascending:

db.Customers.createIndex( {"Name.Last Name" : 1 },{ name: "LastNameIndex"} )
{
  "createdCollectionAutomatically": false,
  "numIndexesBefore": 1,
  "numIndexesAfter": 2,
  "ok": 1
}
db.Customers.find({
    "Name.Last Name" : "Johnston"
}, {
    "_id" : NumberInt(0),
    "Name.First Name" : NumberInt(1),
    "Name.Last Name" : NumberInt(1)
}).sort({
    "Name.Last Name" : NumberInt(1)
}).explain("executionStats");
{
  "queryPlanner": {
    "plannerVersion": 1,
    "namespace": "test.Customers",
    "indexFilterSet": false,
    "parsedQuery": {
      "Name.Last Name": {
        "$eq": "Johnston"
      }
    },
    "winningPlan": {
      "stage": "PROJECTION",
      "transformBy": {
        "_id": 0,
        "Name.First Name": 1,
        "Name.Last Name": 1
      },
      "inputStage": {
        "stage": "FETCH",
        "inputStage": {
          "stage": "IXSCAN",
          "keyPattern": {
            "Name.Last Name": 1
          },
          "indexName": "LastNameIndex",
          "isMultiKey": false,
          "multiKeyPaths": {
            "Name.Last Name": []
          },
          "isUnique": false,
          "isSparse": false,
          "isPartial": false,
          "indexVersion": 2,
          "direction": "forward",
          "indexBounds": {
            "Name.Last Name": ["[\"Johnston\", \"Johnston\"]"]
          }
        }
      }
    },
    "rejectedPlans": []
  },
  "executionStats": {
    "executionSuccess": true,
    "nReturned": 68,
    "executionTimeMillis": 4,
    "totalKeysExamined": 68,
    "totalDocsExamined": 68,
    "executionStages": {
      "stage": "PROJECTION",
      "nReturned": 68,
      "executionTimeMillisEstimate": 0,
      "works": 69,
      "advanced": 68,
      "needTime": 0,
      "needYield": 0,
      "saveState": 0,
      "restoreState": 0,
      "isEOF": 1,
      "invalidates": 0,
      "transformBy": {
        "_id": 0,
        "Name.First Name": 1,
        "Name.Last Name": 1
      },
      "inputStage": {
        "stage": "FETCH",
        "nReturned": 68,
        "executionTimeMillisEstimate": 0,
        "works": 69,
        "advanced": 68,
        "needTime": 0,
        "needYield": 0,
        "saveState": 0,
        "restoreState": 0,
        "isEOF": 1,
        "invalidates": 0,
        "docsExamined": 68,
        "alreadyHasObj": 0,
        "inputStage": {
          "stage": "IXSCAN",
          "nReturned": 68,
          "executionTimeMillisEstimate": 0,
          "works": 69,
          "advanced": 68,
          "needTime": 0,
          "needYield": 0,
          "saveState": 0,
          "restoreState": 0,
          "isEOF": 1,
          "invalidates": 0,
          "keyPattern": {
            "Name.Last Name": 1
          },
          "indexName": "LastNameIndex",
          "isMultiKey": false,
          "multiKeyPaths": {
            "Name.Last Name": []
          },
          "isUnique": false,
          "isSparse": false,
          "isPartial": false,
          "indexVersion": 2,
          "direction": "forward",
          "indexBounds": {
            "Name.Last Name": ["[\"Johnston\", \"Johnston\"]"]
          },
          "keysExamined": 68,
          "seeks": 1,
          "dupsTested": 0,
          "dupsDropped": 0,
          "seenInvalidated": 0
        }
      }
    }
  },
  "serverInfo": {
    "host": "admatic-instance-1",
    "port": 27017,
    "version": "4.0.23",
    "gitVersion": "07c6611b38d2aacbdb1846b688db70b3273170fb"
  },
  "ok": 1
}

It now takes 4 ms on my machine (ExecutionTimeMillis). This involves an IXSCAN (an index scan to get keys) followed by a FETCH (for retrieving the documents).

We can improve on this because the query has to get the first name.

If we add the Name.First Name into the index, then the database engine can use the value in the index rather than having the extra step of taking it from the database.

db.Customers.dropIndex("LastNameIndex")
{ "nIndexesWas": 2, "ok": 1 }
db.Customers.createIndex( { "Name.Last Name" : 1,"Name.First Name" : 1 },
{ name: "LastNameCompoundIndex"} )
{
  "createdCollectionAutomatically": false,
  "numIndexesBefore": 1,
  "numIndexesAfter": 2,
  "ok": 1
}
db.Customers.find({
    "Name.Last Name" : "Johnston"
}, {
    "_id" : NumberInt(0),
    "Name.First Name" : NumberInt(1),
    "Name.Last Name" : NumberInt(1)
}).sort({
    "Name.Last Name" : NumberInt(1)
}).explain("executionStats");
{
  "queryPlanner": {
    "plannerVersion": 1,
    "namespace": "test.Customers",
    "indexFilterSet": false,
    "parsedQuery": {
      "Name.Last Name": {
        "$eq": "Johnston"
      }
    },
    "winningPlan": {
      "stage": "PROJECTION",
      "transformBy": {
        "_id": 0,
        "Name.First Name": 1,
        "Name.Last Name": 1
      },
      "inputStage": {
        "stage": "IXSCAN",
        "keyPattern": {
          "Name.Last Name": 1,
          "Name.First Name": 1
        },
        "indexName": "LastNameCompoundIndex",
        "isMultiKey": false,
        "multiKeyPaths": {
          "Name.Last Name": [],
          "Name.First Name": []
        },
        "isUnique": false,
        "isSparse": false,
        "isPartial": false,
        "indexVersion": 2,
        "direction": "forward",
        "indexBounds": {
          "Name.Last Name": ["[\"Johnston\", \"Johnston\"]"],
          "Name.First Name": ["[MinKey, MaxKey]"]
        }
      }
    },
    "rejectedPlans": []
  },
  "executionStats": {
    "executionSuccess": true,
    "nReturned": 68,
    "executionTimeMillis": 1,
    "totalKeysExamined": 68,
    "totalDocsExamined": 0,
    "executionStages": {
      "stage": "PROJECTION",
      "nReturned": 68,
      "executionTimeMillisEstimate": 0,
      "works": 69,
      "advanced": 68,
      "needTime": 0,
      "needYield": 0,
      "saveState": 0,
      "restoreState": 0,
      "isEOF": 1,
      "invalidates": 0,
      "transformBy": {
        "_id": 0,
        "Name.First Name": 1,
        "Name.Last Name": 1
      },
      "inputStage": {
        "stage": "IXSCAN",
        "nReturned": 68,
        "executionTimeMillisEstimate": 0,
        "works": 69,
        "advanced": 68,
        "needTime": 0,
        "needYield": 0,
        "saveState": 0,
        "restoreState": 0,
        "isEOF": 1,
        "invalidates": 0,
        "keyPattern": {
          "Name.Last Name": 1,
          "Name.First Name": 1
        },
        "indexName": "LastNameCompoundIndex",
        "isMultiKey": false,
        "multiKeyPaths": {
          "Name.Last Name": [],
          "Name.First Name": []
        },
        "isUnique": false,
        "isSparse": false,
        "isPartial": false,
        "indexVersion": 2,
        "direction": "forward",
        "indexBounds": {
          "Name.Last Name": ["[\"Johnston\", \"Johnston\"]"],
          "Name.First Name": ["[MinKey, MaxKey]"]
        },
        "keysExamined": 68,
        "seeks": 1,
        "dupsTested": 0,
        "dupsDropped": 0,
        "seenInvalidated": 0
      }
    }
  },
  "serverInfo": {
    "host": "admatic-instance-1",
    "port": 27017,
    "version": "4.0.23",
    "gitVersion": "07c6611b38d2aacbdb1846b688db70b3273170fb"
  },
  "ok": 1
}

With this in place, the query takes only 1ms.

Because the index covered the query, MongoDB was able to match the query conditions and return the results using only the index keys; without even needing to examine documents from the collection to return the results. (If you see an IXSCAN stage that is not a child of a FETCH stage, in the execution plan then the index covered the query.)

results matching ""

    No results matching ""