Schema validation
While MongoDB does not enforce data type validation by default, it can be enabled by specifying validation rules when creating a collection.
// Use database: sample_supplies
> use sample_supplies
switched to db sample_supplies
// Show all collections
> show collections
sales
Create a collection with the db.createCollection()
method and specify a validator
with the $jsonSchema
operator:
> db.createCollection( 'customers',
{
validator: {
$jsonSchema: {
<schema>
}
}
}
)
JSON Schema
A JSON Schema is a document that defines validation rules for other documents, such as required fields, field properties (for example, allowed types), and minimum/maximum values of fields.
A list of BSON types:
https://docs.mongodb.com/manual/reference/bson-types/
Example:
{
$jsonSchema: {
bsonType: "object",
required: [ "name", "email" ],
properties: {
name: {
bsonType: "string",
description: "name of customer",
maxLength: 50
},
email: {
bsonType: "string",
description: "email address",
maxLength: 100
},
age: {
bsonType: [ "int" ],
description: "age",
minimum: 0,
maximum: 130,
}
}
}
}
In summary:
- These fields must exist and cannot be null: “name”, “email”
- “name” (must be a string) cannot be more than 50 characters long
- “email” (must be a string) cannot be more than 100 characters long
- “age” (must be an integer if it exists) cannot be negative or more than 130
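Note: when specifying a bsonType
, the value can be a single type or an array of allowed types, so [ "int" ] is equivalent to "int". A field is optional when it is not listed in required (as with “age” above); if the field is present, it must still match one of the listed types, so it cannot be null.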
Create a collection with JSON Schema
Combining the db.createCollection()
method and the above JSON Schema:
> db.createCollection( 'customers',
{
validator: {
$jsonSchema: {
required: [ "name", "email"],
properties: {
name: {
bsonType: "string",
description: "name of customer",
maxLength: 50
},
email: {
bsonType: "string",
description: "email address",
maxLength: 100
},
age: {
bsonType: [ "int" ],
description: "age",
minimum: 0,
maximum: 130,
}
}
}
}
}
)
Output:
{ "ok" : 1 }
Checking schema
To check the validation schema of a collection, we can use the db.getCollectionInfos()
method:
> db.getCollectionInfos({name: "customers"})
[
{
"name" : "customers",
"type" : "collection",
"options" : {
"validator" : {
"$jsonSchema" : {
"required" : [
"name",
"email"
],
"properties" : {
"name" : {
"bsonType" : "string",
"description" : "name of customer",
"maxLength" : 50
},
"email" : {
"bsonType" : "string",
"description" : "email address",
"maxLength" : 100
},
"age" : {
"bsonType" : [
"int"
],
"description" : "age",
"minimum" : 0,
"maximum" : 130
}
}
}
}
},
"info" : {
"readOnly" : false,
"uuid" : UUID("6322ae8c-f2ba-4454-9025-ae7a8b49d17e")
},
"idIndex" : {
"v" : 2,
"key" : {
"_id" : 1
},
"name" : "_id_"
}
}
]
Verifying validation
Let us try to insert a document:
> db.customers.insertOne({
name: "Bob",
email: "[email protected]",
age: 21
})
Output:
WriteError({
"index" : 0,
"code" : 121,
"errmsg" : "Document failed validation",
"op" : {
"_id" : ObjectId("60b4c5f17a9653eb847a823a"),
"name" : "Bob",
"email" : "[email protected]",
"age" : 21
}
}) :
WriteError({
"index" : 0,
"code" : 121,
"errmsg" : "Document failed validation",
"op" : {
"_id" : ObjectId("60b4c5f17a9653eb847a823a"),
"name" : "Bob",
"email" : "[email protected]",
"age" : 21
}
})
WriteError@src/mongo/shell/bulk_api.js:458:48
mergeBatchResults@src/mongo/shell/bulk_api.js:855:49
executeBatch@src/mongo/shell/bulk_api.js:919:13
Bulk/this.execute@src/mongo/shell/bulk_api.js:1163:21
DBCollection.prototype.insertOne@src/mongo/shell/crud_api.js:264:9
@(shell):1:1
It returned an error. That is because the mongo shell treats a number as a double by default, so 21 does not match the "int" type required by the schema. We need to specify the number as an integer explicitly with NumberInt().
Try again:
> db.customers.insertOne({
name: "Bob",
email: "[email protected]",
age: NumberInt(21)
})
Output:
{
"acknowledged" : true,
"insertedId" : ObjectId("60b4c74b7a9653eb847a823b")
}
Success!
Let’s try some edge cases.
This should fail (name too long):
> db.customers.insertOne({
name: "Bobbbbbbbbbbbbbbbbbbbbbbyyyyyyyyyyyyyyyyyyyyyyyyyyy", // 51 characters long
email: "[email protected]",
})
This is valid:
> db.customers.insertOne({
name: "Bobbbbbbbbbbbbbbbbbbbbbbyyyyyyyyyyyyyyyyyyyyyyyyyy", // 50 characters long
email: "[email protected]",
})
This should fail (missing email):
> db.customers.insertOne({
name: "Bob",
age: NumberInt(21)
})
This should fail (name cannot be double):
> db.customers.insertOne({
name: 123.456,
email: "[email protected]",
age: NumberInt(21)
})
See what documents are in the collection:
> db.customers.find()
{ "_id" : ObjectId("60b4c74b7a9653eb847a823b"), "name" : "Bob", "email" : "[email protected]", "age" : 21 }
{ "_id" : ObjectId("60b4c7f87a9653eb847a823d"), "name" : "Bobbbbbbbbbbbbbbbbbbbbbbyyyyyyyyyyyyyyyyyyyyyyyyyy", "email" : "[email protected]" }
The validator is working, but email addresses should also be unique among the customers. To enforce that, we can create an index on the email field and require it to be unique.
Collection index / Unique field
We can use the db.collection.createIndex()
method:
> db.customers.createIndex( { email: 1 }, { unique: true })
{
"ok" : 0,
"errmsg" : "Index build failed: beea42ff-778a-4a6c-b4cb-16fb39e7e66d: Collection sample_supplies.customers ( 6322ae8c-f2ba-4454-9025-ae7a8b49d17e ) :: caused by :: E11000 duplicate key error collection: sample_supplies.customers index: email_1 dup key: { email: \"[email protected]\" }",
"code" : 11000,
"codeName" : "DuplicateKey",
"keyPattern" : {
"email" : 1
},
"keyValue" : {
"email" : "[email protected]"
}
}
The method failed because the collection already contains duplicate email addresses. Let’s remove the document in question and try again:
> db.customers.find()
{ "_id" : ObjectId("60b4c74b7a9653eb847a823b"), "name" : "Bob", "email" : "[email protected]", "age" : 21 }
{ "_id" : ObjectId("60b4c7f87a9653eb847a823d"), "name" : "Bobbbbbbbbbbbbbbbbbbbbbbyyyyyyyyyyyyyyyyyyyyyyyyyy", "email" : "[email protected]" }
> db.customers.deleteOne({ _id: ObjectId("60b4c7f87a9653eb847a823d") })
{ "acknowledged" : true, "deletedCount" : 1 }
> db.customers.createIndex( { email: 1 }, { unique: true })
{
"createdCollectionAutomatically" : false,
"numIndexesBefore" : 1,
"numIndexesAfter" : 2,
"ok" : 1
}
Let’s verify that it works:
// It should fail
> db.customers.insertOne({
name: "Bob the Second",
email: "[email protected]"
})
Output:
WriteError({
"index" : 0,
"code" : 11000,
"errmsg" : "E11000 duplicate key error collection: sample_supplies.customers index: email_1 dup key: { email: \"[email protected]\" }",
"op" : {
"_id" : ObjectId("60b4d88d7a9653eb847a8241"),
"name" : "Bob the Second",
"email" : "[email protected]"
}
})
WriteError@src/mongo/shell/bulk_api.js:458:48
mergeBatchResults@src/mongo/shell/bulk_api.js:855:49
executeBatch@src/mongo/shell/bulk_api.js:919:13
Bulk/this.execute@src/mongo/shell/bulk_api.js:1163:21
DBCollection.prototype.insertOne@src/mongo/shell/crud_api.js:264:9
@(shell):1:1
Now MongoDB will ensure that all emails are unique in the customers collection.
Modify schema
We can use JavaScript syntax to store and modify objects:
// Get the validator object
> let validator = db.getCollectionInfos({name: "customers"})[0].options.validator
// Overwrite the validator
> validator.$jsonSchema = {
required: [ "name", "email", "age"],
properties: {
name: {
bsonType: "string",
description: "name of customer",
maxLength: 50
},
email: {
bsonType: "string",
description: "email address",
maxLength: 100
},
age: {
bsonType: "int",
description: "age",
minimum: 0,
maximum: 130,
}
}
}
// Apply new validator
> db.runCommand({
collMod: "customers",
validator: validator
})
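Output (the shell echoing the object assigned to validator.$jsonSchema above; the collMod command itself returns { "ok" : 1 } on success):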
{
"required" : [
"name",
"email",
"age"
],
"properties" : {
"name" : {
"bsonType" : "string",
"description" : "name of customer",
"maxLength" : 50
},
"email" : {
"bsonType" : "string",
"description" : "email address",
"maxLength" : 100
},
"age" : {
"bsonType" : "int",
"description" : "age",
"minimum" : 0,
"maximum" : 130
}
}
}
However, changing the validation rules will not remove any existing documents that violate the new rules. The new validator is only applied when documents are inserted or updated.
Now let’s try to insert a document that has no age:
> db.customers.insertOne({
name: "Bobby the Second",
email: "[email protected]",
})
Output:
WriteError({
"index" : 0,
"code" : 121,
"errmsg" : "Document failed validation",
"op" : {
"_id" : ObjectId("60b4d9977a9653eb847a8242"),
"name" : "Bobby the Second",
"email" : "[email protected]"
}
})
WriteError@src/mongo/shell/bulk_api.js:458:48
mergeBatchResults@src/mongo/shell/bulk_api.js:855:49
executeBatch@src/mongo/shell/bulk_api.js:919:13
Bulk/this.execute@src/mongo/shell/bulk_api.js:1163:21
DBCollection.prototype.insertOne@src/mongo/shell/crud_api.js:264:9
@(shell):1:1
We can see that the new validator is now enforcing age to be required.
Validate collection documents
To verify that the existing documents are conforming to the validation schema, we can use the db.collection.validate()
method:
> db.customers.validate()
{
"ns" : "sample_supplies.customers",
"nInvalidDocuments" : 0,
"nrecords" : 1,
"nIndexes" : 2,
"keysPerIndex" : {
"_id_" : 1,
"email_1" : 1
},
"indexDetails" : {
"_id_" : {
"valid" : true
},
"email_1" : {
"valid" : true
}
},
"valid" : true,
"warnings" : [ ],
"errors" : [ ],
"extraIndexEntries" : [ ],
"missingIndexEntries" : [ ],
"ok" : 1
}
We can see that there are no errors.