MongoDB: Tips & Tricks
Q: How can I get pretty-printed JSON by default in mongo shell?
Add this to your ~/.mongorc.js:
DBQuery.prototype._prettyShell = true;
DBQuery.prototype.unpretty = function () {
  this._prettyShell = false;
  return this;
};
After you do this, db.persons.find({ _id: 71077345 }) will be pretty-printed,
and db.persons.find({ _id: 71077345 }).unpretty() can be used to opt out of
pretty-printing where needed. Opting out of pretty-printing is useful when you
want an overview of a small number of fields taken from multiple documents
(i.e. each projected document still fits on a single line); for example, calls
like db.persons.find({}, {name:1, email:1}) are often hard to read unless you
append .unpretty().
Q: What controls how much .find() prints before asking to type “it”?
Set DBQuery.shellBatchSize = 50 or similar in ~/.mongorc.js.
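For example (50 is just a suggestion; the built-in default is 20):
// ~/.mongorc.js: print 50 documents per batch before prompting for "it"
DBQuery.shellBatchSize = 50;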
Q: How can I see how a particular MongoDB server is configured?
First log in to the server via mongo shell, then you can find the command line
arguments that were used to start mongod (i.e. switches in general and
--config foo.conf in particular) using:
> use admin
> db.runCommand({ getCmdLineOpts: 1 })
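The result looks roughly like this (the path and switches shown are
illustrative, not from a real server):
{
  "argv" : [ "mongod", "--config", "foo.conf" ],
  "parsed" : { "config" : "foo.conf" },
  "ok" : 1
}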
Q: How can I aggregate the system profile log to find the last/slowest query per user?
Turn on the database profiler by running db.setProfilingLevel(2), then wait a
while, and finally run something like:
// Search profiling log for the last query each user did
db.system.profile.aggregate([
  { $sort: { ts: 1 }},
  { $group: {
      _id: { user: "$user", client: "$client" },
      millis: { $last: "$millis" },
      op: { $last: "$op" },
      query: { $last: "$query" }
  }}
])
// Search profiling log for the slowest query each user did
db.system.profile.aggregate([
  { $sort: { millis: 1 }},
  { $group: {
      _id: { user: "$user", client: "$client" },
      millis: { $last: "$millis" },
      op: { $last: "$op" },
      query: { $last: "$query" }
  }}
])
Instead of the above two commands, you can put the snippet below in your
~/.mongorc.js and get a slightly prettier overview of the slowest/last query
per user by running the lastms() and highms() functions:
function userprof(sort, kind) {
  const summary = db.system.profile.aggregate([
    { $sort: sort },
    { $group: {
        _id: { user: "$user", client: "$client" },
        last: { $last: "$$ROOT" }}}
  ]).map(user => {
    const details = JSON.stringify(user.last.query || user.last.command);
    return `${kind} operation (${user.last.op})
            by ${user._id.user || "anonymous"}
            from ${user._id.client}
            took ${user.last.millis} ms
            at ${user.last.ts}
            details: ${details.substring(0,80)} ...`.replace(/\n */gm, ' ')
  })
  db.system.profile.aggregate([
    { $sort: { ts:1 } },
    { $group: {
        _id: null,
        first: { $first: "$ts" },
        last: { $last: "$ts" }
    }}
  ]).forEach(res => {
    print("Based on system.profile data from: ", res.first)
    print("                               to: ", res.last)
  });
  print(summary.join("\n"))
}
lastms = () => userprof({ts:1}, "last")
highms = () => userprof({millis:1}, "slowest")
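Profiling level 2 records every operation and adds overhead, so remember to
turn the profiler off when you are done:
// Check the current profiler settings...
db.getProfilingStatus()   // e.g. { "was" : 2, "slowms" : 100 }
// ...and disable profiling again
db.setProfilingLevel(0)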
Q: How can I make sure that I don’t accidentally run a query that takes too much CPU / time?
For very large collections it can be very CPU / time expensive to run queries
that use non-indexed fields. In production you probably shouldn’t do unindexed
querying at all, but if you’re just trying out queries in mongo shell, or if
you are digging out some one-off debugging information from the database, you
might need to. In these situations, you can limit the maximum number of
documents to scan using the .maxScan() cursor method, and/or limit the maximum
number of milliseconds a query may take using the .maxTimeMS() cursor method.
// Try to find documents with property "unindexed" set to "some value", but
// spend at most 30 ms looking and then return an "exceeded time limit" error.
db.things.find({ unindexed: "some value" }).maxTimeMS(30)
// Try to find documents with property "unindexed" set to "some value", but
// check in at most 1000 documents before giving up (no error, just empty result)
db.things.find({ unindexed: "some value" }).maxScan(1000)
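The two limits can also be combined by chaining the cursor methods, e.g.:
// Give up after 30 ms or 1000 scanned documents, whichever comes first
db.things.find({ unindexed: "some value" }).maxTimeMS(30).maxScan(1000)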
Q: If MongoDB is unusually slow, how can I see what it is doing?
Sudden slowness can be due to many things, for example:
- an unusually large number of queries per second
- queries that use unindexed fields in large collections
- a large index being built
- CPU-heavy .mapReduce() operations
You can list the on-going operations by running db.currentOp().inprog and
paying extra attention to the entries with large .secs_running values. To see
the relevant query, look in the .query field. There is also an .opid field
which contains an operation ID that you can pass to db.killOp() to abort that
particular operation (you may only abort operations associated with a .client
though, never internal database operations). Finally, for index building and
map reduce operations you can find progress information in the .progress
field.
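For example, assuming currentOp reported a runaway client query with opid
12345 (a made-up number):
// Abort the operation with opid 12345
db.killOp(12345)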
If you want a slightly nicer formatting, you can put the snippet below in your
~/.mongorc.js and then run currentOps() instead:
String.prototype.ljust = function (width, padChar) {
  padChar = padChar || ' ';
  if (this.length < width)
    return this + Array(width - this.length + 1).join(padChar);
  else
    return this;
};
function currentOps() {
  var COLUMNS = [
    { title: 'OPID', field: 'opid', width: 20 },
    { title: 'CLIENT', field: 'client', width: 22 },
    { title: 'SECS', field: 'secs_running', width: 7 },
    { title: 'PRGR', field: 'progress', width: 5 },
    { title: 'DESC', field: 'desc', width: 45 },
    { title: 'QUERY', field: 'query', type: 'json' },
  ];
  print(COLUMNS.map(function (col) {
    return col.title.ljust(col.width);
  }).join(''));
  db.currentOp().inprog.forEach(function (op) {
    print(COLUMNS.map(function (col) {
      if (col.type == 'json') {
        return JSON.stringify(op[col.field] || '');
      } else {
        return ('' + (op[col.field] || '')).ljust(col.width);
      }
    }).join(''));
  });
}
Q: How can I tell which of my collections are taking the most time in queries?
Launch mongotop --host HOST --port PORT --username USER -p. By default it
updates once every second, and the total column shows how much of that time
was spent in each collection.
Q: How can I see the current number of inserts/queries etc per second?
Use mongostat --host HOST --port PORT --username USER -p for a single server,
and append --discover for sharded/replication deployments (in this case
mongostat reports separate QPS etc. for each member). The first 4 columns
(i.e. insert/query/update/delete) show the number of such operations per
second.
Q: How can I create a large index in the background without disrupting other queries?
Set the background option, like this:
db.persons.createIndex({ name: 1 }, { background: true })
The drawback of building an index in the background is that it usually takes a
bit longer and initially results in an index that consumes more storage space,
but over time the size will be the same as an index built in the foreground.
Also note that even if you build the index with background: true, the
createIndex() call will still block in mongo shell until the index is
finished.
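Tying this back to the currentOp tip above, you can watch the build from a
second mongo shell. A rough sketch (the exact .msg text varies between server
versions, so treat the regex as an assumption):
// List in-progress operations whose status message mentions an index
db.currentOp().inprog.filter(op => /index/i.test(op.msg || ""))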
Q: Why doesn’t Mongo output proper JSON? Why does NumberLong() etc exist?
Javascript numbers are represented by the data type Number, which is a 64-bit
floating point type, and Javascript (and JSON) doesn’t have any 64-bit integer
data type.
The largest integer that fits in a signed 64-bit integer in Java etc., i.e.
9223372036854775807, cannot be stored exactly in Javascript. If you fire up a
JS repl and do x = 9223372036854775807, the value that will be stored is
9223372036854776000 instead of 9223372036854775807. However, all integers up
to and including 9007199254740991 (available as Number.MAX_SAFE_INTEGER) can
be represented exactly in Javascript, so it’s usable as a
ln(9007199254740991)/ln(2) = 53-bit integer type at least. The problem is that
databases really need proper 64-bit integers, so the MongoDB authors had no
option but to extend Javascript, which makes JSON export of mongo data a bit
more complicated. For example, you can’t pipe MongoDB output directly to
generic JSON processing tools like jq, since { "_id" :
NumberLong("11261548633"), ... } hits “parse error: Invalid numeric literal”
or similar.
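A quick repl session demonstrates the problem, and the mongo shell workaround:
// In any Javascript repl:
x = 9223372036854775807    // actually stores 9223372036854776000
Number.MAX_SAFE_INTEGER    // 9007199254740991, i.e. 2^53 - 1
// In mongo shell, 64-bit integers round-trip via the NumberLong wrapper:
NumberLong("9223372036854775807")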
If you really want proper JSON, you need to run an export via the mongoexport
command instead:
$ mongoexport --quiet -d some_db -c some_collection -q '{}' | jq .
{
  "_id": {
    "$oid": "58832c084b6e95ac2050eac4"
  },
  "some_number": 123,
  "some_numberlong": {
    "$numberLong": "123"
  },
  "some_isodate": {
    "$date": "1818-05-05T00:00:00.000Z"
  }
}
If you do that, all non-JSON constructs, including NumberLong, ISODate and
ObjectId etc., will be exported as special { "$keyword": "value_as_string" }
objects. The -q parameter passed to mongoexport can be used to specify any
query.
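For example, to export only a subset of the documents (the field name here is
made up):
$ mongoexport --quiet -d some_db -c some_collection -q '{ "some_number": { "$gt": 100 } }'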
Q: How can I tell which storage engine MongoDB is using?
Check by running this in mongo shell:
db.serverStatus().storageEngine
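On a WiredTiger server the result looks roughly like this (other fields
omitted for brevity):
{ "name" : "wiredTiger", ... }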
WiredTiger is enabled by default if you create a new database using 3.x, but
if you upgraded from an earlier version of MongoDB you might still be running
the older mmapv1 engine. WiredTiger doesn’t just have different performance
characteristics compared to older engines, it also has document-level locking,
so it’s a good idea to have the same storage engine on development and
production machines.
Q: How can I create a TTL collection?
TTL (“time to live”) collections delete their documents when a timestamp field
is older than a given amount of time.
// Automatically delete docs with "when" timestamp older than 10 secs
db.log_events.createIndex({ when: 1 }, { expireAfterSeconds: 10 })
// Insert a document with an ISODate() timestamp
db.log_events.insert({ when: new Date() })
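Note that the background task that removes expired documents only runs once
every 60 seconds, so documents can linger a bit past their nominal expiry
time.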
Q: How can I see when a document was created?
It’s a good idea to have an explicitly “created” field if you need it, but if
you didn’t create one, there is a trick to find out when a document was created
anyway (at least for documents where the _id
is an ObjectID
, just
do:
ObjectId("505bd76785ebb509fc183733").getTimestamp()
Q: What is db.foo.mapReduce() used for?
db.foo.mapReduce() evaluates aggregate functions over a collection, just like db.foo.aggregate() does. The former is typically a lot slower, but it is more flexible because the map/reduce functions are specified as Javascript functions. The latter only supports a fixed (but quite large) set of aggregation operators, all implemented in native code for maximum performance.
To call .mapReduce(), just pass a map function, a reduce function, and then
set options like a query, and out: { inline: 1 } to return the results
directly instead of storing them in a new collection.
Beware that, because .mapReduce() passes the input to the map function via
this, you cannot use ES6 fat arrows when you define the map function.
// Find all distinct values used for .type in "accounts" documents
db.accounts.mapReduce(
  function () {
    emit("types", this.type);
  },
  function (key, values) {
    // reduce can be re-invoked on its own output, so unwrap any
    // { unique: [...] } results from earlier passes before deduplicating
    var flat = [];
    values.forEach(function (v) {
      if (v && v.unique) flat = flat.concat(v.unique);
      else flat.push(v);
    });
    return { unique: [...new Set(flat)] };
  },
  {
    query: {},
    out: { inline: 1 }
  }
)
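For comparison, here is one way to express the same question as an aggregation
pipeline ($addToSet collects the distinct values):
// Find all distinct values used for .type in "accounts" documents
db.accounts.aggregate([
  { $group: { _id: null, unique: { $addToSet: "$type" } } }
])
For this particular case db.accounts.distinct("type") is simpler still, but
the pipeline form generalizes to more complex groupings.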