Identifying and Reclaiming Disk Space in MongoDB

Last Updated: Mar 30, 2020

A common question when it comes to MongoDB and its default storage engine (WiredTiger) is: “Why didn’t my free disk space increase after I removed a bunch of documents?”

The WiredTiger storage engine maintains lists of empty records in data files as it deletes documents. This space can be reused by WiredTiger, but will not be returned to the operating system except under very specific circumstances.

The amount of empty space available for reuse by WiredTiger is reflected in the output of db.collection.stats() under the heading wiredTiger.block-manager.file bytes available for reuse.

To allow the WiredTiger storage engine to release this empty space to the operating system, you can de-fragment your data file. This can be achieved using the compact command.

As the db.collection.stats() command must be run one collection at a time, I’ve written the following script, which extends this functionality to:

  • scan all namespaces (databases + collections)
  • include index space details
  • support for sharded collections
  • output to CSV

/**
 * Print storage details for all collections and indexes.
 * Supports sharded clusters
 *
 * @author alex.bevilacqua@mongodb.com
 * @version 1.1
 * @updated 2020-05-14
 *
 * History:
 * 1.1 - Include Document Count / Average Object Size
 * 1.0 - Initial Release
 */

/**
 * Format a byte count as a human-readable string using 1024-based units.
 *
 * The value is scaled to the largest unit in which it is at least 1 and then
 * rounded to a whole number, e.g. 1536 -> "2 KB". Values of 1024 TB or more
 * stay in TB rather than running past the end of the unit table.
 *
 * @param {number} bytes - Size in bytes.
 * @returns {string} The formatted size, or '0 Byte' when the input is zero,
 *     negative, or not a number.
 */
var fmt = function (bytes) {
    // To emit raw byte counts instead of formatted strings, uncomment the
    // next line so the function returns before any formatting happens.
    // return bytes;

    var sizes = ['Bytes', 'KB', 'MB', 'GB', 'TB'];
    var value = Number(bytes);

    // Zero, negative and NaN inputs cannot be scaled meaningfully.
    if (!(value > 0)) return '0 Byte';

    // Divide step by step instead of using Math.log so the unit index can
    // never fall below 0 (inputs < 1) or beyond the last unit (>= 1024 TB).
    var unit = 0;
    while (value >= 1024 && unit < sizes.length - 1) {
        value /= 1024;
        unit++;
    }

    return Math.round(value) + ' ' + sizes[unit];
};

/**
 * Build a flat storage summary from one collection-stats document.
 *
 * @param {string} label - Name to report for this entry (stored as `name`).
 * @param {Object} stats - Stats document; reads `count`, `avgObjSize`,
 *     `size`, `storageSize`, `totalIndexSize`, `wiredTiger` and
 *     `indexDetails`.
 * @returns {Object} Summary with `name`, `count`, `avgSize`, `size`,
 *     `storageSize`, `reusableSpace`, `indexSpace` and `indexReusable`
 *     (the sum of reusable bytes across all entries in `indexDetails`).
 */
var getDetail = function (label, stats) {
    var REUSE_KEY = "file bytes available for reuse";

    // Read the reusable-bytes figure from a section that holds a
    // "block-manager" sub-document. A missing section counts as 0 so a stats
    // document without these details does not abort the whole report.
    var reusableBytes = function (section) {
        var blockManager = section && section["block-manager"];
        return (blockManager && blockManager[REUSE_KEY]) || 0;
    };

    var detail = {
        name: label,
        count: stats.count,
        avgSize: stats.avgObjSize,
        size: stats.size,
        storageSize: stats.storageSize,
        reusableSpace: reusableBytes(stats.wiredTiger),
        indexSpace: stats.totalIndexSize,
        indexReusable: 0
    };

    // indexDetails may be absent; treat that as "no indexes to sum".
    var indexDetails = stats.indexDetails || {};
    Object.keys(indexDetails).forEach(function (indexName) {
        detail.indexReusable += reusableBytes(indexDetails[indexName]);
    });

    return detail;
};

/**
 * Print a CSV storage report for every collection in one database.
 *
 * Uses the shell globals `db` and `print`, plus the `getDetail` and `fmt`
 * helpers. For a collection whose stats are flagged `sharded`, one row is
 * printed per shard, labelled "<namespace> (<shard>)"; otherwise a single
 * row is printed for the namespace. A final "Total" row sums the five size
 * columns (document count and average size are left blank there).
 *
 * @param {string} dbname - Name of the database to report on.
 */
var dbSizeReport = function (dbname) {
    var results = [];
    var database = db.getSiblingDB(dbname);

    database.getCollectionNames().forEach(function (c) {
        var coll = database.getCollection(c);
        // indexDetails: true is required so per-index reusable space is present.
        var s = coll.stats({ indexDetails: true });
        if (s.hasOwnProperty("sharded") && s.sharded) {
            var shards = Object.keys(s.shards);
            for (var i = 0; i < shards.length; i++) {
                var shard = shards[i];
                var shardStat = s.shards[shard];
                results.push(getDetail(s.ns + " (" + shard + ")", shardStat));
            }
        } else {
            results.push(getDetail(s.ns, s));
        }
    });

    // Running totals, in column order: size, storageSize, reusableSpace,
    // indexSpace, indexReusable.
    var totals = [0, 0, 0, 0, 0];
    print(["Namespace", "Total Documents", "Average Document Size", "Uncompressed", "Compressed", "Reusable from Collections", "Indexes", "Reusable from Indexes"].join(","));
    for (var i = 0; i < results.length; i++) {
        var row = results[i];
        print([row.name, row.count, row.avgSize, fmt(row.size), fmt(row.storageSize), fmt(row.reusableSpace), fmt(row.indexSpace), fmt(row.indexReusable)].join(","));
        totals[0] += row.size;
        totals[1] += row.storageSize;
        totals[2] += row.reusableSpace;
        totals[3] += row.indexSpace;
        totals[4] += row.indexReusable;
    }

    print(["Total", "", "", fmt(totals[0]), fmt(totals[1]), fmt(totals[2]), fmt(totals[3]), fmt(totals[4])].join(","));
};

// Entry point: print a banner with the database name, then that database's
// storage report, for every database returned by getDBNames().
db.getMongo().getDBNames().forEach(function (dbname) {
    print("---------------------");
    print(dbname);
    print("---------------------");
    dbSizeReport(dbname);
});

Running this script from a mongo shell will produce output similar to the following:

---------------------
admin
---------------------
Namespace,Uncompressed,Compressed,Reusable from Collections,Indexes,Reusable from Indexes
admin.system.keys (config),255 Bytes,36 KB,16 KB,36 KB,16 KB
admin.system.version (config),59 Bytes,20 KB,0 Byte,20 KB,0 Byte
Total,314 Bytes,56 KB,16 KB,56 KB,16 KB
---------------------
config
---------------------
Namespace,Uncompressed,Compressed,Reusable from Collections,Indexes,Reusable from Indexes
config.actionlog (config),32 KB,40 KB,16 KB,40 KB,16 KB
config.changelog (config),346 KB,132 KB,52 KB,96 KB,44 KB
config.chunks (config),57 KB,52 KB,24 KB,144 KB,64 KB
config.collections (config),431 Bytes,36 KB,16 KB,36 KB,16 KB
config.databases (config),108 Bytes,20 KB,0 Byte,20 KB,0 Byte
config.lockpings (config),3 KB,36 KB,16 KB,72 KB,32 KB
config.locks (config),771 Bytes,36 KB,16 KB,108 KB,48 KB
config.migrations (config),0 Byte,24 KB,16 KB,48 KB,32 KB
config.mongos (config),342 Bytes,36 KB,16 KB,20 KB,0 Byte
config.settings (config),39 Bytes,20 KB,0 Byte,20 KB,0 Byte
config.shards (config),297 Bytes,20 KB,0 Byte,44 KB,4 KB
config.system.sessions (shard01),99 Bytes,36 KB,16 KB,60 KB,20 KB
config.tags (config),0 Byte,4 KB,0 Byte,24 KB,4 KB
config.transactions (config),0 Byte,24 KB,16 KB,12 KB,4 KB
config.version (config),83 Bytes,20 KB,0 Byte,20 KB,0 Byte
Total,441 KB,536 KB,204 KB,764 KB,284 KB
---------------------
test
---------------------
Namespace,Uncompressed,Compressed,Reusable from Collections,Indexes,Reusable from Indexes
test.test1 (shard01),37 MB,37 MB,27 MB,26 MB,16 MB
test.test1 (shard02),37 MB,8 MB,52 KB,5 MB,2 MB
test.test1 (shard03),38 MB,8 MB,56 KB,5 MB,2 MB
test.ups_test (shard01),0 Byte,24 KB,16 KB,72 KB,48 KB
Total,112 MB,54 MB,27 MB,36 MB,19 MB

This output can then be imported into your favourite spreadsheet for further manipulation.

Based on this sample output, the test.test1 collection on shard01 could reclaim approximately 27MB if compacted. Note that the amount of space reclaimed will not necessarily be exactly what is reported here, but is generally a good guideline as to how much space may be reclaimed.

Comments