Adding real allocator fragmentation to INFO and MEMORY command + active defrag test
Other fixes / improvements:
- Lua script memory isn't taken from zmalloc (it is taken from libc malloc), so it can cause a high fragmentation ratio to be displayed (which is false).
- There was a problem with the "fragmentation" info being calculated from RSS and used_memory sampled at different times (they are now sampled together).

Other details:
- Add a few more allocator info fields to the INFO and MEMORY commands.
- Improve the defrag test to measure the defrag latency of big keys.
- Increase the accuracy of the defrag test (by looking at the real frag info); this way we can use an even lower threshold and still avoid false positives.
- Keep the old (total) "fragmentation" field unchanged, but add new ones for specific things.
- Add these to the MEMORY DOCTOR command.
- Deduct Lua memory from the RSS in the case of a non-jemalloc allocator (one for which we don't have "allocator active/used" info).
- Reduce the sampling rate of the RSS and allocator info.
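To make the new fields concrete, here is a minimal standalone sketch (editor's illustration, not part of the commit; the byte counts are hypothetical) showing what each ratio measures:

/* Illustrative sketch, not part of the commit: how the new ratios
 * decompose the old total "fragmentation" number. Sample values are
 * made up; in Redis they come from server.cron_malloc_stats, sampled
 * together in serverCron. */
#include <stdio.h>

int main(void) {
    double zmalloc_used        = 100e6; /* logical allocations (used_memory) */
    double allocator_allocated = 102e6; /* what the allocator allocated      */
    double allocator_active    = 120e6; /* pages with live data              */
    double allocator_resident  = 130e6; /* pages the allocator holds         */
    double process_rss         = 135e6; /* OS-reported RSS of the process    */

    printf("allocator_frag = %.2f\n", allocator_active / allocator_allocated);
    printf("allocator_rss  = %.2f\n", allocator_resident / allocator_active);
    printf("rss_extra      = %.2f\n", process_rss / allocator_resident);
    printf("total_frag     = %.2f\n", process_rss / zmalloc_used);
    return 0;
}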
src/object.c (91 lines changed)
@@ -876,8 +876,23 @@ struct redisMemOverhead *getMemoryOverheadData(void) {
     mh->total_allocated = zmalloc_used;
     mh->startup_allocated = server.initial_memory_usage;
     mh->peak_allocated = server.stat_peak_memory;
-    mh->fragmentation =
-        zmalloc_get_fragmentation_ratio(server.resident_set_size);
+    mh->total_frag =
+        (float)server.cron_malloc_stats.process_rss / server.cron_malloc_stats.zmalloc_used;
+    mh->total_frag_bytes =
+        server.cron_malloc_stats.process_rss - server.cron_malloc_stats.zmalloc_used;
+    mh->allocator_frag =
+        (float)server.cron_malloc_stats.allocator_active / server.cron_malloc_stats.allocator_allocated;
+    mh->allocator_frag_bytes =
+        server.cron_malloc_stats.allocator_active - server.cron_malloc_stats.allocator_allocated;
+    mh->allocator_rss =
+        (float)server.cron_malloc_stats.allocator_resident / server.cron_malloc_stats.allocator_active;
+    mh->allocator_rss_bytes =
+        server.cron_malloc_stats.allocator_resident - server.cron_malloc_stats.allocator_active;
+    mh->rss_extra =
+        (float)server.cron_malloc_stats.process_rss / server.cron_malloc_stats.allocator_resident;
+    mh->rss_extra_bytes =
+        server.cron_malloc_stats.process_rss - server.cron_malloc_stats.allocator_resident;
+
     mem_total += server.initial_memory_usage;

     mem = 0;
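Editor's note on the decomposition above (hedged, derived from the formulas in this hunk): writing used = zmalloc_used, alloc = allocator_allocated, active = allocator_active, resident = allocator_resident, and rss = process_rss, the new fields satisfy

    total_frag = rss / used
               = (active / alloc) * (resident / active) * (rss / resident) * (alloc / used)
               = allocator_frag * allocator_rss * rss_extra * (alloc / used)

so when the allocator bookkeeping factor alloc / used is close to 1.0, the old total ratio is simply the product of the three new ratios, and each factor points at a different culprit.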
@@ -980,6 +995,9 @@ sds getMemoryDoctorReport(void) {
     int empty = 0;          /* Instance is empty or almost empty. */
     int big_peak = 0;       /* Memory peak is much larger than used mem. */
     int high_frag = 0;      /* High fragmentation. */
+    int high_alloc_frag = 0;/* High allocator fragmentation. */
+    int high_proc_rss = 0;  /* High process rss overhead. */
+    int high_alloc_rss = 0; /* High allocator rss overhead. */
     int big_slave_buf = 0;  /* Slave buffers are too big. */
     int big_client_buf = 0; /* Client buffers are too big. */
     int num_reports = 0;
@@ -995,12 +1013,30 @@ sds getMemoryDoctorReport(void) {
         num_reports++;
     }

-    /* Fragmentation is higher than 1.4? */
-    if (mh->fragmentation > 1.4) {
+    /* Fragmentation is higher than 1.4 and 10MB? */
+    if (mh->total_frag > 1.4 && mh->total_frag_bytes > 10<<20) {
         high_frag = 1;
         num_reports++;
     }

+    /* External fragmentation is higher than 1.1 and 10MB? */
+    if (mh->allocator_frag > 1.1 && mh->allocator_frag_bytes > 10<<20) {
+        high_alloc_frag = 1;
+        num_reports++;
+    }
+
+    /* Allocator rss is higher than 1.1 and 10MB? */
+    if (mh->allocator_rss > 1.1 && mh->allocator_rss_bytes > 10<<20) {
+        high_alloc_rss = 1;
+        num_reports++;
+    }
+
+    /* Non-allocator rss is higher than 1.1 and 10MB? */
+    if (mh->rss_extra > 1.1 && mh->rss_extra_bytes > 10<<20) {
+        high_proc_rss = 1;
+        num_reports++;
+    }
+
     /* Clients using more than 200k each average? */
     long numslaves = listLength(server.slaves);
     long numclients = listLength(server.clients)-numslaves;
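A note on the magic number in the guards above: the byte thresholds use a bit shift. A self-contained check (editor's illustration, not part of the commit):

#include <assert.h>

int main(void) {
    /* 10<<20 is 10 * 2^20 = 10485760 bytes, i.e. 10 MiB. Each doctor check
     * requires both a high ratio and a large absolute overhead, which is
     * what lets it avoid false positives on small instances. */
    assert((10<<20) == 10 * 1024 * 1024);
    return 0;
}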
@@ -1034,7 +1070,16 @@ sds getMemoryDoctorReport(void) {
         s = sdscat(s," * Peak memory: In the past this instance used more than 150% of the memory that it is currently using. The allocator is normally not able to release memory after a peak, so you can expect to see a big fragmentation ratio; however, this is actually harmless and is only due to the memory peak, and if the Redis instance Resident Set Size (RSS) is currently bigger than expected, the memory will be used as soon as you fill the Redis instance with more data. If the memory peak was only occasional and you want to try to reclaim memory, please try the MEMORY PURGE command, otherwise the only other option is to shut down and restart the instance.\n\n");
     }
     if (high_frag) {
-        s = sdscatprintf(s," * High fragmentation: This instance has a memory fragmentation greater than 1.4 (this means that the Resident Set Size of the Redis process is much larger than the sum of the logical allocations Redis performed). This problem is usually due either to a large peak memory (check if there is a peak memory entry above in the report) or may result from a workload that causes the allocator to fragment memory a lot. If the problem is a large peak memory, then there is no issue. Otherwise, make sure you are using the Jemalloc allocator and not the default libc malloc. Note: The currently used allocator is \"%s\".\n\n", ZMALLOC_LIB);
+        s = sdscatprintf(s," * High total RSS: This instance has a memory fragmentation and RSS overhead greater than 1.4 (this means that the Resident Set Size of the Redis process is much larger than the sum of the logical allocations Redis performed). This problem is usually due either to a large peak memory (check if there is a peak memory entry above in the report) or may result from a workload that causes the allocator to fragment memory a lot. If the problem is a large peak memory, then there is no issue. Otherwise, make sure you are using the Jemalloc allocator and not the default libc malloc. Note: The currently used allocator is \"%s\".\n\n", ZMALLOC_LIB);
     }
+    if (high_alloc_frag) {
+        s = sdscatprintf(s," * High allocator fragmentation: This instance has an allocator external fragmentation greater than 1.1. This problem is usually due either to a large peak memory (check if there is a peak memory entry above in the report) or may result from a workload that causes the allocator to fragment memory a lot. You can try enabling the 'activedefrag' config option.\n\n");
+    }
+    if (high_alloc_rss) {
+        s = sdscatprintf(s," * High allocator RSS overhead: This instance has an RSS memory overhead greater than 1.1 (this means that the Resident Set Size of the allocator is much larger than the sum of what the allocator actually holds). This problem is usually due to a large peak memory (check if there is a peak memory entry above in the report); you can try the MEMORY PURGE command to reclaim it.\n\n");
+    }
+    if (high_proc_rss) {
+        s = sdscatprintf(s," * High process RSS overhead: This instance has a non-allocator RSS memory overhead greater than 1.1 (this means that the Resident Set Size of the Redis process is much larger than the RSS the allocator holds). This problem may be due to Lua scripts or Modules.\n\n");
+    }
     if (big_slave_buf) {
         s = sdscat(s," * Big slave buffers: The slave output buffers in this instance are greater than 10MB for each slave (on average). This likely means that there is some slave instance that is struggling to receive data, either because it is too slow or because of networking issues. As a result, data piles up in the master output buffers. Please try to identify what slave is not receiving data correctly and why. You can use the INFO output in order to check the slaves' delays and the CLIENT LIST command to check the output buffers of each slave.\n\n");
@@ -1148,7 +1193,7 @@ void memoryCommand(client *c) {
     } else if (!strcasecmp(c->argv[1]->ptr,"stats") && c->argc == 2) {
         struct redisMemOverhead *mh = getMemoryOverheadData();

-        addReplyMultiBulkLen(c,(14+mh->num_dbs)*2);
+        addReplyMultiBulkLen(c,(24+mh->num_dbs)*2);

         addReplyBulkCString(c,"peak.allocated");
         addReplyLongLong(c,mh->peak_allocated);
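Editor's accounting for the 14 -> 24 bump (derived from the next hunk): the stats reply drops the old single "fragmentation" pair and gains 11 new field/value pairs (allocator.allocated, allocator.active, allocator.resident, allocator-fragmentation.ratio/.bytes, allocator-rss.ratio/.bytes, rss-overhead.ratio/.bytes, plus the re-added "fragmentation" and the new "fragmentation.bytes"): 14 - 1 + 11 = 24.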
@@ -1202,8 +1247,38 @@ void memoryCommand(client *c) {
         addReplyBulkCString(c,"peak.percentage");
         addReplyDouble(c,mh->peak_perc);

-        addReplyBulkCString(c,"fragmentation");
-        addReplyDouble(c,mh->fragmentation);
+        addReplyBulkCString(c,"allocator.allocated");
+        addReplyLongLong(c,server.cron_malloc_stats.allocator_allocated);
+
+        addReplyBulkCString(c,"allocator.active");
+        addReplyLongLong(c,server.cron_malloc_stats.allocator_active);
+
+        addReplyBulkCString(c,"allocator.resident");
+        addReplyLongLong(c,server.cron_malloc_stats.allocator_resident);
+
+        addReplyBulkCString(c,"allocator-fragmentation.ratio");
+        addReplyDouble(c,mh->allocator_frag);
+
+        addReplyBulkCString(c,"allocator-fragmentation.bytes");
+        addReplyLongLong(c,mh->allocator_frag_bytes);
+
+        addReplyBulkCString(c,"allocator-rss.ratio");
+        addReplyDouble(c,mh->allocator_rss);
+
+        addReplyBulkCString(c,"allocator-rss.bytes");
+        addReplyLongLong(c,mh->allocator_rss_bytes);
+
+        addReplyBulkCString(c,"rss-overhead.ratio");
+        addReplyDouble(c,mh->rss_extra);
+
+        addReplyBulkCString(c,"rss-overhead.bytes");
+        addReplyLongLong(c,mh->rss_extra_bytes);
+
+        addReplyBulkCString(c,"fragmentation"); /* this is the total RSS overhead, including fragmentation */
+        addReplyDouble(c,mh->total_frag); /* it is kept here for backwards compatibility */
+
+        addReplyBulkCString(c,"fragmentation.bytes");
+        addReplyLongLong(c,mh->total_frag_bytes);

         freeMemoryOverheadData(mh);
     } else if (!strcasecmp(c->argv[1]->ptr,"malloc-stats") && c->argc == 2) {