redis/src/memtest.c
antirez cd99c14e38 On crash memory test rewritten so that it actually works.
1) We no longer test location by location, otherwise the CPU write cache
completely makes our business useless.
2) We still need a memory test that operates in steps from the first to
the last location in order to never hit the cache, but that is still
able to retain the memory content.

This was tested using a Linux box containing a bad memory module with a
single bit error (always zero).

So the final solution has an error propagation step that is:

1) Invert bits at every location.
2) Swap adjacent locations.
3) Swap adjacent locations again.
4) Invert bits at every location.
5) Swap adjacent locations.
6) Swap adjacent locations again.

Before and after these steps, and after step 4, a CRC64 checksum is computed.
If the three CRC64 checksums don't match, a memory error was detected.
2012-11-29 10:24:41 +01:00

282 lines
9.2 KiB
C

/*
* Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Redis nor the names of its contributors may be used
* to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <limits.h>
#include <errno.h>
#include <termios.h>
#include <sys/ioctl.h>
#include "config.h"
/* Detect the width of unsigned long at preprocessing time. Exactly one of
 * MEMTEST_32BIT / MEMTEST_64BIT ends up defined; any other width stops the
 * build with an #error. */
#if (ULONG_MAX == 4294967295UL)
#define MEMTEST_32BIT
#elif (ULONG_MAX == 18446744073709551615ULL)
#define MEMTEST_64BIT
#else
#error "ULONG_MAX value not supported."
#endif
/* Alternating-bit patterns spanning a whole unsigned long:
 * ULONG_ONEZERO is 1010...10 and ULONG_ZEROONE is 0101...01. They are the
 * two values passed to memtest_fill_value() for the checkerboard fill. */
#ifdef MEMTEST_32BIT
#define ULONG_ONEZERO 0xaaaaaaaaUL
#define ULONG_ZEROONE 0x55555555UL
#else
#define ULONG_ONEZERO 0xaaaaaaaaaaaaaaaaUL
#define ULONG_ZEROONE 0x5555555555555555UL
#endif
/* Terminal size: set by memtest() and read by memtest_progress_start() to
 * size the progress bar. */
static struct winsize ws;
/* Progress-bar state: reset by memtest_progress_start() and advanced by
 * memtest_progress_step(). */
size_t progress_printed; /* Printed chars in screen-wide progress bar. */
size_t progress_full; /* How many chars to write to fill the progress bar. */
/* Prepare the screen for a new test stage and reset the progress bar state.
 *
 * The screen is cleared and filled with dots (every row but the last two),
 * followed by two advisory lines. The cursor is then moved home and the
 * first line is replaced with "<title> [<pass>]". Later calls to
 * memtest_progress_step() overwrite the dots starting from the second row.
 *
 * title: label of the stage being started.
 * pass:  pass number printed next to the title.
 *
 * Side effects: writes to stdout, sets progress_printed to 0 and
 * progress_full to the number of dot cells available to the bar. */
void memtest_progress_start(char *title, int pass) {
    /* Do the geometry in size_t and clamp at zero: with fewer than 3 rows
     * the expression cols*(rows-3) would be negative and turn into a huge
     * unsigned progress_full, and very large dimensions would overflow int. */
    size_t cols = ws.ws_col;
    size_t rows = ws.ws_row;
    size_t dots = (rows > 2) ? cols * (rows - 2) : 0;
    size_t j;

    printf("\x1b[H\x1b[2J"); /* Cursor home, clear screen. */
    /* Fill with dots. */
    for (j = 0; j < dots; j++) printf(".");
    printf("Please keep the test running several minutes per GB of memory.\n");
    printf("Also check http://www.memtest86.com/ and http://pyropus.ca/software/memtester/");
    printf("\x1b[H\x1b[2K"); /* Cursor home, clear current line. */
    printf("%s [%d]\n", title, pass); /* Print title. */
    progress_printed = 0;
    /* One row fewer than the dotted area: the first row now holds the title. */
    progress_full = (rows > 3) ? cols * (rows - 3) : 0;
    fflush(stdout);
}
/* Finish a test stage: move the cursor to the top-left corner and wipe the
 * whole screen so the next stage starts from a blank terminal. */
void memtest_progress_end(void) {
    fputs("\x1b[H\x1b[2J", stdout); /* Cursor home, clear screen. */
}
/* Advance the progress bar so that it reflects curr/size of the work done.
 *
 * curr: amount of work completed so far.
 * size: total amount of work; when 0 nothing is printed.
 * c:    character used to draw the newly completed cells.
 *
 * The target number of cells is curr*progress_full/size. Characters are
 * printed until progress_printed reaches that target, then stdout is
 * flushed. */
void memtest_progress_step(size_t curr, size_t size, char c) {
    size_t chars;

    if (size == 0) return; /* Avoid dividing by zero on an empty range. */
    chars = ((unsigned long long)curr*progress_full)/size;
    /* Compare against the running counter directly. A bound written as
     * chars-progress_printed shrinks while the loop index grows, printing
     * only about half of the owed cells, and wraps around to a huge value
     * when the target is behind what was already printed. */
    while (progress_printed < chars) {
        putchar(c);
        progress_printed++;
    }
    fflush(stdout);
}
/* Addressing check: store in every word its own address, then read the
 * whole region back and make sure each word still holds that address.
 *
 * l:     start of the region under test.
 * bytes: size of the region in bytes (a trailing partial word is ignored).
 *
 * On the first mismatch an error is printed and the process exits with
 * status 1. Progress is reported with the 'A' symbol every 65536 words,
 * the fill covering the first half of the bar and the check the second. */
void memtest_addressing(unsigned long *l, size_t bytes) {
    unsigned long words = bytes/sizeof(unsigned long);
    unsigned long total = words*2; /* Fill phase + verify phase. */
    unsigned long idx;

    /* Fill phase. */
    for (idx = 0; idx < words; idx++) {
        unsigned long *slot = l+idx;

        *slot = (unsigned long)slot;
        if (!(idx & 0xffff)) memtest_progress_step(idx,total,'A');
    }

    /* Verify phase. */
    for (idx = 0; idx < words; idx++) {
        unsigned long *slot = l+idx;

        if (*slot != (unsigned long)slot) {
            printf("\n*** MEMORY ADDRESSING ERROR: %p contains %lu\n",
                (void*) slot, *slot);
            exit(1);
        }
        if (!(idx & 0xffff)) memtest_progress_step(idx+words,total,'A');
    }
}
/* Fill the region with random words so that its second half is an exact
 * copy of its first half (memtest_compare() relies on this).
 *
 * Writes are not sequential: for every word offset inside a 4096 byte
 * stride, the whole region is walked touching one word per stride. This way
 * all the pages keep being touched in the smallest amount of time, reducing
 * the effectiveness of caches and making it hard for the OS to move pages
 * to swap.
 *
 * l:     start of the region.
 * bytes: size in bytes; asserted to be a multiple of 4096. Note that only
 *        whole strides of each half are filled, so a size that is a
 *        multiple of 4096 but not of 8192 leaves the tail of each half
 *        untouched.
 *
 * Each word is assembled from 16 bit chunks of rand() output. Progress is
 * reported with the 'R' symbol. */
void memtest_fill_random(unsigned long *l, size_t bytes) {
    unsigned long stride = 4096/sizeof(unsigned long);  /* Words per stride. */
    unsigned long half = bytes/sizeof(unsigned long)/2; /* Words per half. */
    unsigned long strides = half/stride; /* Writes per offset, per half. */
    unsigned long off, n;

    assert((bytes & 4095) == 0);
    for (off = 0; off < stride; off++) {
        for (n = 0; n < strides; n++) {
            unsigned long idx = off + n*stride;
            unsigned long word;

            /* Build the word from the low 16 bits of successive rand()
             * calls, lowest chunk first. */
            word = (unsigned long) (rand()&0xffff);
            word |= ((unsigned long) (rand()&0xffff)) << 16;
#ifdef MEMTEST_64BIT
            word |= ((unsigned long) (rand()&0xffff)) << 32;
            word |= ((unsigned long) (rand()&0xffff)) << 48;
#endif
            /* Mirror the same value in both halves. */
            l[idx+half] = word;
            l[idx] = word;
            if ((n & 0xffff) == 0)
                memtest_progress_step(n+strides*off,half,'R');
        }
    }
}
/* Same access pattern as memtest_fill_random(), but the region is filled
 * with two alternating patterns instead of random data: words at an even
 * offset get the pattern derived from v1, words at an odd offset the one
 * derived from v2. The second half of the region mirrors the first.
 *
 * l:     start of the region.
 * bytes: size in bytes; asserted to be a multiple of 4096.
 * v1/v2: source values. The stored pattern is the value OR-ed with copies
 *        of itself shifted left by 16 bits (and by 32 and 48 bits on
 *        64 bit builds).
 * sym:   character used to draw the progress bar. */
void memtest_fill_value(unsigned long *l, size_t bytes, unsigned long v1,
                        unsigned long v2, char sym)
{
    unsigned long stride = 4096/sizeof(unsigned long);  /* Words per stride. */
    unsigned long half = bytes/sizeof(unsigned long)/2; /* Words per half. */
    unsigned long strides = half/stride; /* Writes per offset, per half. */
    unsigned long off, n;

    assert((bytes & 4095) == 0);
    for (off = 0; off < stride; off++) {
        unsigned long src = (off & 1) ? v2 : v1;
        unsigned long pattern;

        /* The pattern only depends on the offset parity, so it is computed
         * once per offset rather than once per write. */
        pattern = src | (src << 16);
#ifdef MEMTEST_64BIT
        pattern |= (src << 32) | (src << 48);
#endif
        for (n = 0; n < strides; n++) {
            unsigned long idx = off + n*stride;

            l[idx+half] = pattern;
            l[idx] = pattern;
            if ((n & 0xffff) == 0)
                memtest_progress_step(n+strides*off,half,sym);
        }
    }
}
/* Check that the second half of the region is word-by-word identical to the
 * first half, as left by the fill functions.
 *
 * l:     start of the region.
 * bytes: size in bytes; asserted to be a multiple of 4096.
 *
 * On the first difference both addresses and both values are printed and
 * the process exits with status 1. Progress is reported with the '='
 * symbol every 65536 words. */
void memtest_compare(unsigned long *l, size_t bytes) {
    unsigned long half = bytes/sizeof(unsigned long)/2; /* Words per half. */
    unsigned long idx;

    assert((bytes & 4095) == 0);
    for (idx = 0; idx < half; idx++) {
        unsigned long *lo = l+idx;
        unsigned long *hi = lo+half;

        if (*lo != *hi) {
            printf("\n*** MEMORY ERROR DETECTED: %p != %p (%lu vs %lu)\n",
                (void*)lo, (void*)hi, *lo, *hi);
            exit(1);
        }
        if (!(idx & 0xffff)) memtest_progress_step(idx,half,'=');
    }
}
/* Run memtest_compare() over the region `times` times in a row, each run
 * with its own "Compare" progress screen.
 *
 * m:     start of the region.
 * bytes: size of the region in bytes.
 * pass:  pass number shown in the progress title.
 * times: number of comparisons; zero or negative performs none. */
void memtest_compare_times(unsigned long *m, size_t bytes, int pass, int times) {
    int remaining = times;

    while (remaining > 0) {
        memtest_progress_start("Compare",pass);
        memtest_compare(m,bytes);
        memtest_progress_end();
        remaining--;
    }
}
/* Allocate `megabytes` MB of memory and run the test sequence over it.
 *
 * Every pass runs, in order: the addressing test, a random fill, a solid
 * fill (all zeros / all ones) and a checkerboard fill. Each fill is followed
 * by four comparisons of the two halves of the buffer.
 *
 * megabytes: size of the buffer to test, in megabytes.
 * passes:    the loop stops when the pass counter, which starts at 0 and is
 *            incremented before each pass, equals this value; 0 runs no
 *            pass. NOTE(review): a negative value never matches a counter
 *            that counts up from 0 -- presumably not expected from callers;
 *            confirm.
 *
 * If the buffer can't be allocated a message is written to stderr and the
 * process exits with status 1. The test functions themselves exit on the
 * first detected error. The buffer is released before returning. */
void memtest_test(size_t megabytes, int passes) {
    size_t bytes;
    unsigned long *m;
    int pass = 0;

    /* megabytes*1024*1024 must fit in size_t: a wrapped product would make
     * us silently test a much smaller (possibly empty) buffer. Report it
     * like any other allocation failure. */
    if (megabytes > ((size_t)-1)/(1024*1024)) {
        fprintf(stderr,"Unable to allocate %zu megabytes: %s",
            megabytes, strerror(ENOMEM));
        exit(1);
    }
    bytes = megabytes*1024*1024;

    m = malloc(bytes);
    if (m == NULL) {
        fprintf(stderr,"Unable to allocate %zu megabytes: %s",
            megabytes, strerror(errno));
        exit(1);
    }
    while (pass != passes) {
        pass++;

        memtest_progress_start("Addressing test",pass);
        memtest_addressing(m,bytes);
        memtest_progress_end();

        memtest_progress_start("Random fill",pass);
        memtest_fill_random(m,bytes);
        memtest_progress_end();
        memtest_compare_times(m,bytes,pass,4);

        memtest_progress_start("Solid fill",pass);
        memtest_fill_value(m,bytes,0,(unsigned long)-1,'S');
        memtest_progress_end();
        memtest_compare_times(m,bytes,pass,4);

        memtest_progress_start("Checkerboard fill",pass);
        memtest_fill_value(m,bytes,ULONG_ONEZERO,ULONG_ZEROONE,'C');
        memtest_progress_end();
        memtest_compare_times(m,bytes,pass,4);
    }
    free(m); /* Don't leak the test buffer if the caller keeps running. */
}
/* Flip every bit of the region in place, one unsigned long at a time.
 * Calling it twice restores the original content.
 *
 * addr: start of the region.
 * size: size in bytes; a trailing partial word is left untouched.
 *
 * The accesses go through a volatile pointer, so every word is read and
 * written exactly once. */
void memtest_non_destructive_invert(void *addr, size_t size) {
    volatile unsigned long *cur = addr;
    volatile unsigned long *end = cur + size / sizeof(unsigned long);

    while (cur != end) {
        *cur = ~*cur;
        cur++;
    }
}
/* Exchange every pair of adjacent words in the region, in place: word 0
 * with word 1, word 2 with word 3, and so forth. Calling it twice restores
 * the original content.
 *
 * addr: start of the region.
 * size: size in bytes; a trailing partial word is ignored.
 *
 * When the region holds an odd number of words the last one has no partner
 * and is left as it is. The accesses go through a volatile pointer, so each
 * word of a pair is read and written exactly once. */
void memtest_non_destructive_swap(void *addr, size_t size) {
    volatile unsigned long *p = addr;
    size_t words = size / sizeof(unsigned long);
    size_t j;

    /* Require a full pair (j and j+1) inside the region: stopping at
     * j < words would touch the word just past the end when the count is
     * odd. */
    for (j = 0; j + 1 < words; j += 2) {
        unsigned long a = p[j];
        unsigned long b = p[j+1];

        p[j] = b;
        p[j+1] = a;
    }
}
/* Entry point of the memory test: never returns.
 *
 * The terminal size is queried on file descriptor 1 to size the progress
 * bar, falling back to 80x20 when the query fails. The test then runs over
 * `megabytes` MB for `passes` passes, and since errors terminate the process
 * from inside the test, reaching the end means success: a closing message
 * is printed and the process exits with status 0.
 *
 * megabytes: amount of memory to test, in megabytes.
 * passes:    number of passes, forwarded to memtest_test(). */
void memtest(size_t megabytes, int passes) {
    int have_winsize = (ioctl(1, TIOCGWINSZ, &ws) != -1);

    if (!have_winsize) {
        /* Not a terminal, or size unavailable: assume a default geometry. */
        ws.ws_col = 80;
        ws.ws_row = 20;
    }

    memtest_test(megabytes,passes);

    fputs("\nYour memory passed this test.\n", stdout);
    fputs("Please if you are still in doubt use the following two tools:\n", stdout);
    fputs("1) memtest86: http://www.memtest86.com/\n", stdout);
    fputs("2) memtester: http://pyropus.ca/software/memtester/\n", stdout);
    exit(0);
}