/* mmio_test.c - Test MMIO read latency of various network adaptors * * (C) 2005 by Robert Olsson , * Lennert Buytenhek , * Harald Welte , * Grant Grundler * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 * as published by the Free Software Foundation * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* Part of it is that uncached accesses are plain slow. An L2 miss is ~370 cycles on my hardware (155ns*2.4 confirms that), but an uncached access to the same memory location is consistently ~490 cycles. And part of it seems to be the e1000. Reading the device control register (E1000_CTRL, 0x00) is ~1700 cycles interrupt cause register (E1000_ICR, 0xc0) is ~2100 cycles interrupt mask register (E1000_IMS, 0x100) is ~2100 cycles */ #include #include #include #include #include #include #include #include #include #include #include #include #include "get_clock.h" #include "mmio_test.h" #define VERSION "$Revision$" /* barrier() stolen from /usr/include/linux/compiler-gcc.h * This is NOT a memory barrier! * out-of-order memory arches will need to define and insert mb() * if volatile keyword is not sufficient. * */ #ifndef barrier #define barrier() __asm__ __volatile__("": : :"memory") #endif static int dev_mem_fd; static cpu_set_t affinity_mask; static float cpu_mhz = 0.0; static int opt_verbose = 0; static int opt_cycles_only = 0; static int output_xml = 0; /* core */ #define INNER_LOOP_COUNT 10240 static int cycles_compare(const void * aptr, const void * bptr) { const cycles_t *a = aptr; const cycles_t *b = bptr; if (*a < *b) return -1; if (*a > *b) return 1; return 0; } volatile unsigned int gcc_sink; /* prevent gcc from optimizing away accesses */ static cycles_t test_backend(volatile void *x, unsigned int offset) { unsigned int j; static cycles_t delta[INNER_LOOP_COUNT]; for (j = 0; j < INNER_LOOP_COUNT; j++) { cycles_t end; cycles_t start = get_cycles(); barrier(); gcc_sink = *((volatile unsigned int *)(x + offset)); gcc_sink = *((volatile unsigned int *)(x + offset)); gcc_sink = *((volatile unsigned int *)(x + offset)); gcc_sink = *((volatile unsigned int *)(x + offset)); gcc_sink = *((volatile unsigned int *)(x + offset)); gcc_sink = *((volatile unsigned int *)(x + offset)); gcc_sink = *((volatile unsigned int *)(x + offset)); gcc_sink = *((volatile unsigned int *)(x + offset)); gcc_sink = *((volatile unsigned int *)(x + offset)); gcc_sink = *((volatile unsigned int *)(x + offset)); gcc_sink = *((volatile unsigned int *)(x + offset)); gcc_sink = *((volatile unsigned int *)(x + offset)); gcc_sink = *((volatile unsigned int *)(x + offset)); gcc_sink = *((volatile unsigned int *)(x + offset)); gcc_sink = *((volatile unsigned int *)(x + offset)); gcc_sink = *((volatile unsigned int *)(x + offset)); barrier(); end = get_cycles(); barrier(); delta[j] = (end - start)/16; } qsort(delta, INNER_LOOP_COUNT, sizeof(cycles_t), cycles_compare); return delta[INNER_LOOP_COUNT/2]; /* return median */ } static int generic_test(struct test_device *n, struct pci_dev *p, int cpu) { struct test_register *reg; for (reg = n->regs; reg; reg = reg->next) { volatile void *x; cycles_t avg; unsigned long long regs = p->base_addr[reg->resource] & PCI_ADDR_MEM_MASK; //printf("Using regs = 0x%llx (%d)\n", regs, reg->mmap_size); x = mmap(NULL, reg->mmap_size, PROT_READ, MAP_SHARED, dev_mem_fd, regs); if (x == MAP_FAILED) { perror("mmap"); return -1; } avg = test_backend(x, reg->offset); if (output_xml) { printf("name, p->vendor_id, p->device_id, p->domain, p->bus, p->dev, p->func, cpu, reg->resource, reg->offset, avg); if (cpu_mhz != 0.0) { float ns = avg / (cpu_mhz/1000); printf(" time=\"%f\"", ns); } printf("/>\n"); } else { if (p->domain) printf("%04x:", p->domain); printf("%02x:%02x.%d (%04x:%04x): %s from CPU%u ", p->bus, p->dev, p->func, p->vendor_id, p->device_id, n->name, cpu); printf("%30s (%04x): %5li cycles ", reg->name, reg->offset, (long) avg); if (cpu_mhz != 0.0 && !opt_cycles_only) { float ns = avg / (cpu_mhz/1000); printf("%5.0f ns\n", ns); } else printf("\n"); } fflush(stdout); munmap((void *)x, 4096); } return 0; } static int test_device(struct test_device *ndev, struct pci_dev *dev) { cpu_set_t cur_mask; unsigned int cpu; for (cpu = 0; cpu < CPU_SETSIZE; cpu++) { if (!CPU_ISSET(cpu, &affinity_mask)) continue; CPU_ZERO(&cur_mask); CPU_SET(cpu, &cur_mask); if (sched_setaffinity(0, sizeof(cur_mask), &cur_mask) < 0) { perror("sched_setaffinity() failed, cannot guarantee CPU affinity!:"); } if (!output_xml) { } if (generic_test(ndev, dev, cpu) < 0) return -1; } return 0; } static void print_usage(void) { printf( "This program is free sfotware with ABSOLUTELY NO WARRANTY.\n\n" "Parameters:\n" "\t-h --help\t\tThis help page\n" "\t-V --version\t\tPrint version information\n" "\t-c --cycles-only\tPrint all values in raw cycle counts\n" "\t-f --file\t\tFilename of XML register description\n" "\t-x --xml\t\tOutput in XML format\n" ); } static struct option opts[] = { { "version", 0, NULL, 'V' }, { "help", 0, NULL, 'h' }, { "cycles", 0, NULL, 'c' }, { "file", 1, NULL, 'f' }, { "xml", 0, NULL, 'x' }, { 0 } }; #define MMIO_TEST_FILE "./mmio_test.xml" int main(int argc, char **argv) { struct test_device *test_devices; struct pci_access *pci_a; struct pci_dev *p; int found = 0; int argch; char *file = MMIO_TEST_FILE; while ((argch = getopt_long(argc, argv, "cvVhf:x", opts, NULL)) != -1) { switch (argch) { case '?': if (isprint(optopt)) fprintf(stderr, "Unknown option `-%c'.\n", optopt); else fprintf(stderr, "Unknown option character " "`\\x%x'.\n", optopt); /* fallthrough */ default: print_usage(); exit(1); break; case 'h': print_usage(); exit(0); break; case 'c': opt_cycles_only = 1; break; case 'v': opt_verbose = 1; break; case 'V': printf("%s - MMIO latency test program, Version %s\n", argv[0], VERSION); exit(0); break; case 'f': file = optarg; break; case 'x': output_xml = 1; break; } } if (sched_getaffinity(0, sizeof(affinity_mask), &affinity_mask) < 0) perror("sched_getaffinity()"); fprintf(stderr, "affinity mask: %lx\n", *(unsigned long *)(&affinity_mask)); fprintf(stderr, "%s - MMIO latency test program, Version %s\n\n", argv[0], VERSION); fprintf(stderr, "WARNING: " "THIS PROGRAM WILL LIKELY INTERFERE WITH THE DRIVER\n\n"); cpu_mhz = get_cpu_mhz(); fprintf(stderr, "CPU speed = %6.0f MHz\n", cpu_mhz); dev_mem_fd = open("/dev/mem", O_RDWR | O_SYNC); if (dev_mem_fd < 0) { perror("open"); fprintf(stderr, "you must run this as root, sorry\n"); exit(-1); } test_devices = parse_xml("mmio_test.xml"); if (!test_devices) exit(-2); pci_a = pci_alloc(); if (!pci_a) { perror("pci_alloc"); exit(-1); } pci_init(pci_a); pci_scan_bus(pci_a); for (p = pci_a->devices; p; p = p->next) { struct test_device *dev; for (dev = test_devices; dev; dev = dev->next) { struct test_pci_id *pid; for (pid = dev->pci_ids; pid; pid = pid->next) { if (pid->vendor_id == p->vendor_id && pid->device_id == p->device_id) { test_device(dev, p); found++; } } } } if (found == 0) fprintf(stderr, "No supported devices found\n"); exit(0); }