很多公司(包括我们公司)还在使用CentOS5,但内核早已换成el6系列的2.6.32内核。perf工具是内核自带的一个性能评估工具,功能很强大。为了在CentOS5下进行性能优化,我们需要在CentOS5下编译2.6.32内核的perf源码。我这里使用的是linux-2.6.32-358.el6的内核源码。
进入源码目录,打上以下补丁:
1、004-150-perf-tools-Fix-build-with-bison-2.3-and-older..patch
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -276,13 +276,13 @@ $(OUTPUT)util/parse-events-flex.c: util/
 	$(QUIET_FLEX)$(FLEX) --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) -t util/parse-events.l > $(OUTPUT)util/parse-events-flex.c

 $(OUTPUT)util/parse-events-bison.c: util/parse-events.y
-	$(QUIET_BISON)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $(OUTPUT)util/parse-events-bison.c
+	$(QUIET_BISON)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $(OUTPUT)util/parse-events-bison.c -p parse_events_

 $(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c
 	$(QUIET_FLEX)$(FLEX) --header-file=$(OUTPUT)util/pmu-flex.h -t util/pmu.l > $(OUTPUT)util/pmu-flex.c

 $(OUTPUT)util/pmu-bison.c: util/pmu.y
-	$(QUIET_BISON)$(BISON) -v util/pmu.y -d -o $(OUTPUT)util/pmu-bison.c
+	$(QUIET_BISON)$(BISON) -v util/pmu.y -d -o $(OUTPUT)util/pmu-bison.c -p perf_pmu_

 $(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c
 $(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -1,5 +1,4 @@
 %pure-parser
-%name-prefix "parse_events_"
 %parse-param {void *_data}
 %parse-param {void *scanner}
 %lex-param {void* scanner}
--- a/tools/perf/util/pmu.y
+++ b/tools/perf/util/pmu.y
@@ -1,4 @@
-%name-prefix "perf_pmu_"
 %parse-param {struct list_head *format}
 %parse-param {char *name}
2、10-13-perf-tools-replace-mkostemp-with-mkstemp.patch
diff --git a/tools/perf/util/dso-test-data.c b/tools/perf/util/dso-test-data.c
index 541cdc7..c6caede 100644
--- a/tools/perf/util/dso-test-data.c
+++ b/tools/perf/util/dso-test-data.c
@@ -23,7 +23,7 @@ static char *test_file(int size)
 	int fd, i;
 	unsigned char *buf;

-	fd = mkostemp(templ, O_CREAT|O_WRONLY|O_TRUNC);
+	fd = mkstemp(templ);

 	buf = malloc(size);
 	if (!buf) {
3、tip-perf-core-perf-test-fix-a-build-error-on-builtin-test.patch
diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c
index b5a544d..5d4354e 100644
--- a/tools/perf/builtin-test.c
+++ b/tools/perf/builtin-test.c
@@ -605,19 +605,13 @@ out_free_threads:
 #undef nsyscalls
 }

-static int sched__get_first_possible_cpu(pid_t pid, cpu_set_t **maskp,
-					 size_t *sizep)
+static int sched__get_first_possible_cpu(pid_t pid, cpu_set_t *maskp)
 {
-	cpu_set_t *mask;
-	size_t size;
 	int i, cpu = -1, nrcpus = 1024;
 realloc:
-	mask = CPU_ALLOC(nrcpus);
-	size = CPU_ALLOC_SIZE(nrcpus);
-	CPU_ZERO_S(size, mask);
+	CPU_ZERO(maskp);

-	if (sched_getaffinity(pid, size, mask) == -1) {
-		CPU_FREE(mask);
+	if (sched_getaffinity(pid, sizeof(*maskp), maskp) == -1) {
 		if (errno == EINVAL && nrcpus < (1024 << 8)) {
 			nrcpus = nrcpus << 2;
 			goto realloc;
@@ -627,19 +621,14 @@ realloc:
 	}

 	for (i = 0; i < nrcpus; i++) {
-		if (CPU_ISSET_S(i, size, mask)) {
-			if (cpu == -1) {
+		if (CPU_ISSET(i, maskp)) {
+			if (cpu == -1)
 				cpu = i;
-				*maskp = mask;
-				*sizep = size;
-			} else
-				CPU_CLR_S(i, size, mask);
+			else
+				CPU_CLR(i, maskp);
 		}
 	}

-	if (cpu == -1)
-		CPU_FREE(mask);
-
 	return cpu;
 }

@@ -654,8 +643,8 @@ static int test__PERF_RECORD(void)
 		.freq = 10,
 		.mmap_pages = 256,
 	};
-	cpu_set_t *cpu_mask = NULL;
-	size_t cpu_mask_size = 0;
+	cpu_set_t cpu_mask;
+	size_t cpu_mask_size = sizeof(cpu_mask);
 	struct perf_evlist *evlist = perf_evlist__new(NULL, NULL);
 	struct perf_evsel *evsel;
 	struct perf_sample sample;
@@ -719,8 +708,7 @@ static int test__PERF_RECORD(void)
 	evsel->attr.sample_type |= PERF_SAMPLE_TIME;
 	perf_evlist__config_attrs(evlist, &opts);

-	err = sched__get_first_possible_cpu(evlist->workload.pid, &cpu_mask,
-					    &cpu_mask_size);
+	err = sched__get_first_possible_cpu(evlist->workload.pid, &cpu_mask);
 	if (err < 0) {
 		pr_debug("sched__get_first_possible_cpu: %s\n", strerror(errno));
 		goto out_delete_evlist;
@@ -731,9 +719,9 @@ static int test__PERF_RECORD(void)
 	/*
 	 * So that we can check perf_sample.cpu on all the samples.
 	 */
-	if (sched_setaffinity(evlist->workload.pid, cpu_mask_size, cpu_mask) < 0) {
+	if (sched_setaffinity(evlist->workload.pid, cpu_mask_size, &cpu_mask) < 0) {
 		pr_debug("sched_setaffinity: %s\n", strerror(errno));
-		goto out_free_cpu_mask;
+		goto out_delete_evlist;
 	}

 	/*
@@ -917,8 +905,6 @@ found_exit:
 	}
 out_err:
 	perf_evlist__munmap(evlist);
-out_free_cpu_mask:
-	CPU_FREE(cpu_mask);
 out_delete_evlist:
 	perf_evlist__delete(evlist);
 out:
然后再安装
binutils220-2.20.51.0.2-5.29.el5.x86_64.rpm
flex-2.5.35-0.8.el5.rfb.x86_64.rpm
libgomp-4.4.7-1.el5.x86_64.rpm
这几个包。
进入tools/perf,make一下就可以了。
这几个包和patch,以及我编译出来的用于CentOS5下2.6.32内核的perf静态文件,可以在这里下载:
http://download.csdn.net/detail/abigwc/9799082