mirror of https://github.com/ARMmbed/mbed-os.git
				
				
				
			
		
			
				
	
	
		
			425 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			C
		
	
	
			
		
		
	
	
			425 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			C
		
	
	
/*
 * Profiling framework for the events library
 *
 * Copyright (c) 2016 Christopher Haster
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 | 
						|
#include "equeue.h"
 | 
						|
#include <unistd.h>
 | 
						|
#include <stdio.h>
 | 
						|
#include <setjmp.h>
 | 
						|
#include <stdint.h>
 | 
						|
#include <stdlib.h>
 | 
						|
#include <inttypes.h>
 | 
						|
#include <sys/time.h>
 | 
						|
 | 
						|
 | 
						|
// Performance measurement utils
 | 
						|
// Number of times prof_measure() repeats a test; the smallest run is reported
#define PROF_RUNS 5
// Accumulated cycle count at which a prof_loop() stops iterating
#define PROF_INTERVAL 100000000

// Declares a volatile object of type t that is also marked unused, so a
// value that is only ever written does not produce a compiler warning
#define prof_volatile(t) __attribute__((unused)) volatile t

// Type of every cycle count handled by the profiler
typedef uint64_t prof_cycle_t;

// Counter values captured by the latest prof_start() and prof_stop()
static volatile prof_cycle_t prof_start_cycle, prof_stop_cycle;

// Running total, baseline to subtract, and iteration count of the current run
static prof_cycle_t prof_accum_cycle, prof_baseline_cycle, prof_iterations;

// Unit name printed after a result
static const char *prof_units;
 | 
						|
 | 
						|
// Executes the x86 rdtsc instruction and evaluates to its result as one
// 64-bit value: edx provides the upper 32 bits and eax the lower 32 bits
#define prof_cycle() ({                                                     \
    uint32_t lo, hi;                                                        \
    __asm__ volatile ("rdtsc" : "=a" (lo), "=d" (hi));                      \
    (uint64_t)lo | ((uint64_t)hi << 32);                                    \
})
 | 
						|
 | 
						|
// Loop header for a measured body: resets prof_iterations, then repeats the
// body (counting each pass) until prof_accum_cycle reaches PROF_INTERVAL
#define prof_loop()                                                         \
    for (prof_iterations = 0;                                               \
         PROF_INTERVAL > prof_accum_cycle;                                  \
         ++prof_iterations)
 | 
						|
 | 
						|
// Opens a timed region by storing the current counter in prof_start_cycle
#define prof_start() ({                                                     \
    prof_start_cycle = (prof_cycle());                                      \
})
 | 
						|
 | 
						|
// Closes a timed region: stores the current counter in prof_stop_cycle and
// adds the span since the matching prof_start() to prof_accum_cycle
#define prof_stop() ({                                                      \
    prof_stop_cycle = (prof_cycle());                                       \
    prof_accum_cycle = prof_accum_cycle                                     \
                     + (prof_stop_cycle - prof_start_cycle);                \
})
 | 
						|
 | 
						|
// Reports a directly computed result instead of a timed one.
//
// value: the number to report; the baseline is added here because
//        prof_measure() subtracts it again before printing
// units: string stored in prof_units and printed after the number
//
// prof_iterations is set to 1 so that prof_measure() reports the value
// unscaled. Both arguments are parenthesized so that compound expressions
// (for example a conditional) are evaluated as a whole.
#define prof_result(value, units) ({                                        \
    prof_accum_cycle = (value) + prof_baseline_cycle;                       \
    prof_iterations = 1;                                                    \
    prof_units = (units);                                                   \
})
 | 
						|
 | 
						|
// Runs func(__VA_ARGS__) PROF_RUNS times and prints the smallest
// per-iteration result with the baseline subtracted, followed by prof_units
// ("cycles" unless func changes it, e.g. through prof_result()).
//
// When stdin is not a terminal, the next number found on stdin is treated as
// the result of an earlier run and the relative change is printed as well:
// green when the improvement exceeds 10%, red when the regression exceeds
// 10%. The comparison is skipped when no non-zero number can be read.
//
// Evaluates to the reported result as a prof_cycle_t.
#define prof_measure(func, ...) ({                                          \
    printf("%s: ...", #func);                                               \
    fflush(stdout);                                                         \
                                                                            \
    prof_units = "cycles";                                                  \
    prof_cycle_t runs[PROF_RUNS];                                           \
    for (int i = 0; i < PROF_RUNS; i++) {                                   \
        prof_accum_cycle = 0;                                               \
        prof_iterations = 0;                                                \
        func(__VA_ARGS__);                                                  \
        /* func may not have iterated at all; never divide by zero */       \
        runs[i] = prof_iterations                                           \
                ? prof_accum_cycle / prof_iterations : 0;                   \
    }                                                                       \
                                                                            \
    prof_cycle_t res = runs[0];                                             \
    for (int i = 1; i < PROF_RUNS; i++) {                                   \
        if (runs[i] < res) {                                                \
            res = runs[i];                                                  \
        }                                                                   \
    }                                                                       \
    /* clamp at zero instead of wrapping when a run beats the baseline */   \
    res = (res > prof_baseline_cycle) ? res - prof_baseline_cycle : 0;      \
    printf("\r%s: %"PRIu64" %s", #func, res, prof_units);                   \
                                                                            \
    if (!isatty(STDIN_FILENO)) {                                            \
        prof_cycle_t prev = 0;                                              \
        /* skip everything up to the next digit, then read the number; */   \
        /* EOF, a failed conversion or a zero disables the comparison  */   \
        if (scanf("%*[^0-9]") != EOF                                        \
                && scanf("%"SCNu64, &prev) == 1                             \
                && prev != 0) {                                             \
            int64_t perc = 100*((int64_t)prev - (int64_t)res)               \
                         / (int64_t)prev;                                   \
                                                                            \
            if (perc > 10) {                                                \
                printf(" (\e[32m%+"PRId64"%%\e[0m)", perc);                 \
            } else if (perc < -10) {                                        \
                printf(" (\e[31m%+"PRId64"%%\e[0m)", perc);                 \
            } else {                                                        \
                printf(" (%+"PRId64"%%)", perc);                            \
            }                                                               \
        }                                                                   \
    }                                                                       \
                                                                            \
    printf("\n");                                                           \
    res;                                                                    \
})
 | 
						|
 | 
						|
// Measures func with the baseline cleared to zero and stores the outcome as
// the new prof_baseline_cycle; evaluates to that new baseline
#define prof_baseline(func, ...) ({                                         \
    prof_baseline_cycle = 0;                                                \
    prof_cycle_t prof_baseline_measured = prof_measure(func, __VA_ARGS__);  \
    prof_baseline_cycle = prof_baseline_measured;                           \
})
 | 
						|
 | 
						|
 | 
						|
// Various test functions
 | 
						|
// Callback with an empty body; its argument is ignored
void no_func(void *eh)
{
    (void)eh;
}
 | 
						|
 | 
						|
 | 
						|
// Actual performance tests
 | 
						|
void baseline_prof(void)
 | 
						|
{
 | 
						|
    prof_loop() {
 | 
						|
        prof_start();
 | 
						|
        __asm__ volatile("");
 | 
						|
        prof_stop();
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
void equeue_tick_prof(void)
 | 
						|
{
 | 
						|
    prof_volatile(unsigned) res;
 | 
						|
    prof_loop() {
 | 
						|
        prof_start();
 | 
						|
        res = equeue_tick();
 | 
						|
        prof_stop();
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
void equeue_alloc_prof(void)
 | 
						|
{
 | 
						|
    struct equeue q;
 | 
						|
    equeue_create(&q, 32 * EQUEUE_EVENT_SIZE);
 | 
						|
 | 
						|
    prof_loop() {
 | 
						|
        prof_start();
 | 
						|
        void *e = equeue_alloc(&q, 8 * sizeof(int));
 | 
						|
        prof_stop();
 | 
						|
 | 
						|
        equeue_dealloc(&q, e);
 | 
						|
    }
 | 
						|
 | 
						|
    equeue_destroy(&q);
 | 
						|
}
 | 
						|
 | 
						|
void equeue_alloc_many_prof(int count)
 | 
						|
{
 | 
						|
    struct equeue q;
 | 
						|
    equeue_create(&q, count * EQUEUE_EVENT_SIZE);
 | 
						|
 | 
						|
    void *es[count];
 | 
						|
 | 
						|
    for (int i = 0; i < count; i++) {
 | 
						|
        es[i] = equeue_alloc(&q, (i % 4) * sizeof(int));
 | 
						|
    }
 | 
						|
 | 
						|
    for (int i = 0; i < count; i++) {
 | 
						|
        equeue_dealloc(&q, es[i]);
 | 
						|
    }
 | 
						|
 | 
						|
    prof_loop() {
 | 
						|
        prof_start();
 | 
						|
        void *e = equeue_alloc(&q, 8 * sizeof(int));
 | 
						|
        prof_stop();
 | 
						|
 | 
						|
        equeue_dealloc(&q, e);
 | 
						|
    }
 | 
						|
 | 
						|
    equeue_destroy(&q);
 | 
						|
}
 | 
						|
 | 
						|
void equeue_post_prof(void)
 | 
						|
{
 | 
						|
    struct equeue q;
 | 
						|
    equeue_create(&q, EQUEUE_EVENT_SIZE);
 | 
						|
 | 
						|
    prof_loop() {
 | 
						|
        void *e = equeue_alloc(&q, 0);
 | 
						|
 | 
						|
        prof_start();
 | 
						|
        int id = equeue_post(&q, no_func, e);
 | 
						|
        prof_stop();
 | 
						|
 | 
						|
        equeue_cancel(&q, id);
 | 
						|
    }
 | 
						|
 | 
						|
    equeue_destroy(&q);
 | 
						|
}
 | 
						|
 | 
						|
void equeue_post_many_prof(int count)
 | 
						|
{
 | 
						|
    struct equeue q;
 | 
						|
    equeue_create(&q, count * EQUEUE_EVENT_SIZE);
 | 
						|
 | 
						|
    for (int i = 0; i < count - 1; i++) {
 | 
						|
        equeue_call(&q, no_func, 0);
 | 
						|
    }
 | 
						|
 | 
						|
    prof_loop() {
 | 
						|
        void *e = equeue_alloc(&q, 0);
 | 
						|
 | 
						|
        prof_start();
 | 
						|
        int id = equeue_post(&q, no_func, e);
 | 
						|
        prof_stop();
 | 
						|
 | 
						|
        equeue_cancel(&q, id);
 | 
						|
    }
 | 
						|
 | 
						|
    equeue_destroy(&q);
 | 
						|
}
 | 
						|
 | 
						|
void equeue_post_future_prof(void)
 | 
						|
{
 | 
						|
    struct equeue q;
 | 
						|
    equeue_create(&q, EQUEUE_EVENT_SIZE);
 | 
						|
 | 
						|
    prof_loop() {
 | 
						|
        void *e = equeue_alloc(&q, 0);
 | 
						|
        equeue_event_delay(e, 1000);
 | 
						|
 | 
						|
        prof_start();
 | 
						|
        int id = equeue_post(&q, no_func, e);
 | 
						|
        prof_stop();
 | 
						|
 | 
						|
        equeue_cancel(&q, id);
 | 
						|
    }
 | 
						|
 | 
						|
    equeue_destroy(&q);
 | 
						|
}
 | 
						|
 | 
						|
void equeue_post_future_many_prof(int count)
 | 
						|
{
 | 
						|
    struct equeue q;
 | 
						|
    equeue_create(&q, count * EQUEUE_EVENT_SIZE);
 | 
						|
 | 
						|
    for (int i = 0; i < count - 1; i++) {
 | 
						|
        equeue_call(&q, no_func, 0);
 | 
						|
    }
 | 
						|
 | 
						|
    prof_loop() {
 | 
						|
        void *e = equeue_alloc(&q, 0);
 | 
						|
        equeue_event_delay(e, 1000);
 | 
						|
 | 
						|
        prof_start();
 | 
						|
        int id = equeue_post(&q, no_func, e);
 | 
						|
        prof_stop();
 | 
						|
 | 
						|
        equeue_cancel(&q, id);
 | 
						|
    }
 | 
						|
 | 
						|
    equeue_destroy(&q);
 | 
						|
}
 | 
						|
 | 
						|
void equeue_dispatch_prof(void)
 | 
						|
{
 | 
						|
    struct equeue q;
 | 
						|
    equeue_create(&q, EQUEUE_EVENT_SIZE);
 | 
						|
 | 
						|
    prof_loop() {
 | 
						|
        equeue_call(&q, no_func, 0);
 | 
						|
 | 
						|
        prof_start();
 | 
						|
        equeue_dispatch(&q, 0);
 | 
						|
        prof_stop();
 | 
						|
    }
 | 
						|
 | 
						|
    equeue_destroy(&q);
 | 
						|
}
 | 
						|
 | 
						|
void equeue_dispatch_many_prof(int count)
 | 
						|
{
 | 
						|
    struct equeue q;
 | 
						|
    equeue_create(&q, count * EQUEUE_EVENT_SIZE);
 | 
						|
 | 
						|
    prof_loop() {
 | 
						|
        for (int i = 0; i < count; i++) {
 | 
						|
            equeue_call(&q, no_func, 0);
 | 
						|
        }
 | 
						|
 | 
						|
        prof_start();
 | 
						|
        equeue_dispatch(&q, 0);
 | 
						|
        prof_stop();
 | 
						|
    }
 | 
						|
 | 
						|
    equeue_destroy(&q);
 | 
						|
}
 | 
						|
 | 
						|
void equeue_cancel_prof(void)
 | 
						|
{
 | 
						|
    struct equeue q;
 | 
						|
    equeue_create(&q, EQUEUE_EVENT_SIZE);
 | 
						|
 | 
						|
    prof_loop() {
 | 
						|
        int id = equeue_call(&q, no_func, 0);
 | 
						|
 | 
						|
        prof_start();
 | 
						|
        equeue_cancel(&q, id);
 | 
						|
        prof_stop();
 | 
						|
    }
 | 
						|
 | 
						|
    equeue_destroy(&q);
 | 
						|
}
 | 
						|
 | 
						|
void equeue_cancel_many_prof(int count)
 | 
						|
{
 | 
						|
    struct equeue q;
 | 
						|
    equeue_create(&q, count * EQUEUE_EVENT_SIZE);
 | 
						|
 | 
						|
    for (int i = 0; i < count - 1; i++) {
 | 
						|
        equeue_call(&q, no_func, 0);
 | 
						|
    }
 | 
						|
 | 
						|
    prof_loop() {
 | 
						|
        int id = equeue_call(&q, no_func, 0);
 | 
						|
 | 
						|
        prof_start();
 | 
						|
        equeue_cancel(&q, id);
 | 
						|
        prof_stop();
 | 
						|
    }
 | 
						|
 | 
						|
    equeue_destroy(&q);
 | 
						|
}
 | 
						|
 | 
						|
void equeue_alloc_size_prof(void)
 | 
						|
{
 | 
						|
    size_t size = 32 * EQUEUE_EVENT_SIZE;
 | 
						|
 | 
						|
    struct equeue q;
 | 
						|
    equeue_create(&q, size);
 | 
						|
    equeue_alloc(&q, 0);
 | 
						|
 | 
						|
    prof_result(size - q.slab.size, "bytes");
 | 
						|
 | 
						|
    equeue_destroy(&q);
 | 
						|
}
 | 
						|
 | 
						|
void equeue_alloc_many_size_prof(int count)
 | 
						|
{
 | 
						|
    size_t size = count * EQUEUE_EVENT_SIZE;
 | 
						|
 | 
						|
    struct equeue q;
 | 
						|
    equeue_create(&q, size);
 | 
						|
 | 
						|
    for (int i = 0; i < count; i++) {
 | 
						|
        equeue_alloc(&q, (i % 4) * sizeof(int));
 | 
						|
    }
 | 
						|
 | 
						|
    prof_result(size - q.slab.size, "bytes");
 | 
						|
 | 
						|
    equeue_destroy(&q);
 | 
						|
}
 | 
						|
 | 
						|
void equeue_alloc_fragmented_size_prof(int count)
 | 
						|
{
 | 
						|
    size_t size = count * EQUEUE_EVENT_SIZE;
 | 
						|
 | 
						|
    struct equeue q;
 | 
						|
    equeue_create(&q, size);
 | 
						|
 | 
						|
    void *es[count];
 | 
						|
 | 
						|
    for (int i = 0; i < count; i++) {
 | 
						|
        es[i] = equeue_alloc(&q, (i % 4) * sizeof(int));
 | 
						|
    }
 | 
						|
 | 
						|
    for (int i = 0; i < count; i++) {
 | 
						|
        equeue_dealloc(&q, es[i]);
 | 
						|
    }
 | 
						|
 | 
						|
    for (int i = count - 1; i >= 0; i--) {
 | 
						|
        es[i] = equeue_alloc(&q, (i % 4) * sizeof(int));
 | 
						|
    }
 | 
						|
 | 
						|
    for (int i = count - 1; i >= 0; i--) {
 | 
						|
        equeue_dealloc(&q, es[i]);
 | 
						|
    }
 | 
						|
 | 
						|
    for (int i = 0; i < count; i++) {
 | 
						|
        equeue_alloc(&q, (i % 4) * sizeof(int));
 | 
						|
    }
 | 
						|
 | 
						|
    prof_result(size - q.slab.size, "bytes");
 | 
						|
 | 
						|
    equeue_destroy(&q);
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
// Entry point
 | 
						|
int main()
 | 
						|
{
 | 
						|
    printf("beginning profiling...\n");
 | 
						|
 | 
						|
    prof_baseline(baseline_prof);
 | 
						|
 | 
						|
    prof_measure(equeue_tick_prof);
 | 
						|
    prof_measure(equeue_alloc_prof);
 | 
						|
    prof_measure(equeue_post_prof);
 | 
						|
    prof_measure(equeue_post_future_prof);
 | 
						|
    prof_measure(equeue_dispatch_prof);
 | 
						|
    prof_measure(equeue_cancel_prof);
 | 
						|
 | 
						|
    prof_measure(equeue_alloc_many_prof, 1000);
 | 
						|
    prof_measure(equeue_post_many_prof, 1000);
 | 
						|
    prof_measure(equeue_post_future_many_prof, 1000);
 | 
						|
    prof_measure(equeue_dispatch_many_prof, 100);
 | 
						|
    prof_measure(equeue_cancel_many_prof, 100);
 | 
						|
 | 
						|
    prof_measure(equeue_alloc_size_prof);
 | 
						|
    prof_measure(equeue_alloc_many_size_prof, 1000);
 | 
						|
    prof_measure(equeue_alloc_fragmented_size_prof, 1000);
 | 
						|
 | 
						|
    printf("done!\n");
 | 
						|
}
 |