L
Ich habe mal damit angefangen, und drei Verfahren miteinander verglichen... Die Ausgabe ist:
./main
0 0 1 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 10 11 11 12 12 13 13 14 14 15 15 16 16 17 17 18 18 19 19 20 20 21 21 22 22 23 24 24 25 25 26 26 27 27 28 28 29 29 30 30 31 31 32 32 33 33 34 34 35 35 36 36 37 37 38 38 39 39 40 40 41 41 42 42 43 43 44 44 45 45 46 47 47 48 48 49 49 50 50
a: 24.810000
0 0 1 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 10 11 11 12 12 13 13 14 14 15 15 16 16 17 17 18 18 19 19 20 20 21 21 22 22 23 24 24 25 25 26 26 27 27 28 28 29 29 30 30 31 31 32 32 33 33 34 34 35 35 36 36 37 37 38 38 39 39 40 40 41 41 42 42 43 43 44 44 45 45 46 47 47 48 48 49 49 50 50
a: 24.810000
0 0 1 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 10 11 11 12 12 13 13 14 14 15 15 16 16 17 17 18 18 19 19 20 20 21 21 22 22 23 24 24 25 25 26 26 27 27 28 28 29 29 30 30 31 31 32 32 33 33 34 34 35 35 36 36 37 37 38 38 39 39 40 40 41 41 42 42 43 43 44 44 45 45 46 47 47 48 48 49 49 50 50
a: 24.810000
81542
65487
78784
Die Verfahren benötigten also 0.81, 0.65 und 0.78 Sekunden... Meine Optimierung war das zweite Verfahren... Das dritte Verfahren stellt einen SelectionSort zum Vergleich dar. Hier ist die Test-"Suite":
#include <stdio.h>
#include <sys/time.h>
size_t n = 100;
size_t my_rand(size_t seed, size_t max)
{
size_t a = 16807;
return (a * seed) % max;
}
void fill(size_t a[])
{
for (size_t i = 0; i < n; i++)
{
a[i] = my_rand(i + a[0], 51);
}
}
double average(size_t a[])
{
// The average should be around 25.
double average = 0;
for (size_t i = 0; i < n; i++)
{
average += (double)a[i] / (double)n;
}
return average;
}
void print(size_t a[])
{
for (size_t i = 0; i < n; i++)
{
printf("%d ", a[i]);
}
printf("\na: %f\n\n", average(a));
}
void sort_a(size_t arr[])
{
size_t i = 0, j = 1, a, b;
do
{
a = arr[i];
b = arr[j];
if (a > b)
{
arr[i] = b;
arr[j] = a;
i = 0;
j = 1;
continue;
}
i++;
j++;
} while (j < n);
}
void sort_a_optimized(size_t arr[])
{
asm volatile(
" movq %%rdi, -40(%%rbp) \n"
" movq $0, -8(%%rbp) \n"
" movq $1, -16(%%rbp) \n"
".loop_start: \n"
" movq -8(%%rbp), %%rax \n"
" leaq 0(,%%rax,8), %%rdx \n"
" movq -40(%%rbp), %%rax \n"
" addq %%rdx, %%rax \n"
" movq (%%rax), %%rax \n"
" movq %%rax, -24(%%rbp) \n"
" movq -16(%%rbp), %%rax \n"
" leaq 0(,%%rax,8), %%rdx \n"
" movq -40(%%rbp), %%rax \n"
" addq %%rdx, %%rax \n"
" movq (%%rax), %%rax \n"
" movq %%rax, -32(%%rbp) \n"
" movq -24(%%rbp), %%rax \n"
" cmpq %%rax, -32(%%rbp) \n"
" jnb .loop_increment \n"
" movq -8(%%rbp), %%rax \n"
" leaq 0(,%%rax,8), %%rdx \n"
" movq -40(%%rbp), %%rax \n"
" addq %%rax, %%rdx \n"
" movq -32(%%rbp), %%rax \n"
" movq %%rax, (%%rdx) \n"
" movq -16(%%rbp), %%rax \n"
" leaq 0(,%%rax,8), %%rdx \n"
" movq -40(%%rbp), %%rax \n"
" addq %%rax, %%rdx \n"
" movq -24(%%rbp), %%rax \n"
" movq %%rax, (%%rdx) \n"
" movq $0, -8(%%rbp) \n"
" movq $1, -16(%%rbp) \n"
" jmp .loop_check \n"
".loop_increment: \n"
" addq $1, -8(%%rbp) \n"
" addq $1, -16(%%rbp) \n"
".loop_check: \n"
" movq %[n], %%rax \n"
" cmpq %%rax, -16(%%rbp) \n"
" jb .loop_start \n"
: /* No outputs. */
: [n] "m"(n)
: "memory");
}
void sort_b(size_t arr[])
{
for (size_t i = 0; i < n - 1; i++)
{
size_t a = i;
size_t b = arr[i];
for (size_t j = i + 1; j < n; j++)
{
if (arr[j] < b)
{
a = j;
b = arr[j];
}
}
arr[a] = arr[i];
arr[i] = b;
}
}
int main(int argc, char const *argv[])
{
struct timeval tv;
long long t1, t2, t3, t4, t5, t6;
size_t arr[n];
size_t itreations = 10000;
gettimeofday(&tv, 0);
t1 = tv.tv_usec;
for (size_t i = 0; i < itreations; i++)
{
fill(arr);
sort_a(arr);
}
gettimeofday(&tv, 0);
t2 = tv.tv_usec;
print(arr);
gettimeofday(&tv, 0);
t3 = tv.tv_usec;
for (size_t i = 0; i < itreations; i++)
{
fill(arr);
sort_a_optimized(arr);
}
gettimeofday(&tv, 0);
t4 = tv.tv_usec;
print(arr);
gettimeofday(&tv, 0);
t5 = tv.tv_usec;
for (size_t i = 0; i < itreations; i++)
{
fill(arr);
sort_b(arr);
}
gettimeofday(&tv, 0);
t6 = tv.tv_usec;
print(arr);
printf("%d\n%d\n%d\n", t2 - t1, t4 - t3, t6 - t5);
return 0;
}
Ich denke, für den Use-Case kann man selbst mit -O3 nicht mehr herausholen...
@Schlangenmensch sagte in Funktion optimieren:
Welcher Assambler?
Für den gcc (Linux).
@Schlangenmensch sagte in Funktion optimieren:
Warum?
Aus lernzwecken.