I have this issorted code to sort elements of an array. It's very slow as it is right now due to a lot of memory access, number of branch instructions per iteration of the poorly written loops, and unorganized instruction/register usage causing dependencies between instructions. I'm not sure on how to go about improving my code to be faster and efficient. I need a nudge in the right direction as I'm new to assembly.

    .globl  issorted
    .align  4, 0x90
issorted:
    movl    $1, %eax
    .align  4, 0x90
LBB1_1:
    cmpq    %rsi, %rax
    jge LBB1_4
    movq    (%rdi,%rax,8), %rcx
    leaq    1(%rax), %rdx
    cmpq    %rcx, -8(%rdi,%rax,8)
    movq    %rdx, %rax
    jle LBB1_1
    xorl    %eax, %eax
    ret
LBB1_4:
    movl    $1, %eax
    ret

Show what you are doing in pseudo-code, as well as tell us what algorithm you are trying to implement (bubble-sort, qsort, etc).

I'm using qsort

long issorted(long *p, long n) {
    long i;
    for (i = 1; i < n; ++i) {
    if (p[i - 1] > p[i])
        return 0;
    }
    return 1;
}




#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <stdlib.h>

static struct timeval start_ts, end_ts, diff_ts;

void startClock() {
    gettimeofday(&start_ts, 0);
}

void endClock() {
    gettimeofday(&end_ts, 0);
}

struct timeval timeminus(struct timeval a, struct timeval b)
{
    struct timeval res;
    res.tv_sec = a.tv_sec - b.tv_sec;
    if (a.tv_usec > b.tv_usec) {
    res.tv_usec = a.tv_usec - b.tv_usec;
    } else {
    res.tv_usec = 1000000 + a.tv_usec - b.tv_usec;
    res.tv_sec--;
    }
    return res;
}

long usecClock() {
    diff_ts = timeminus(end_ts, start_ts);
    return diff_ts.tv_usec + 1000000 * diff_ts.tv_sec;
}

int longLess(const void *a, const void *b) {
    return *(long *)a - *(long *)b;
}

extern int issorted(long *p, long n);

int main(int argc, char **argv)
{
    long l, limit = 500000, size = 10000;
    long *p = calloc(size, sizeof(long));
    long x;
    for (l = 0; l < size; ++l) {
    p[l] = random() % size;
    }
    qsort(p, size, sizeof(long), longLess);

    p[0] = size + 1;
    if (issorted(p, size))
    printf("Got the wrong answer\n");

    qsort(p, size, sizeof(long), longLess);
    p[size - 1] = -1;
    if (issorted(p, size))
    printf("Got the wrong answer\n");

    qsort(p, size, sizeof(long), longLess);
    p[size / 2] = size + 1;
    if (issorted(p, size))
    printf("Got the wrong answer\n");
    qsort(p, size, sizeof(long), longLess);

    printf("Timing issorted\n");
    x = 1;
    startClock();
    for (l = 0; l < limit; ++l) {
    x &= issorted(p, size);
    }
    endClock();
    if (!x)
    printf("Got the wrong answer\n");

    printf("%ld invocations of issorted(%ld) took: %ld usec\n", limit, size, usecClock());
    return 0;
}

Sorry it doesn't sort the array, the issorte asm code just checks to see if its sorted. The first place I want to start is by reducing memory access, but I also want to unroll the loop.

Be a part of the DaniWeb community

We're a friendly, industry-focused community of developers, IT pros, digital marketers, and technology enthusiasts meeting, networking, learning, and sharing knowledge.