Provided below is a bare bones dynamic array library. The game is to extend this library with new primitives and features that interest you, that you've needed in the past, or that you feel may be useful. The intention of this game is to give some of the newer folks a chance to get some design experience, but it's by no means limited to beginners; anyone can join in. And if we end up with a handy library as a result, all the better. :)

The starting library is very basic. It offers only construction/destruction, resize, and get/set. Suggested improvements might be insertion, removal, sorting, and searching. The sky is the limit! See what you can come up with, and post it for everyone to see (though sharing what you've done isn't necessary since this is for your own personal growth).

But first, an example using the base library:

#include <stdio.h>
#include "darray.h"

int main(void)
{
    darray a = {sizeof(int)};
    size_t i;
    int x;

    /* Populate the array from user input */
    printf("Enter a list of integers: ");
    fflush(stdout);

    while (scanf("%d", &x) == 1) {
        darray_resize(&a, a.capacity + 1);
        darray_set(&a, a.capacity - 1, &x);
    }

    /* Use the array */
    for (i = 0; i < a.capacity; ++i)
        printf("%4d", darray_getval(&a, i, int));

    putchar('\n');

    /* Clean up the array */
    darray_reset(&a);

    return 0;
}

It can define a multidimensional array too, but it's somewhat awkward (hint for extending the library):

#include <stdio.h>
#include "darray.h"

#define N 3

int main(void)
{
    darray a, temp;
    size_t i, j;
    int k = 1;

    /* Create an NxN square matrix using darray */
    darray_init(&a, sizeof(darray), N);

    for (i = 0; i < N; ++i) {
        darray_init(&temp, sizeof(darray), N);

        for (j = 0; j < N; ++j, ++k)
            darray_set(&temp, j, &k);

        darray_set(&a, i, &temp);
    }

    /* Use the matrix */
    for (i = 0; i < a.capacity; ++i) {
        darray *p = darray_get(&a, i);

        for (j = 0; j < p->capacity; ++j)
            printf("%4d", darray_getval(p, j, int));

        putchar('\n');
    }

    /* Clean up the matrix */
    for (size_t i = 0; i < a.capacity; ++i)
        darray_reset(darray_get(&a, i));

    darray_reset(&a);

    return 0;
}

Now the library itself. Here is the header:

#ifndef DARRAY_H
#define DARRAY_H
#include <stddef.h>

#if __STDC_VERSION__ >= 199901L
    #include <stdbool.h>
#elif !defined(DARRAY_DISABLE_BOOL)
    /*
        If another library defines a boolean type then DARRAY_DISABLE_BOOL
        can be defined to disable this library's definition.
    */
    #define true  1
    #define false 0

    typedef int bool;
#endif

/*
    @description:
        Defines a type-dependent wrapper around darray_get 
        to avoid ugly casting and dereferencing when a
        stored value is needed.
*/
#define darray_getval(p,pos,type) (*(type*)darray_get(p, pos))

/*
    @description:
        Defines a type-independent wrapper around darray_set
        to correspond with darray_getval.

        Note: This macro isn't 100% comparable to darray_set.
            1) darray_setval cannot be used in an expression.
            2) darray_setval will not return a value like darray_set.
*/
#define darray_setval(p,pos,item,type) \
    do {                               \
        type dummy = item;             \
        darray_set(p, pos, &dummy);    \
    } while(false)

typedef struct darray darray;

struct darray {
    size_t item_size;    /* Size in bytes of each item (homogeneous) */
    size_t capacity;     /* Number of valid items possible without a resize */
    unsigned char *data; /* Storage for array items */
};

extern void  darray_init(darray *p, size_t item_size, size_t capacity);
extern void  darray_free(darray *p);
extern bool  darray_resize(darray *p, size_t new_size);
extern void *darray_get(const darray *p, size_t pos);
extern bool  darray_set(darray *p, size_t pos, void *item);

#endif /* DARRAY_H */

And here is the implementation:

#include "darray.h"

/*
    @description:
        Initializes the darray pointed to by p to the requested capacity.
*/
extern void darray_init(darray *p, size_t item_size, size_t capacity)
{
    p->item_size = item_size;
    p->capacity = 0;
    p->data = NULL;

    if (capacity > 0)
        darray_resize(p, capacity);
}

/*
    @description:
        Releases memory from a previously initialized darray
        pointed to by p, and resets to empty status. The item_size 
        value of the darray is preserved.
*/
extern void darray_reset(darray *p)
{
    free(p->data);
    darray_init(p, p->item_size, 0);
}

/*
    @description:
        Resizes the darray pointed to by p to a new capacity.
        If the new capacity and current capacity are equal, nothing
        is done. If the new capacity is smaller than the current 
        size, the array is truncated to fit the new capacity.
*/
extern bool darray_resize(darray *p, size_t new_capacity)
{
    unsigned char *new_data;
    size_t nitems;

    if (new_capacity == p->capacity)
        return true; /* Don't resize to the same capacity (wasted effort) */
    else {
        new_data = calloc(new_capacity, p->item_size);

        if (!new_data)
            return false; /* Memory allocation failed */

        nitems = p->capacity;

        if (nitems > new_capacity)
            nitems = new_capacity; /* Truncate to a smaller capacity */

        /* Populate the new data array and release the old one */
        memcpy(new_data, p->data, nitems * p->item_size);
        free(p->data);

        /* Apply the changes to the darray */
        p->capacity = new_capacity;
        p->data = new_data;

        return true;
    }
}

/*
    @description:
        Retrieves the item stored at position pos within the 
        darray and returns a pointer to it.
*/
extern void *darray_get(const darray *p, size_t pos)
{
    return &p->data[pos * p->item_size];
}

/*
    @description:
        Sets the item stored at position pos within the darray
        and returns true, or returns false if the position
        is out of range.
*/
extern bool darray_set(darray *p, size_t pos, void *item)
{
    if (pos >= p->capacity)
        return false;

    memcpy(&p->data[pos * p->item_size], item, p->item_size);

    return true;
}

Have fun! :)

myk45 commented: Excellent idea! +5

Hi deceptikon,

Awesome idea! I actually created a new Github account just for this(and maybe more in future). I hope this will be more fun for everyone :)

I have checked in a VS2010 project into it.

1) For those who haven't used Git before, i plan on writing a small "how-to" soon. It shouldn't take more than 0.5-1 day to get a hang of it even if you are new to Software versioning :)

2) Here is the link:github link

I have made two small additions to code and commited them.
@deceptikon: Kindly review the same.

Also, @all, please do not merge into the main branch(master) till someone reviews it. Just so that we have some formal way of doing things.

PS: To push files,
Username: Daniweb
Password: github1234

Nice. Good idea to put it in public version control. :)

Two suggestions though:

  1. Since darray_compare_bytes() is basically reinventing memcmp(), why not just use memcmp() to begin with?
  2. The function comment for darray_compare_bytes() is a copy paste of darray_search(), you forgot to change it. ;)

It's fun seeing how different people write code that's functionally the same. For example, I'd probably write darray_search() like this:

/*
    @description:
        Searches for the first occurrence of an element matching
        item in the darray pointed to by p. If found, the index
        of the element is retured. If not found, (size_t)-1 is
        returned.
*/
extern size_t darray_find(darray *p, void *item)
{
    size_t i;

    /* Basic linear search */
    for (i = 0; i < p->capacity; ++i) {
        if (memcmp(darray_get(p, i), item, p->item_size) == 0)
            return i;
    }

    return (size_t)-1;
}

Basically the same thing, but differences in style and preference show through even for such a short function. :)

Since darray_compare_bytes() is basically reinventing memcmp(), why not just use memcmp() to begin with?

haha! yeah i forgot about memcmp(). changed code to include memcmp().

The function comment for darray_compare_bytes() is a copy paste of darray_search(), you forgot to change it. ;)

yeah, i have removed this anyway now. :)

It's fun seeing how different people write code that's functionally the same

True. I especially always observe even the indenting style. K&R or Allman. Sometimes i see people mix the two as well!

Also, i wrote a small "howto on git" in the tutorial section. But somehow it doesn't appear in the "tutorials" section. Could you please move it accordingly? Thanks!

So that the additions and suggestions aren't lost to github, here are two more public functions and one private helper. The first is a generic swap, since the storage for the darray may be confusing and awkward to work with directly. The second is a heapsort implementation:

static bool do_heap(darray *p, size_t i, size_t n, int (*cmp)(const void*, const void*));

/*
    @description:
        Swaps two elements in the darray pointed to by p. If the
        swap could not be completed, false is returned. Otherwise
        true is returned.
*/
extern bool darray_swap(darray *p, size_t pos1, size_t pos2)
{
    if (pos1 >= p->capacity || pos2 >= p->capacity)
        return false; /* Out of range index */
    else {
#if __STDC_VERSION__ >= 199901L

        /*
            VLAs can be risky due to the runtime size not under programmer
            control. So if the size is greater than a threshold, use
            dynamic allocation. That should rarely happen here, if ever, 
            because the item_size is assumed to be the number of bytes 
            in an object.
        */
#define VLA_LIMIT 1024
        unsigned char temp_base[p->item_size < VLA_LIMIT ? p->item_size : 1];
        bool dynamic_temp = false;
        void *temp = temp_base;

        if (p->item_size >= VLA_LIMIT) {
            temp = malloc(p->item_size);
            dynamic_temp = true;
        }
#undef VLA_LIMIT

#else /* __STDC_VERSION__ */

        /*
            Prior to C99 we can't take advantage of a stack-based
            array with a runtime size. At least we can't without using
            something nonportable like alloca(). To keep the library
            portable, malloc() is used, despite slowness.
        */
        void *temp = malloc(p->item_size);
        bool dynamic_temp = false;

#endif /* __STDC_VERSION__ */

        void *q = &p->data[pos1 * p->item_size];
        void *r = &p->data[pos2 * p->item_size];

        if (!temp)
            return false; /* Failed dynamic allocation */

        memcpy(temp, q, p->item_size);
        memcpy(q, r, p->item_size);
        memcpy(r, temp, p->item_size);

        if (dynamic_temp)
            free(temp);

        return true;
    }
}

/*
    @description:
        Sorts the darray pointed to by p according to the comparison
        rules specified within the function pointed to by cmp. If
        the sort could not be completed, false is returned. Otherwise,
        true is returned.
*/
extern bool darray_sort(darray *p, int (*cmp)(const void*, const void*))
{
    size_t n = p->capacity;
    size_t i = n / 2;

    /* Make the entire array a valid heap */
    while (i-- > 0) {
        if (!do_heap(p, i, n, cmp))
            return false;
    }

    while (--n < (size_t)-1) {
        /* Place the maximum value and fix the heap for remaining elements */
        if (!darray_swap(p, 0, n) || !do_heap(p, 0, n, cmp))
            return false;
    }

    return true;
}

/*
    @description:
        Heapify helper for darray_sort.
*/
static bool do_heap(darray *p, size_t i, size_t n, int (*cmp)(const void*, const void*))
{
    void *temp = malloc(p->item_size);
    unsigned char *base = p->data;
    size_t size = p->item_size;
    size_t k;

    if (!temp)
        return false; /* Failed dynamic allocation */

    memcpy(temp, &base[i * size], size);

    for (k = i * 2 + 1; k < n; k = i * 2 + 1) {
        if (k + 1 < n && cmp(&base[k * size], &base[(k + 1) * size]) < 0)
            ++k;

        if (cmp(temp, &base[k * size]) >= 0)
            break;

        memcpy(&base[i * size], &base[k * size], size);
        i = k;
    }

    memcpy(&base[i * size], temp, size);
    free(temp);

    return true;
}

I chose heapsort over quicksort because it's relatively simple and doesn't have nasty degenerate cases. Fixing all of the degenerate cases with something like introsort would drive up the complexity drastically.

I'll let someone else add it to the project since I don't have any Git tools installed presently.

I'll let someone else add it to the project since I don't have any Git tools installed presently.

Yeah, let's wait and see if someone volunteers. Or else, i will push them in. I actually had planned on writing a sort tonight. Well, i'll think of something else to add.

I actually had planned on writing a sort tonight. Well, i'll think of something else to add.

How about a stable sort? :)

awesome thinking! hats off man! i am thinking on this now! ;) that's a cool idea where i can learn alot and alot and without any limit! as james sir has said "sky is the limit!" ;)

Hi,
I am planning write a push_back interface for the Dynamic Array(as in the std::vector in C++). But somehow, i am not finding an elegant solution. All i can think of is some kind of a linked-list approach.

struct darray {
    size_t item_size;    /* Size in bytes of each item (homogeneous) */
    size_t capacity;     /* Number of valid items possible without a resize */
    unsigned char *data; /* Storage for array items */
};

So, i am planning to use some bytes in "data" for storing our next pointer, so making a push_back() interface possible. Thus not limiting our approach to have a initial size and then increase the size only on a "resize"

Could this be done in a more elegant way?

@deceptikon

How about a stable sort? :)

Ah, yes. i will try that after this above idea can be tried in some way.(i actually have never implemented a stable sort earlier. so, should be interesting) :)

I am planning write a push_back interface for the Dynamic Array(as in the std::vector in C++).

I actually included that functionality at first, where there was both a size and capacity member in the darray. But there was an intended usage problem. With an automatically growing dynamic array you basically have to take full control over the size and disallow direct element access. Everything has to go through the add/remove interface, otherwise the actual size and reported size may not match.

The intention of the darray is to simplify resizing rather than give up full control over the array, so I cut out the size and left the capacity. Now the library is basically little more than a C array where the size can change at runtime. So rather than a true vector (a la C++), it's a slightly more flexible array.

I think adding vector-like behavior to darray would be best implemented as a wrapper around the library, and possibly even as an opaque type so that direct access to the darray isn't allowed:

struct vector {
    darray base;
    size_t size;
};

extern void push_back(vector *p, void *item);
extern void insert(vector *p, size_t pos, void *item);
extern void erase(vector *p, size_t pos);

Hi,

@deceptikon:
Added your changes + insertion sort
Also, another small function to return all occurances of some data into a bytestream

/*
    @description:
        This function copies all occurances of some data into a separate byte stream.
*/
extern void darray_find_all_occurances(darray *d_array, void* data, unsigned char** all_occurances)
{    
    size_t i, count = 0;    

    if (d_array) {
        for (i = 0; i < d_array->capacity; i++) {
            if (memcmp(&d_array->data[i * d_array->item_size], data, d_array->item_size) == 0) {
                count++;
            }
        }    

        if (count == 0) {
            *all_occurances = NULL;
            return;
        }

        *all_occurances = (unsigned char*)calloc(count, d_array->item_size);

        for (i = 0; i < d_array->capacity; i++) {
            if (memcmp(&d_array->data[i * d_array->item_size], data, d_array->item_size) == 0) {
                memcpy(*all_occurances, &d_array->data[i * d_array->item_size], d_array->item_size);                
                *all_occurances += d_array->item_size;              
            }
        }   

        *all_occurances -= count * d_array->item_size;
    } else {
        assert(0);
    }
}

@All:

I was recently in need of hashing(i was using Qt) and i found it very useful using the standard implementations
of the hashing algorithms provided in Qt. So, i thought this could be a useful function in our library.

extern void darray_get_hash(darray *d_array, unsigned char** hash)
{    
    size_t i;   

    *hash = d_array->data;

    if (d_array) {
        for (i = 1; i < d_array->capacity; i++) {
            // TODO: Use a hashing algo to get hash.
        }    
    } else {
        assert(0);
    }
}

I had seen some questions related to Cryptography and security on this forum. So, i guess you guys could give a shot? ;)
For starters, maybe we could go with md5?

Be a part of the DaniWeb community

We're a friendly, industry-focused community of developers, IT pros, digital marketers, and technology enthusiasts meeting, networking, learning, and sharing knowledge.