Right now, I am allocating a dynamic 2D array for my rasterizer's edge list. My setup looks like this:
// One edge's stepping state in the rasterizer's edge list.
// NOTE(review): with 4-byte floats, uvws starts 8 bytes into the struct
// (right after x and dx), so it does not sit on a 16-byte boundary.
struct EDGE
{
    // Current x and its increment (presumably applied once per scanline -- confirm).
    float x;
    float dx;
    // Interpolated attributes, in order: u, v, 1/z, shade.
    float uvws[4];
    // Per-step deltas for the four attributes in uvws.
    float duvws[4];
    // Two y bounds for this edge (presumably start/end scanline -- confirm).
    float y_limit[2];
    // Flag marking the edge as in use (exact semantics not shown here).
    bool active;
};
// Edge storage for one face: a fixed pool of edges plus two edge pointers.
struct EDGERECORD
{
    // Left and right edge pointers (presumably into edges[] below -- confirm).
    EDGE *left_edge, *right_edge;
    // Fixed-capacity edge storage: room for 9 EDGE entries.
    EDGE edges[9];
    // Edge count (presumably how many entries of edges[] are in use -- confirm).
    UINT16 num_edges;
};
// On project initialization: allocate the zero-filled outer table,
// one EDGERECORD* slot per object.
// NOTE(review): the calloc result is not checked in this excerpt.
GET = (EDGERECORD**)calloc(number_of_objects, sizeof(EDGERECORD*));
// For each new object added: allocate that object's row,
// one EDGERECORD per face.
// NOTE(review): the row comes from new[] while the table comes from calloc,
// so they presumably need delete[] and free() respectively -- confirm the
// cleanup code (not shown here) matches.
GET[object_index] = new EDGERECORD[object[object_index].number_of_faces];
I need this whole thing set up so that I can use SSE instructions on these two members:
float uvws[4]; //u, v, 1/z, shade
float duvws[4];// delta u, delta v, etc.
which will allow me to do all the increments at the same time.
I have gotten it to work with separate variables like this:
// Work-around version: the edge attributes are copied into separately
// allocated scratch buffers and processed through __m128 pointers.
// Raw float views of the three scratch buffers.
float *huvwsh;
float *hduvwsh;
float *pdx;
// __m128 views of the same three buffers (assigned below).
__m128* m128_base;
__m128* m128_delta;
__m128* m128_dx;
// Each buffer holds 4 floats and is requested with 16-byte alignment.
// NOTE(review): the results are not checked, and no matching _aligned_free
// is visible in this excerpt.
huvwsh = (float*)_aligned_malloc(4 * sizeof(float), 16);
hduvwsh = (float*)_aligned_malloc(4 * sizeof(float), 16);
pdx = (float*)_aligned_malloc(4 * sizeof(float), 16);
m128_base = (__m128*)huvwsh;
m128_delta = (__m128*)hduvwsh;
m128_dx = (__m128*)pdx;
// Load the left edge's attributes into base and the right edge's into delta.
// _mm_set_ps places its LAST argument in the lowest element, so element 0
// (huvwsh[0] / hduvwsh[0]) holds uvwsh[3] and element 3 holds uvwsh[0].
// Both vectors use the same order, so the lane-wise math below lines up.
// NOTE(review): the field is spelled `uvwsh` here but `uvws` in struct EDGE
// as posted -- confirm which name is the real one.
*m128_base = _mm_set_ps(l_edge->uvwsh[0], l_edge->uvwsh[1], l_edge->uvwsh[2], l_edge->uvwsh[3]);
*m128_delta = _mm_set_ps(r_edge->uvwsh[0], r_edge->uvwsh[1], r_edge->uvwsh[2], r_edge->uvwsh[3]);
// delta = right - left, per attribute.
*m128_delta = _mm_sub_ps(*m128_delta, *m128_base);
// Broadcast the x distance between the two edges to all four lanes.
*m128_dx = _mm_set_ps1(r_edge->x - l_edge->x);
// delta = (right - left) / (right.x - left.x): attribute change per unit x.
// NOTE(review): if both edges share the same x this divides by zero and
// yields inf/NaN lanes -- confirm the caller rules that out.
*m128_delta = _mm_div_ps(*m128_delta, *m128_dx);
//blah blah blah (intervening code elided in the original post)
// Advance all four attributes by one step in a single add.
*m128_base = _mm_add_ps(*m128_base, *m128_delta);
But I need to know how to allocate everything from the first example so that I can skip the step of copying everything out to a separate array before working on it.
Thanks to anyone that can help!