EDIT for moderators:
Yes, that "one might think that SwapBuffers is slow" remark was mine. Heh. :D Could someone change the topic name to "Optimizing OpenGL"? Thanks.
Hey guys,
I'm trying to optimize this OpenGL program, so the problem isn't with C itself, although the program is written in C.
The program loads a vanilla WaveFront .obj file, stores all its data in arrays, creates a display list, uses glDrawElements to record the geometry into the display list and then calls the display list as often as it can. For my test file, this "as often as it can" gives an FPS of about 37. While this is acceptable, another program that does the same thing manages to do it at 60 FPS.
How do I achieve that performance in this program? The bottleneck appears to be the RenderScene function. It only shows when using glFinish(); otherwise one might think SwapBuffers() takes that amount of time (SwapBuffers waits, of course, for the video card to finish its rendering). Anyway, the span from the start of RenderScene to the return of SwapBuffers takes too much time. Too much CPU time, that is! My Intel blah blah dual core has one core fully eaten up by this code, and I have no clue why.
Here is the output from the sourcecode below:
WM_TIMER took: Time elapsed: 31
WM_TIMER took: Time elapsed: 16
WM_TIMER took: Time elapsed: 31
WM_TIMER took: Time elapsed: 31
WM_TIMER took: Time elapsed: 31
WM_TIMER took: Time elapsed: 16
WM_TIMER took: Time elapsed: 31
WM_TIMER took: Time elapsed: 32
WM_TIMER took: Time elapsed: 31
WM_TIMER took: Time elapsed: 31
WM_TIMER took: Time elapsed: 16
WM_TIMER took: Time elapsed: 31
WM_TIMER took: Time elapsed: 31
WM_TIMER took: Time elapsed: 31
WM_TIMER took: Time elapsed: 16
WM_TIMER took: Time elapsed: 31
WM_TIMER took: Time elapsed: 32
WM_TIMER took: Time elapsed: 31
WM_TIMER took: Time elapsed: 15
WM_TIMER took: Time elapsed: 32
WM_TIMER took: Time elapsed: 31
WM_TIMER took: Time elapsed: 31
WM_TIMER took: Time elapsed: 0
WM_TIMER took: Time elapsed: 16
WM_TIMER took: Time elapsed: 31
WM_TIMER took: Time elapsed: 16
WM_TIMER took: Time elapsed: 31
WM_TIMER took: Time elapsed: 31
WM_TIMER took: Time elapsed: 31
WM_TIMER took: Time elapsed: 16
WM_TIMER took: Time elapsed: 31
WM_TIMER took: Time elapsed: 31
WM_TIMER took: Time elapsed: 32
WM_TIMER took: Time elapsed: 15
WM_TIMER took: Time elapsed: 32
WM_TIMER took: Time elapsed: 31
FPS: 36
How can I speed this up?
Thanks in Advance,
Nick
Code:
BowViceJet.c
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <windows.h>
#include <gl/gl.h>
#include <gl/glu.h>
#include <gl/glext.h>
#include "loadObj.h"
/*
 * Prints the number of clock() ticks elapsed since `startTime`.
 * Expands to a braced block, so it is used without a trailing semicolon and
 * requires a `clock_t startTime` variable to be in scope at the call site.
 * NOTE(review): the clock_t difference is passed to %lu without a cast.
 */
#define TIMEELAPSED {printf("\tTime elapsed: %lu\n", clock() - startTime);}
/* Forward declarations for every function defined in this file. */
int APIENTRY WinMain(HINSTANCE instance, HINSTANCE prevInstance, LPSTR cmdLine, int cmdShow);
LRESULT CALLBACK WndProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam);
void dbgError(char *msg);
void MySetPixelFormat(HDC hDC);
void RenderScene();
void ChangeSize(int width, int height);
int initGL();
void pressKey(WPARAM key);
/* Window class name and window title used by WinMain. */
const static char className[] = "OpenGLClass";
const static char appName[] = "OpenGL Window";
/* Model rotation about the x, y and z axes; fed to glRotatef in RenderScene. */
GLfloat xRot, yRot, zRot;
/* Model translation; fed to glTranslatef in RenderScene (z is offset by -20 there). */
GLfloat xDis, yDis, zDis;
/* Vertex and normal arrays filled in by loadObj() and bound as GL_FLOAT arrays in initGL(). */
float *vertices, *normals;
/* Index arrays filled in by loadObj(); initGL() draws them as GL_UNSIGNED_INT indices. */
int *triangleindex, *quadindex;
/* Number of quads / triangles; initGL() multiplies them by 4 / 3 to get index counts. */
unsigned int quadcount, trianglecount;
/* Return code of the loadObj() call made in initGL(). */
int err;
/* Raw command line captured in WinMain and handed to loadObj() (presumably the model path; confirm in loadObj.h). */
char *globcmdLine;
/* Display list id returned by glGenLists() in initGL() and replayed by RenderScene(). */
unsigned int globDisplayLists = 0;
/* Winding order option toggled with the 'F' key: 0 selects GL_CCW, non-zero selects GL_CW. */
char optFrontFace = 0; //CCW default
/*
 * Report a failed Win32 call on stderr.
 *
 * msg: text identifying the call that failed (for example "CreateWindow()").
 * The calling thread's GetLastError() code is appended to the report.
 */
void dbgError(char *msg){
    const unsigned long lastError = GetLastError();

    fprintf(stderr, "Error @ %s. GetLastError(): %lu", msg, lastError);
}
/*
 * Program entry point: registers the window class, creates and shows the
 * main window, then pumps messages until GetMessage() reports WM_QUIT.
 *
 * cmdLine is stored in globcmdLine so that initGL() can pass it to loadObj().
 * Returns the wParam of the final message, or -1 when class registration,
 * window creation or message retrieval fails.
 */
int APIENTRY WinMain(HINSTANCE instance, HINSTANCE prevInstance, LPSTR cmdLine, int cmdShow){
    globcmdLine = cmdLine;

    /* CS_OWNDC gives the window a private device context. */
    WNDCLASS windowClass = {
        .style         = CS_HREDRAW | CS_VREDRAW | CS_OWNDC,
        .lpfnWndProc   = (WNDPROC) WndProc,
        .cbClsExtra    = 0,
        .cbWndExtra    = 0,
        .hInstance     = instance,
        .hIcon         = NULL,
        .hCursor       = LoadCursor(NULL, IDC_ARROW),
        .hbrBackground = NULL,
        .lpszMenuName  = NULL,
        .lpszClassName = className,
    };
    if(!RegisterClass(&windowClass)){
        dbgError("RegisterClass()");
        return -1;
    }

    /* 640x480 overlapped window placed at the top-left corner of the screen. */
    HWND mainWindow = CreateWindow(className, appName,
                                   WS_OVERLAPPEDWINDOW | WS_CLIPCHILDREN | WS_CLIPSIBLINGS,
                                   0, 0, 640, 480,
                                   NULL, NULL, instance, NULL);
    if(!mainWindow){
        dbgError("CreateWindow()");
        return -1;
    }
    ShowWindow(mainWindow, SW_SHOW);
    UpdateWindow(mainWindow);

    MSG msg;
    for(;;){
        const int status = GetMessage(&msg, NULL, 0, 0);
        if(status == -1){
            dbgError("GetMessage()");
            return -1;
        }
        if(status <= 0){
            /* WM_QUIT was retrieved. */
            break;
        }
        TranslateMessage(&msg);
        DispatchMessage(&msg);
    }

    printf("Exiting from WinMain! G'bye.\n");
    return msg.wParam;
}
LRESULT CALLBACK WndProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam){
static HGLRC hRC = NULL;
static HDC hDC = NULL;
clock_t startTime;
switch(msg){
case WM_TIMER:
startTime = clock();
RenderScene();
xRot += 0.5;
yRot += 0.5;
zRot += 0.5;
SwapBuffers(hDC);
printf("WM_TIMER took: "); TIMEELAPSED
break;
case WM_CREATE:
hDC = GetDC(hWnd);
MySetPixelFormat(hDC);
hRC = wglCreateContext(hDC);
if (hRC == NULL){
unsigned long err = GetLastError();
char *msg = calloc(sizeof(char), 512);
FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM, 0, err, 0, msg, 512, 0);
printf("wglCreateContext() failed: error nr. %lu:\n\t%s\n", err, msg);
}
if (wglMakeCurrent(hDC, hRC) == FALSE){
unsigned long err = GetLastError();
char *msg = calloc(sizeof(char), 512);
FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM, 0, err, 0, msg, 512, 0);
printf("wglMakeCurrent() failed: error nr. %lu:\n\t%s\n", err, msg);
}
if(initGL() < 0){
printf("Error loading in model. Exiting");
exit(1);
}
else{
printf("Model loaded and ready to go!\n");
}
//fire as often as it can
SetTimer(hWnd, 0, 1, NULL);
printf("Running with GL_VENDOR: %s\n", glGetString(GL_VENDOR));
break;
case WM_DESTROY:
wglMakeCurrent(hDC, NULL);
wglDeleteContext(hRC);
PostQuitMessage(0);
break;
case WM_PAINT:
//printf("WM_PAINTING. ;-)\n");
RenderScene();
SwapBuffers(hDC);
ValidateRect(hWnd, NULL);
break;
case WM_SIZE:
ChangeSize(LOWORD(lParam), HIWORD(lParam));
break;
case WM_KEYDOWN:
pressKey(wParam);
break;
default:
return (DefWindowProc(hWnd, msg, wParam, lParam));
break;
}
return 0;
}
void MySetPixelFormat(HDC hDC){
int pixelFormat;
static PIXELFORMATDESCRIPTOR pfd = {
sizeof(PIXELFORMATDESCRIPTOR),
1,
PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL | PFD_DOUBLEBUFFER | PFD_TYPE_RGBA,
32, //depth of color
0,0,0,0,0,0,0,0,0,0,0,0,0,
16 , //depth of depthbuffer
0,0,0,0,0,0,0};
pixelFormat = ChoosePixelFormat(hDC, &pfd);
if (SetPixelFormat(hDC, pixelFormat, &pfd) == FALSE){
unsigned long err = GetLastError();
char *msg = calloc(sizeof(char), 512);
FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM, 0, err, 0, msg, 512, 0);
printf("SetPixelFormat() failed: error nr. %lu:\n\t%s\n", err, msg);
}
return;
}
/*
 * Draw one frame: clear the buffers, apply the current translation and
 * rotation globals, replay the model's display list, and update the
 * frames-per-second counter that is printed roughly once per second.
 *
 * The caller is responsible for swapping the buffers afterwards.
 */
void RenderScene(){
    static clock_t nextSecond = 0;
    static unsigned int framesPerSecond = 0;
    static int fpsWindowStarted = 0;

    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);

    /* The 'F' key toggles optFrontFace at run time, so apply it every frame. */
    glFrontFace(optFrontFace ? GL_CW : GL_CCW);

    glPushMatrix();
    /* Push the model 20 units away from the viewer so it is in view. */
    glTranslatef(xDis, yDis, zDis-20);
    glRotatef(xRot, 1, 0, 0);
    glRotatef(yRot, 0, 1, 0);
    glRotatef(zRot, 0, 0, 1);
    glColor3f(.5, .6, .6);
    glCallList(globDisplayLists);
    glPopMatrix();

    framesPerSecond++;
    /* Read the clock once so the comparison and the new deadline agree. */
    const clock_t now = clock();
    if(!fpsWindowStarted){
        /* Open the first measuring window instead of reporting a bogus "FPS: 1". */
        nextSecond = now + CLOCKS_PER_SEC;
        fpsWindowStarted = 1;
    }
    else if(now >= nextSecond){
        printf("FPS: %u\n", framesPerSecond);
        framesPerSecond = 0;
        nextSecond = now + CLOCKS_PER_SEC;
    }
}
void ChangeSize(int width, int height){
if (height == 0) height = 1;
GLfloat aspect = (GLfloat)width/(GLfloat)height;
glViewport(0, 0, width, height);
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
gluPerspective(60, aspect, 0.1, 1000);
glMatrixMode(GL_MODELVIEW);
glLoadIdentity();
return;
}
/*
 * One-time OpenGL setup: resets the transform globals, configures culling,
 * depth testing and two lights, loads the model named by globcmdLine through
 * loadObj(), and records the model's geometry into a display list stored in
 * globDisplayLists.
 *
 * Requires a current GL context.
 * Returns 0 on success, -1 when the model could not be loaded or no display
 * list could be allocated.
 */
int initGL(){
    xRot = yRot = zRot = 0;
    xDis = yDis = zDis = 0;
    vertices = normals = NULL;
    triangleindex = quadindex = NULL;
    quadcount = trianglecount = 0;

    glFrontFace(GL_CCW);
    glEnable(GL_DEPTH_TEST);
    glEnable(GL_CULL_FACE);
    glEnable(GL_LIGHTING);

    /* lighting colors */
    GLfloat ambient[] = {0.35, 0.35, 0.35, 1.0};
    GLfloat diffuse[] = {0.5, 0.5, 0.5, 1.0};
    GLfloat light0Pos[] = {-60, 10, 10, 1};
    GLfloat light1Pos[] = {60, 10, 10, 1};

    /* The global ambient term is scene-wide, so it is set once. */
    glLightModelfv(GL_LIGHT_MODEL_AMBIENT, ambient);

    glLightfv(GL_LIGHT0, GL_AMBIENT, ambient);
    glLightfv(GL_LIGHT0, GL_DIFFUSE, diffuse);
    glLightfv(GL_LIGHT0, GL_POSITION, light0Pos);
    glEnable(GL_LIGHT0);

    glLightfv(GL_LIGHT1, GL_AMBIENT, ambient);
    glLightfv(GL_LIGHT1, GL_DIFFUSE, diffuse);
    glLightfv(GL_LIGHT1, GL_POSITION, light1Pos);
    glEnable(GL_LIGHT1);

    /* Let glColor3f() drive the front-face ambient and diffuse material. */
    glEnable(GL_COLOR_MATERIAL);
    glColorMaterial(GL_FRONT, GL_AMBIENT_AND_DIFFUSE);

    glClearColor(0.0, 0.0, 0.0, 1.0);

    err = loadObj(globcmdLine, &vertices, &normals, &triangleindex, &quadindex, &quadcount, &trianglecount);
    if(err < 0){
        printf("loadObj failed to load object!");
        return -1;
    }
    if(vertices == NULL || normals == NULL || (triangleindex == NULL && quadindex == NULL)){
        printf("Error! One of the essential pointers == NULL!");
        return -1;
    }

    glEnableClientState(GL_VERTEX_ARRAY);
    glEnableClientState(GL_NORMAL_ARRAY);
    glVertexPointer(3, GL_FLOAT, 0, vertices);
    glNormalPointer(GL_FLOAT, 0, normals);

    globDisplayLists = glGenLists(1);
    if(globDisplayLists == 0){
        /* glGenLists() returns 0 when no list name could be allocated. */
        printf("glGenLists failed to allocate a display list!");
        return -1;
    }

    glNewList(globDisplayLists, GL_COMPILE);
    /*
     * The check above only guarantees that ONE of the index arrays exists,
     * so each draw call is recorded only when its array is really there.
     */
    if(quadindex != NULL && quadcount > 0){
        glDrawElements(GL_QUADS, quadcount*4, GL_UNSIGNED_INT, quadindex);
    }
    if(triangleindex != NULL && trianglecount > 0){
        glDrawElements(GL_TRIANGLES, trianglecount*3, GL_UNSIGNED_INT, triangleindex);
    }
    glEndList();

    return 0;
}
/*
 * Keyboard handler, called with the virtual key code of a WM_KEYDOWN message.
 *
 *   Q/E, A/D, Z/C              rotate about y, x, z by +/- 2*STRENGTH
 *   RIGHT/LEFT, UP/DOWN        move along x, y by +/- STRENGTH
 *   CONTROL / NUMPAD0          move along z by +/- STRENGTH
 *   F                          toggle the front-face winding option
 *   O / P                      raise / lower STRENGTH by 0.01
 *
 * The current STRENGTH is printed after every key press, and the rotation
 * angles are wrapped back into the open interval (-360, 360).
 */
void pressKey(WPARAM key){
    static float STRENGTH = 1;

    switch (key){
    /* rotation */
    case 'Q': yRot += (STRENGTH*2); break;
    case 'E': yRot -= (STRENGTH*2); break;
    case 'A': xRot += (STRENGTH*2); break;
    case 'D': xRot -= (STRENGTH*2); break;
    case 'Z': zRot += (STRENGTH*2); break;
    case 'C': zRot -= (STRENGTH*2); break;

    /* displacement */
    case VK_RIGHT:   xDis += STRENGTH; break;
    case VK_LEFT:    xDis -= STRENGTH; break;
    case VK_UP:      yDis += STRENGTH; break;
    case VK_DOWN:    yDis -= STRENGTH; break;
    case VK_CONTROL: zDis += STRENGTH; break;
    case VK_NUMPAD0: zDis -= STRENGTH; break;

    /* options */
    case 'F': optFrontFace ^= 1; break;

    /* STRENGTH adjustment */
    case 'O': STRENGTH += 0.01; break;
    case 'P': STRENGTH -= 0.01; break;

    default:
        /* Any other key leaves the state untouched. */
        break;
    }
    printf("Strength: %f\n", STRENGTH);

    /*
     * Wrap instead of snapping to 0: the angles are also advanced elsewhere
     * (the timer handler), so they can be far outside the range here, and
     * resetting them to 0 would make the model visibly jump.
     */
    xRot = fmodf(xRot, 360.0f);
    yRot = fmodf(yRot, 360.0f);
    zRot = fmodf(zRot, 360.0f);
}
loadObj.h isn't included here because it is irrelevant to the rendering code and is rather long.