[4] | 1 | /* |
---|
| 2 | * Copyright 1993-2009 NVIDIA Corporation. All rights reserved. |
---|
| 3 | * |
---|
| 4 | * NVIDIA Corporation and its licensors retain all intellectual property and |
---|
| 5 | * proprietary rights in and to this software and related documentation and |
---|
| 6 | * any modifications thereto. Any use, reproduction, disclosure, or distribution |
---|
| 7 | * of this software and related documentation without an express license |
---|
| 8 | * agreement from NVIDIA Corporation is strictly prohibited. |
---|
| 9 | * |
---|
| 10 | */ |
---|
| 11 | #include <stdlib.h> |
---|
| 12 | #include <stdio.h> |
---|
| 13 | #include <string.h> |
---|
| 14 | #include <GL/glew.h> |
---|
| 15 | #include <cufft.h> |
---|
| 16 | #include <cutil_inline.h> |
---|
| 17 | #include <cutil_gl_inline.h> |
---|
| 18 | #include <cuda_gl_interop.h> |
---|
| 19 | //#include <rendercheck_gl.h> |
---|
| 20 | |
---|
| 21 | #if defined(SAGE_APP) |
---|
| 22 | #include "sail.h" |
---|
| 23 | GLubyte *rgbBuffer = 0; |
---|
| 24 | sail sageInf; // sail object |
---|
| 25 | #endif |
---|
| 26 | |
---|
| 27 | #if defined(__APPLE__) || defined(MACOSX) |
---|
| 28 | #include <GLUT/glut.h> |
---|
| 29 | #else |
---|
| 30 | #include <GL/glut.h> |
---|
| 31 | #endif |
---|
| 32 | |
---|
| 33 | #include "fluidsGL_kernels.cu" |
---|
| 34 | |
---|
| 35 | #define MAX_EPSILON_ERROR 1.0f |
---|
| 36 | |
---|
| 37 | // Define the files that are to be save and the reference images for validation |
---|
| 38 | const char *sOriginal[] = |
---|
| 39 | { |
---|
| 40 | "fluidsGL.ppm", |
---|
| 41 | NULL |
---|
| 42 | }; |
---|
| 43 | |
---|
| 44 | const char *sReference[] = |
---|
| 45 | { |
---|
| 46 | "ref_fluidsGL.ppm", |
---|
| 47 | NULL |
---|
| 48 | }; |
---|
| 49 | |
---|
| 50 | // CUDA example code that implements the frequency space version of |
---|
| 51 | // Jos Stam's paper 'Stable Fluids' in 2D. This application uses the |
---|
| 52 | // CUDA FFT library (CUFFT) to perform velocity diffusion and to |
---|
| 53 | // force non-divergence in the velocity field at each time step. It uses |
---|
| 54 | // CUDA-OpenGL interoperability to update the particle field directly |
---|
| 55 | // instead of doing a copy to system memory before drawing. Texture is |
---|
| 56 | // used for automatic bilinear interpolation at the velocity advection step. |
---|
| 57 | |
---|
| 58 | #ifdef __DEVICE_EMULATION__ |
---|
| 59 | #define DIM 64 // Square size of solver domain |
---|
| 60 | #else |
---|
| 61 | #define DIM 768 // Square size of solver domani |
---|
| 62 | #endif |
---|
| 63 | #define DS (DIM*DIM) // Total domain size |
---|
| 64 | #define CPADW (DIM/2+1) // Padded width for real->complex in-place FFT |
---|
| 65 | #define RPADW (2*(DIM/2+1)) // Padded width for real->complex in-place FFT |
---|
| 66 | #define PDS (DIM*CPADW) // Padded total domain size |
---|
| 67 | |
---|
| 68 | #define DT 0.09f // Delta T for interative solver |
---|
| 69 | #define VIS 0.0025f // Viscosity constant |
---|
| 70 | #define FORCE (5.8f*DIM) // Force scale factor |
---|
| 71 | #define FR 4 // Force update radius |
---|
| 72 | |
---|
| 73 | #define TILEX 64 // Tile width |
---|
| 74 | #define TILEY 64 // Tile height |
---|
| 75 | #define TIDSX 64 // Tids in X |
---|
| 76 | #define TIDSY 4 // Tids in Y |
---|
| 77 | |
---|
| 78 | void cleanup(void); |
---|
| 79 | |
---|
| 80 | // CUFFT plan handle |
---|
| 81 | static cufftHandle planr2c; |
---|
| 82 | static cufftHandle planc2r; |
---|
| 83 | static cData *vxfield = NULL; |
---|
| 84 | static cData *vyfield = NULL; |
---|
| 85 | |
---|
| 86 | cData *hvfield = NULL; |
---|
| 87 | cData *dvfield = NULL; |
---|
| 88 | static int wWidth = max(512,DIM); |
---|
| 89 | static int wHeight = max(512,DIM); |
---|
| 90 | |
---|
| 91 | static int clicked = 0; |
---|
| 92 | static int fpsCount = 0; |
---|
| 93 | static int fpsLimit = 1; |
---|
| 94 | unsigned int timer; |
---|
| 95 | |
---|
| 96 | // Particle data |
---|
| 97 | GLuint vbo = 0; // OpenGL vertex buffer object |
---|
| 98 | static cData *particles = NULL; // particle positions in host memory |
---|
| 99 | static int lastx = 0, lasty = 0; |
---|
| 100 | |
---|
| 101 | // Texture pitch |
---|
| 102 | size_t tPitch = 0; // Now this is compatible with gcc in 64-bit |
---|
| 103 | |
---|
| 104 | bool g_bQAReadback = false; |
---|
| 105 | bool g_bQAAddTestForce = true; |
---|
| 106 | int g_iFrameToCompare = 100; |
---|
| 107 | int g_TotalErrors = 0; |
---|
| 108 | |
---|
| 109 | // CheckFBO/BackBuffer class objects |
---|
| 110 | //CheckRender *g_CheckRender = NULL; |
---|
| 111 | |
---|
| 112 | void autoTest(); |
---|
| 113 | |
---|
| 114 | |
---|
| 115 | void addForces(cData *v, int dx, int dy, int spx, int spy, float fx, float fy, int r) { |
---|
| 116 | |
---|
| 117 | dim3 tids(2*r+1, 2*r+1); |
---|
| 118 | |
---|
| 119 | addForces_k<<<1, tids>>>(v, dx, dy, spx, spy, fx, fy, r, tPitch); |
---|
| 120 | cutilCheckMsg("addForces_k failed."); |
---|
| 121 | } |
---|
| 122 | |
---|
| 123 | void advectVelocity(cData *v, float *vx, float *vy, |
---|
| 124 | int dx, int pdx, int dy, float dt) { |
---|
| 125 | |
---|
| 126 | dim3 grid((dx/TILEX)+(!(dx%TILEX)?0:1), (dy/TILEY)+(!(dy%TILEY)?0:1)); |
---|
| 127 | |
---|
| 128 | dim3 tids(TIDSX, TIDSY); |
---|
| 129 | |
---|
| 130 | updateTexture(v, DIM*sizeof(cData), DIM, tPitch); |
---|
| 131 | advectVelocity_k<<<grid, tids>>>(v, vx, vy, dx, pdx, dy, dt, TILEY/TIDSY); |
---|
| 132 | |
---|
| 133 | cutilCheckMsg("advectVelocity_k failed."); |
---|
| 134 | } |
---|
| 135 | |
---|
| 136 | void diffuseProject(cData *vx, cData *vy, int dx, int dy, float dt, |
---|
| 137 | float visc) { |
---|
| 138 | // Forward FFT |
---|
| 139 | cufftExecR2C(planr2c, (cufftReal*)vx, (cufftComplex*)vx); |
---|
| 140 | cufftExecR2C(planr2c, (cufftReal*)vy, (cufftComplex*)vy); |
---|
| 141 | |
---|
| 142 | uint3 grid = make_uint3((dx/TILEX)+(!(dx%TILEX)?0:1), |
---|
| 143 | (dy/TILEY)+(!(dy%TILEY)?0:1), 1); |
---|
| 144 | |
---|
| 145 | uint3 tids = make_uint3(TIDSX, TIDSY, 1); |
---|
| 146 | |
---|
| 147 | diffuseProject_k<<<grid, tids>>>(vx, vy, dx, dy, dt, visc, TILEY/TIDSY); |
---|
| 148 | cutilCheckMsg("diffuseProject_k failed."); |
---|
| 149 | |
---|
| 150 | // Inverse FFT |
---|
| 151 | cufftExecC2R(planc2r, (cufftComplex*)vx, (cufftReal*)vx); |
---|
| 152 | cufftExecC2R(planc2r, (cufftComplex*)vy, (cufftReal*)vy); |
---|
| 153 | } |
---|
| 154 | |
---|
| 155 | void updateVelocity(cData *v, float *vx, float *vy, |
---|
| 156 | int dx, int pdx, int dy) { |
---|
| 157 | |
---|
| 158 | dim3 grid((dx/TILEX)+(!(dx%TILEX)?0:1), (dy/TILEY)+(!(dy%TILEY)?0:1)); |
---|
| 159 | |
---|
| 160 | dim3 tids(TIDSX, TIDSY); |
---|
| 161 | |
---|
| 162 | updateVelocity_k<<<grid, tids>>>(v, vx, vy, dx, pdx, dy, TILEY/TIDSY, tPitch); |
---|
| 163 | cutilCheckMsg("updateVelocity_k failed."); |
---|
| 164 | } |
---|
| 165 | |
---|
| 166 | void advectParticles(GLuint buffer, cData *v, int dx, int dy, float dt) { |
---|
| 167 | |
---|
| 168 | dim3 grid((dx/TILEX)+(!(dx%TILEX)?0:1), (dy/TILEY)+(!(dy%TILEY)?0:1)); |
---|
| 169 | |
---|
| 170 | dim3 tids(TIDSX, TIDSY); |
---|
| 171 | |
---|
| 172 | cData *p; |
---|
| 173 | cudaGLMapBufferObject((void**)&p, buffer); |
---|
| 174 | cutilCheckMsg("cudaGLMapBufferObject failed"); |
---|
| 175 | |
---|
| 176 | advectParticles_k<<<grid, tids>>>(p, v, dx, dy, dt, TILEY/TIDSY, tPitch); |
---|
| 177 | cutilCheckMsg("advectParticles_k failed."); |
---|
| 178 | |
---|
| 179 | cudaGLUnmapBufferObject(buffer); |
---|
| 180 | cutilCheckMsg("cudaGLUnmapBufferObject failed"); |
---|
| 181 | } |
---|
| 182 | |
---|
| 183 | void display(void) { |
---|
| 184 | cutilCheckError(cutStartTimer(timer)); |
---|
| 185 | |
---|
| 186 | // simulate fluid |
---|
| 187 | advectVelocity(dvfield, (float*)vxfield, (float*)vyfield, DIM, RPADW, DIM, DT); |
---|
| 188 | diffuseProject(vxfield, vyfield, CPADW, DIM, DT, VIS); |
---|
| 189 | updateVelocity(dvfield, (float*)vxfield, (float*)vyfield, DIM, RPADW, DIM); |
---|
| 190 | advectParticles(vbo, dvfield, DIM, DIM, DT); |
---|
| 191 | |
---|
| 192 | // render points from vertex buffer |
---|
| 193 | glClear(GL_COLOR_BUFFER_BIT); |
---|
| 194 | glColor4f(0,1,0,0.5f); glPointSize(1); |
---|
| 195 | glEnable(GL_POINT_SMOOTH); |
---|
| 196 | glEnable(GL_BLEND); |
---|
| 197 | glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); |
---|
| 198 | glEnableClientState(GL_VERTEX_ARRAY); |
---|
| 199 | glDisable(GL_DEPTH_TEST); |
---|
| 200 | glDisable(GL_CULL_FACE); |
---|
| 201 | glBindBufferARB(GL_ARRAY_BUFFER_ARB, vbo); |
---|
| 202 | glVertexPointer(2, GL_FLOAT, 0, NULL); |
---|
| 203 | glDrawArrays(GL_POINTS, 0, DS); |
---|
| 204 | glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0); |
---|
| 205 | glDisableClientState(GL_VERTEX_ARRAY); |
---|
| 206 | glDisableClientState(GL_TEXTURE_COORD_ARRAY); |
---|
| 207 | glDisable(GL_TEXTURE_2D); |
---|
| 208 | |
---|
| 209 | // Finish timing before swap buffers to avoid refresh sync |
---|
| 210 | cutilCheckError(cutStopTimer(timer)); |
---|
| 211 | glutSwapBuffers(); |
---|
| 212 | |
---|
| 213 | fpsCount++; |
---|
| 214 | if (fpsCount == fpsLimit) { |
---|
| 215 | char fps[256]; |
---|
| 216 | float ifps = 1.f / (cutGetAverageTimerValue(timer) / 1000.f); |
---|
| 217 | sprintf(fps, "Cuda/GL Stable Fluids (%d x %d): %3.1f fps", DIM, DIM, ifps); |
---|
| 218 | glutSetWindowTitle(fps); |
---|
| 219 | fpsCount = 0; |
---|
| 220 | fpsLimit = (int)max(ifps, 1.f); |
---|
| 221 | cutilCheckError(cutResetTimer(timer)); |
---|
| 222 | } |
---|
| 223 | |
---|
| 224 | #if defined(SAGE_APP) |
---|
| 225 | glReadPixels(0, 0, wWidth, wHeight, GL_RGB, GL_UNSIGNED_BYTE, rgbBuffer); |
---|
| 226 | sageInf.swapBuffer(); |
---|
| 227 | rgbBuffer = (GLubyte *)sageInf.getBuffer(); |
---|
| 228 | |
---|
| 229 | sageMessage msg; |
---|
| 230 | if (sageInf.checkMsg(msg, false) > 0) { |
---|
| 231 | switch (msg.getCode()) { |
---|
| 232 | case APP_QUIT : { |
---|
| 233 | exit(0); |
---|
| 234 | break; |
---|
| 235 | } |
---|
| 236 | } |
---|
| 237 | } |
---|
| 238 | #endif |
---|
| 239 | |
---|
| 240 | glutPostRedisplay(); |
---|
| 241 | } |
---|
| 242 | |
---|
| 243 | void autoTest() |
---|
| 244 | { |
---|
| 245 | for(int count=0;count<g_iFrameToCompare;count++) |
---|
| 246 | { |
---|
| 247 | // add in a little force so the automated testing is interesing. |
---|
| 248 | if(g_bQAReadback && g_bQAAddTestForce) |
---|
| 249 | { |
---|
| 250 | int x = wWidth/(count+1); int y = wHeight/(count+1); |
---|
| 251 | float fx = (x / (float)wWidth); |
---|
| 252 | float fy = (y / (float)wHeight); |
---|
| 253 | int nx = (int)(fx * DIM); |
---|
| 254 | int ny = (int)(fy * DIM); |
---|
| 255 | |
---|
| 256 | int ddx = 35; |
---|
| 257 | int ddy = 35; |
---|
| 258 | fx = ddx / (float)wWidth; |
---|
| 259 | fy = ddy / (float)wHeight; |
---|
| 260 | int spy = ny-FR; |
---|
| 261 | int spx = nx-FR; |
---|
| 262 | |
---|
| 263 | addForces(dvfield, DIM, DIM, spx, spy, FORCE * DT * fx, FORCE * DT * fy, FR); |
---|
| 264 | lastx = x; lasty = y; |
---|
| 265 | //g_bQAAddTestForce = false; // only add it once |
---|
| 266 | } |
---|
| 267 | display(); |
---|
| 268 | } |
---|
| 269 | |
---|
| 270 | // compare to offical reference image, printing PASS or FAIL. |
---|
| 271 | /* |
---|
| 272 | printf("> (Frame %d) Readback BackBuffer\n", 100); |
---|
| 273 | g_CheckRender->readback( wWidth, wHeight, NULL ); |
---|
| 274 | g_CheckRender->savePPM(sOriginal[0], true, NULL); |
---|
| 275 | if (!g_CheckRender->PPMvsPPM(sOriginal[0], sReference[0], MAX_EPSILON_ERROR)) { |
---|
| 276 | g_TotalErrors++; |
---|
| 277 | } |
---|
| 278 | */ |
---|
| 279 | } |
---|
| 280 | |
---|
| 281 | |
---|
| 282 | void idle(void) { |
---|
| 283 | glutPostRedisplay(); |
---|
| 284 | } |
---|
| 285 | |
---|
| 286 | void initParticles(cData *p, int dx, int dy) { |
---|
| 287 | int i, j; |
---|
| 288 | for (i = 0; i < dy; i++) { |
---|
| 289 | for (j = 0; j < dx; j++) { |
---|
| 290 | p[i*dx+j].x = ((j+0.5)/dx) + |
---|
| 291 | (rand() / (float)RAND_MAX - 0.5f) / dx; |
---|
| 292 | p[i*dx+j].y = ((i+0.5)/dy) + |
---|
| 293 | (rand() / (float)RAND_MAX - 0.5f) / dy; |
---|
| 294 | } |
---|
| 295 | } |
---|
| 296 | } |
---|
| 297 | |
---|
| 298 | void keyboard( unsigned char key, int x, int y) { |
---|
| 299 | switch( key) { |
---|
| 300 | case 27: |
---|
| 301 | #if defined(SAGE_APP) |
---|
| 302 | sageInf.shutdown(); |
---|
| 303 | #endif |
---|
| 304 | exit (0); |
---|
| 305 | case 'r': |
---|
| 306 | memset(hvfield, 0, sizeof(cData) * DS); |
---|
| 307 | cudaMemcpy(dvfield, hvfield, sizeof(cData) * DS, |
---|
| 308 | cudaMemcpyHostToDevice); |
---|
| 309 | |
---|
| 310 | initParticles(particles, DIM, DIM); |
---|
| 311 | |
---|
| 312 | cudaGLUnregisterBufferObject(vbo); |
---|
| 313 | cutilCheckMsg("cudaGLUnregisterBufferObject failed"); |
---|
| 314 | |
---|
| 315 | glBindBufferARB(GL_ARRAY_BUFFER_ARB, vbo); |
---|
| 316 | glBufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(cData) * DS, |
---|
| 317 | particles, GL_DYNAMIC_DRAW_ARB); |
---|
| 318 | glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0); |
---|
| 319 | |
---|
| 320 | cudaGLRegisterBufferObject(vbo); |
---|
| 321 | cutilCheckMsg("cudaGLRegisterBufferObject failed"); |
---|
| 322 | break; |
---|
| 323 | default: break; |
---|
| 324 | } |
---|
| 325 | } |
---|
| 326 | |
---|
| 327 | void click(int button, int updown, int x, int y) { |
---|
| 328 | lastx = x; lasty = y; |
---|
| 329 | clicked = !clicked; |
---|
| 330 | } |
---|
| 331 | |
---|
| 332 | void motion (int x, int y) { |
---|
| 333 | // Convert motion coordinates to domain |
---|
| 334 | float fx = (lastx / (float)wWidth); |
---|
| 335 | float fy = (lasty / (float)wHeight); |
---|
| 336 | int nx = (int)(fx * DIM); |
---|
| 337 | int ny = (int)(fy * DIM); |
---|
| 338 | |
---|
| 339 | if (clicked && nx < DIM-FR && nx > FR-1 && ny < DIM-FR && ny > FR-1) { |
---|
| 340 | int ddx = x - lastx; |
---|
| 341 | int ddy = y - lasty; |
---|
| 342 | fx = ddx / (float)wWidth; |
---|
| 343 | fy = ddy / (float)wHeight; |
---|
| 344 | int spy = ny-FR; |
---|
| 345 | int spx = nx-FR; |
---|
| 346 | addForces(dvfield, DIM, DIM, spx, spy, FORCE * DT * fx, FORCE * DT * fy, FR); |
---|
| 347 | lastx = x; lasty = y; |
---|
| 348 | } |
---|
| 349 | glutPostRedisplay(); |
---|
| 350 | } |
---|
| 351 | |
---|
| 352 | void reshape(int x, int y) { |
---|
| 353 | // no resize for SAGE |
---|
| 354 | //wWidth = x; wHeight = y; |
---|
| 355 | |
---|
| 356 | glViewport(0, 0, x, y); |
---|
| 357 | glMatrixMode(GL_PROJECTION); |
---|
| 358 | glLoadIdentity(); |
---|
| 359 | glOrtho(0, 1, 1, 0, 0, 1); |
---|
| 360 | glMatrixMode(GL_MODELVIEW); |
---|
| 361 | glLoadIdentity(); |
---|
| 362 | glutPostRedisplay(); |
---|
| 363 | } |
---|
| 364 | |
---|
| 365 | void cleanup(void) { |
---|
| 366 | cudaGLUnregisterBufferObject(vbo); |
---|
| 367 | cutilCheckMsg("cudaGLUnregisterBufferObject failed"); |
---|
| 368 | |
---|
| 369 | unbindTexture(); |
---|
| 370 | deleteTexture(); |
---|
| 371 | |
---|
| 372 | // Free all host and device resources |
---|
| 373 | free(hvfield); free(particles); |
---|
| 374 | cudaFree(dvfield); |
---|
| 375 | cudaFree(vxfield); cudaFree(vyfield); |
---|
| 376 | cufftDestroy(planr2c); |
---|
| 377 | cufftDestroy(planc2r); |
---|
| 378 | |
---|
| 379 | glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0); |
---|
| 380 | glDeleteBuffersARB(1, &vbo); |
---|
| 381 | |
---|
| 382 | cutilCheckError(cutDeleteTimer(timer)); |
---|
| 383 | } |
---|
| 384 | |
---|
| 385 | int initGL(int argc, char **argv) |
---|
| 386 | { |
---|
| 387 | glutInit(&argc, argv); |
---|
| 388 | glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE); |
---|
| 389 | glutInitWindowSize(wWidth, wHeight); |
---|
| 390 | glutCreateWindow("Compute Stable Fluids"); |
---|
| 391 | glutDisplayFunc(display); |
---|
| 392 | glutKeyboardFunc(keyboard); |
---|
| 393 | glutMouseFunc(click); |
---|
| 394 | glutMotionFunc(motion); |
---|
| 395 | glutReshapeFunc(reshape); |
---|
| 396 | glutIdleFunc(idle); |
---|
| 397 | |
---|
| 398 | #if defined(SAGE_APP) |
---|
| 399 | sageRect cudaMap; |
---|
| 400 | cudaMap.left = 0.0; |
---|
| 401 | cudaMap.right = 1.0; |
---|
| 402 | cudaMap.bottom = 0.0; |
---|
| 403 | cudaMap.top = 1.0; |
---|
| 404 | |
---|
| 405 | sailConfig scfg; |
---|
| 406 | scfg.init("cuda.conf"); |
---|
| 407 | scfg.setAppName("cuda"); |
---|
| 408 | scfg.rank = 0; |
---|
| 409 | |
---|
| 410 | scfg.resX = wWidth; |
---|
| 411 | scfg.resY = wHeight; |
---|
| 412 | scfg.winX = 0; |
---|
| 413 | scfg.winY = 0; |
---|
| 414 | scfg.winWidth = 2*wWidth; |
---|
| 415 | scfg.winHeight = 2*wHeight; |
---|
| 416 | scfg.imageMap = cudaMap; |
---|
| 417 | scfg.pixFmt = PIXFMT_888; |
---|
| 418 | scfg.rowOrd = BOTTOM_TO_TOP; |
---|
| 419 | scfg.master = true; |
---|
| 420 | |
---|
| 421 | sageInf.init(scfg); |
---|
| 422 | |
---|
| 423 | rgbBuffer = (GLubyte *)sageInf.getBuffer(); |
---|
| 424 | |
---|
| 425 | fprintf(stderr, "sail initialized\n"); |
---|
| 426 | #endif |
---|
| 427 | |
---|
| 428 | glewInit(); |
---|
| 429 | if (! glewIsSupported( |
---|
| 430 | "GL_ARB_vertex_buffer_object" |
---|
| 431 | )) { |
---|
| 432 | fprintf( stderr, "ERROR: Support for necessary OpenGL extensions missing."); |
---|
| 433 | fflush( stderr); |
---|
| 434 | return CUTFalse; |
---|
| 435 | } |
---|
| 436 | return CUTTrue; |
---|
| 437 | } |
---|
| 438 | |
---|
| 439 | |
---|
| 440 | int main(int argc, char** argv) |
---|
| 441 | { |
---|
| 442 | // First initialize OpenGL context, so we can properly set the GL for CUDA. |
---|
| 443 | // This is necessary in order to achieve optimal performance with OpenGL/CUDA interop. |
---|
| 444 | if (CUTFalse == initGL(argc, argv)) { |
---|
| 445 | return CUTFalse; |
---|
| 446 | } |
---|
| 447 | |
---|
| 448 | // use command-line specified CUDA device, otherwise use device with highest Gflops/s |
---|
| 449 | if( cutCheckCmdLineFlag(argc, (const char**)argv, "device") ) |
---|
| 450 | cutilGLDeviceInit(argc, argv); |
---|
| 451 | else { |
---|
| 452 | cudaGLSetGLDevice( cutGetMaxGflopsDeviceId() ); |
---|
| 453 | } |
---|
| 454 | |
---|
| 455 | // automatied build testing harness |
---|
| 456 | if (cutCheckCmdLineFlag(argc, (const char **)argv, "qatest") || |
---|
| 457 | cutCheckCmdLineFlag(argc, (const char **)argv, "noprompt")) |
---|
| 458 | { |
---|
| 459 | g_bQAReadback = true; |
---|
| 460 | } |
---|
| 461 | |
---|
| 462 | // Allocate and initialize host data |
---|
| 463 | GLint bsize; |
---|
| 464 | |
---|
| 465 | cutilCheckError(cutCreateTimer(&timer)); |
---|
| 466 | cutilCheckError(cutResetTimer(timer)); |
---|
| 467 | |
---|
| 468 | hvfield = (cData*)malloc(sizeof(cData) * DS); |
---|
| 469 | memset(hvfield, 0, sizeof(cData) * DS); |
---|
| 470 | |
---|
| 471 | // Allocate and initialize device data |
---|
| 472 | cudaMallocPitch((void**)&dvfield, &tPitch, sizeof(cData)*DIM, DIM); |
---|
| 473 | |
---|
| 474 | cudaMemcpy(dvfield, hvfield, sizeof(cData) * DS, |
---|
| 475 | cudaMemcpyHostToDevice); |
---|
| 476 | // Temporary complex velocity field data |
---|
| 477 | cudaMalloc((void**)&vxfield, sizeof(cData) * PDS); |
---|
| 478 | cudaMalloc((void**)&vyfield, sizeof(cData) * PDS); |
---|
| 479 | |
---|
| 480 | setupTexture(DIM, DIM); |
---|
| 481 | bindTexture(); |
---|
| 482 | |
---|
| 483 | // Create particle array |
---|
| 484 | particles = (cData*)malloc(sizeof(cData) * DS); |
---|
| 485 | memset(particles, 0, sizeof(cData) * DS); |
---|
| 486 | |
---|
| 487 | initParticles(particles, DIM, DIM); |
---|
| 488 | |
---|
| 489 | // Create CUFFT transform plan configuration |
---|
| 490 | cufftPlan2d(&planr2c, DIM, DIM, CUFFT_R2C); |
---|
| 491 | cufftPlan2d(&planc2r, DIM, DIM, CUFFT_C2R); |
---|
| 492 | #if 0 |
---|
| 493 | glutInit(&argc, argv); |
---|
| 494 | glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE); |
---|
| 495 | glutInitWindowSize(wWidth, wHeight); |
---|
| 496 | glutCreateWindow("Compute Stable Fluids"); |
---|
| 497 | glutDisplayFunc(display); |
---|
| 498 | glutKeyboardFunc(keyboard); |
---|
| 499 | glutMouseFunc(click); |
---|
| 500 | glutMotionFunc(motion); |
---|
| 501 | glutReshapeFunc(reshape); |
---|
| 502 | glutIdleFunc(idle); |
---|
| 503 | |
---|
| 504 | glewInit(); |
---|
| 505 | if (! glewIsSupported( |
---|
| 506 | "GL_ARB_vertex_buffer_object" |
---|
| 507 | )) { |
---|
| 508 | fprintf( stderr, "ERROR: Support for necessary OpenGL extensions missing."); |
---|
| 509 | fflush( stderr); |
---|
| 510 | return CUTFalse; |
---|
| 511 | } |
---|
| 512 | #endif |
---|
| 513 | glGenBuffersARB(1, &vbo); |
---|
| 514 | glBindBufferARB(GL_ARRAY_BUFFER_ARB, vbo); |
---|
| 515 | glBufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(cData) * DS, |
---|
| 516 | particles, GL_DYNAMIC_DRAW_ARB); |
---|
| 517 | |
---|
| 518 | glGetBufferParameterivARB(GL_ARRAY_BUFFER_ARB, GL_BUFFER_SIZE_ARB, &bsize); |
---|
| 519 | if (bsize != (sizeof(cData) * DS)) |
---|
| 520 | goto EXTERR; |
---|
| 521 | glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0); |
---|
| 522 | |
---|
| 523 | cudaGLRegisterBufferObject(vbo); |
---|
| 524 | cutilCheckMsg("cudaGLRegisterBufferObject failed"); |
---|
| 525 | |
---|
| 526 | if (g_bQAReadback) |
---|
| 527 | { |
---|
| 528 | /* g_CheckRender = new CheckBackBuffer(wWidth, wHeight, 4); |
---|
| 529 | g_CheckRender->setPixelFormat(GL_RGBA); |
---|
| 530 | g_CheckRender->setExecPath(argv[0]); |
---|
| 531 | g_CheckRender->EnableQAReadback(true); |
---|
| 532 | |
---|
| 533 | autoTest(); |
---|
| 534 | */ |
---|
| 535 | } else { |
---|
| 536 | atexit(cleanup); |
---|
| 537 | glutMainLoop(); |
---|
| 538 | } |
---|
| 539 | |
---|
| 540 | cudaThreadExit(); |
---|
| 541 | return 0; |
---|
| 542 | |
---|
| 543 | EXTERR: |
---|
| 544 | printf("Failed to initialize GL extensions.\n"); |
---|
| 545 | |
---|
| 546 | cudaThreadExit(); |
---|
| 547 | return 1; |
---|
| 548 | } |
---|