source: trunk/src/testing/app/fluidsGL/fluidsGL.cu @ 4

Revision 4, 15.6 KB checked in by ajaworski, 13 years ago (diff)

Added modified SAGE sources

RevLine 
[4]1/*
2 * Copyright 1993-2009 NVIDIA Corporation.  All rights reserved.
3 *
4 * NVIDIA Corporation and its licensors retain all intellectual property and
5 * proprietary rights in and to this software and related documentation and
6 * any modifications thereto.  Any use, reproduction, disclosure, or distribution
7 * of this software and related documentation without an express license
8 * agreement from NVIDIA Corporation is strictly prohibited.
9 *
10 */
11#include <stdlib.h>
12#include <stdio.h>
13#include <string.h>
14#include <GL/glew.h>
15#include <cufft.h>
16#include <cutil_inline.h>
17#include <cutil_gl_inline.h>
18#include <cuda_gl_interop.h>
19//#include <rendercheck_gl.h>
20
21#if defined(SAGE_APP)
22#include "sail.h"
23GLubyte *rgbBuffer = 0;
24sail sageInf; // sail object
25#endif
26
27#if defined(__APPLE__) || defined(MACOSX)
28#include <GLUT/glut.h>
29#else
30#include <GL/glut.h>
31#endif
32
33#include "fluidsGL_kernels.cu"
34
35#define MAX_EPSILON_ERROR 1.0f
36
37// Define the files that are to be save and the reference images for validation
38const char *sOriginal[] =
39{
40    "fluidsGL.ppm",
41    NULL
42};
43
44const char *sReference[] =
45{
46    "ref_fluidsGL.ppm",
47    NULL
48};
49
50// CUDA example code that implements the frequency space version of
51// Jos Stam's paper 'Stable Fluids' in 2D. This application uses the
52// CUDA FFT library (CUFFT) to perform velocity diffusion and to
53// force non-divergence in the velocity field at each time step. It uses
54// CUDA-OpenGL interoperability to update the particle field directly
55// instead of doing a copy to system memory before drawing. Texture is
56// used for automatic bilinear interpolation at the velocity advection step.
57
58#ifdef __DEVICE_EMULATION__
59#define DIM    64        // Square size of solver domain
60#else
61#define DIM    768       // Square size of solver domani
62#endif
63#define DS    (DIM*DIM)  // Total domain size
64#define CPADW (DIM/2+1)  // Padded width for real->complex in-place FFT
65#define RPADW (2*(DIM/2+1))  // Padded width for real->complex in-place FFT
66#define PDS   (DIM*CPADW) // Padded total domain size
67
68#define DT     0.09f     // Delta T for interative solver
69#define VIS    0.0025f   // Viscosity constant
70#define FORCE (5.8f*DIM) // Force scale factor
71#define FR     4         // Force update radius
72
73#define TILEX 64 // Tile width
74#define TILEY 64 // Tile height
75#define TIDSX 64 // Tids in X
76#define TIDSY 4  // Tids in Y
77
78void cleanup(void);       
79
80// CUFFT plan handle
81static cufftHandle planr2c;
82static cufftHandle planc2r;
83static cData *vxfield = NULL;
84static cData *vyfield = NULL;
85
86cData *hvfield = NULL;
87cData *dvfield = NULL;
88static int wWidth = max(512,DIM);
89static int wHeight = max(512,DIM);
90
91static int clicked = 0;
92static int fpsCount = 0;
93static int fpsLimit = 1;
94unsigned int timer;
95
96// Particle data
97GLuint vbo = 0;                 // OpenGL vertex buffer object
98static cData *particles = NULL; // particle positions in host memory
99static int lastx = 0, lasty = 0;
100
101// Texture pitch
102size_t tPitch = 0; // Now this is compatible with gcc in 64-bit
103
104bool                              g_bQAReadback     = false;
105bool                              g_bQAAddTestForce = true;
106int                                       g_iFrameToCompare = 100;
107int                   g_TotalErrors     = 0;
108
109// CheckFBO/BackBuffer class objects
110//CheckRender       *g_CheckRender = NULL;
111
112void autoTest();
113
114
115void addForces(cData *v, int dx, int dy, int spx, int spy, float fx, float fy, int r) {
116
117    dim3 tids(2*r+1, 2*r+1);
118   
119    addForces_k<<<1, tids>>>(v, dx, dy, spx, spy, fx, fy, r, tPitch);
120    cutilCheckMsg("addForces_k failed.");
121}
122
123void advectVelocity(cData *v, float *vx, float *vy,
124                    int dx, int pdx, int dy, float dt) {
125   
126    dim3 grid((dx/TILEX)+(!(dx%TILEX)?0:1), (dy/TILEY)+(!(dy%TILEY)?0:1));
127
128    dim3 tids(TIDSX, TIDSY);
129
130    updateTexture(v, DIM*sizeof(cData), DIM, tPitch);
131    advectVelocity_k<<<grid, tids>>>(v, vx, vy, dx, pdx, dy, dt, TILEY/TIDSY);
132
133    cutilCheckMsg("advectVelocity_k failed.");
134}
135
136void diffuseProject(cData *vx, cData *vy, int dx, int dy, float dt,
137                    float visc) {
138    // Forward FFT
139    cufftExecR2C(planr2c, (cufftReal*)vx, (cufftComplex*)vx);
140    cufftExecR2C(planr2c, (cufftReal*)vy, (cufftComplex*)vy);
141
142    uint3 grid = make_uint3((dx/TILEX)+(!(dx%TILEX)?0:1),
143                            (dy/TILEY)+(!(dy%TILEY)?0:1), 1);
144
145    uint3 tids = make_uint3(TIDSX, TIDSY, 1);
146   
147    diffuseProject_k<<<grid, tids>>>(vx, vy, dx, dy, dt, visc, TILEY/TIDSY);
148    cutilCheckMsg("diffuseProject_k failed.");
149
150    // Inverse FFT
151    cufftExecC2R(planc2r, (cufftComplex*)vx, (cufftReal*)vx);
152    cufftExecC2R(planc2r, (cufftComplex*)vy, (cufftReal*)vy);
153}
154
155void updateVelocity(cData *v, float *vx, float *vy,
156                    int dx, int pdx, int dy) {
157
158    dim3 grid((dx/TILEX)+(!(dx%TILEX)?0:1), (dy/TILEY)+(!(dy%TILEY)?0:1));
159
160    dim3 tids(TIDSX, TIDSY);
161
162    updateVelocity_k<<<grid, tids>>>(v, vx, vy, dx, pdx, dy, TILEY/TIDSY, tPitch);
163    cutilCheckMsg("updateVelocity_k failed.");
164}
165
166void advectParticles(GLuint buffer, cData *v, int dx, int dy, float dt) {
167   
168    dim3 grid((dx/TILEX)+(!(dx%TILEX)?0:1), (dy/TILEY)+(!(dy%TILEY)?0:1));
169
170    dim3 tids(TIDSX, TIDSY);
171
172    cData *p;
173    cudaGLMapBufferObject((void**)&p, buffer);
174    cutilCheckMsg("cudaGLMapBufferObject failed");
175   
176    advectParticles_k<<<grid, tids>>>(p, v, dx, dy, dt, TILEY/TIDSY, tPitch);
177    cutilCheckMsg("advectParticles_k failed.");
178   
179    cudaGLUnmapBufferObject(buffer);
180    cutilCheckMsg("cudaGLUnmapBufferObject failed");
181}
182
183void display(void) { 
184   cutilCheckError(cutStartTimer(timer)); 
185   
186   // simulate fluid
187   advectVelocity(dvfield, (float*)vxfield, (float*)vyfield, DIM, RPADW, DIM, DT);
188   diffuseProject(vxfield, vyfield, CPADW, DIM, DT, VIS);
189   updateVelocity(dvfield, (float*)vxfield, (float*)vyfield, DIM, RPADW, DIM);
190   advectParticles(vbo, dvfield, DIM, DIM, DT);
191   
192   // render points from vertex buffer
193   glClear(GL_COLOR_BUFFER_BIT);
194   glColor4f(0,1,0,0.5f); glPointSize(1);
195   glEnable(GL_POINT_SMOOTH);
196   glEnable(GL_BLEND);
197   glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
198   glEnableClientState(GL_VERTEX_ARRAY);   
199   glDisable(GL_DEPTH_TEST);
200   glDisable(GL_CULL_FACE);
201   glBindBufferARB(GL_ARRAY_BUFFER_ARB, vbo);
202   glVertexPointer(2, GL_FLOAT, 0, NULL);
203   glDrawArrays(GL_POINTS, 0, DS);
204   glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0);
205   glDisableClientState(GL_VERTEX_ARRAY);
206   glDisableClientState(GL_TEXTURE_COORD_ARRAY);
207   glDisable(GL_TEXTURE_2D);
208
209   // Finish timing before swap buffers to avoid refresh sync
210   cutilCheckError(cutStopTimer(timer)); 
211   glutSwapBuffers();
212   
213   fpsCount++;
214   if (fpsCount == fpsLimit) {
215       char fps[256];
216       float ifps = 1.f / (cutGetAverageTimerValue(timer) / 1000.f);
217       sprintf(fps, "Cuda/GL Stable Fluids (%d x %d): %3.1f fps", DIM, DIM, ifps); 
218       glutSetWindowTitle(fps);
219       fpsCount = 0;
220       fpsLimit = (int)max(ifps, 1.f);
221       cutilCheckError(cutResetTimer(timer)); 
222    }
223
224#if defined(SAGE_APP)
225        glReadPixels(0, 0, wWidth, wHeight, GL_RGB, GL_UNSIGNED_BYTE, rgbBuffer);
226        sageInf.swapBuffer();
227        rgbBuffer = (GLubyte *)sageInf.getBuffer();
228
229         sageMessage msg;
230         if (sageInf.checkMsg(msg, false) > 0) {
231                 switch (msg.getCode()) {
232                         case APP_QUIT : {
233                                exit(0);
234                                break;
235                        }
236                 }
237         }
238#endif
239
240    glutPostRedisplay();
241}
242
243void autoTest()
244{
245        for(int count=0;count<g_iFrameToCompare;count++)
246        {
247                // add in a little force so the automated testing is interesing.
248                if(g_bQAReadback && g_bQAAddTestForce)
249                {
250                        int x = wWidth/(count+1); int y = wHeight/(count+1);
251                        float fx = (x / (float)wWidth);       
252                        float fy = (y / (float)wHeight);
253                        int nx = (int)(fx * DIM);       
254                        int ny = (int)(fy * DIM);   
255
256                        int ddx = 35;
257                        int ddy = 35;
258                        fx = ddx / (float)wWidth;
259                        fy = ddy / (float)wHeight;
260                        int spy = ny-FR;
261                        int spx = nx-FR;
262
263            addForces(dvfield, DIM, DIM, spx, spy, FORCE * DT * fx, FORCE * DT * fy, FR);
264            lastx = x; lasty = y;
265                        //g_bQAAddTestForce = false; // only add it once
266                }
267        display();
268    }
269
270        // compare to offical reference image, printing PASS or FAIL.
271    /*
272    printf("> (Frame %d) Readback BackBuffer\n", 100);
273    g_CheckRender->readback( wWidth, wHeight, NULL );
274    g_CheckRender->savePPM(sOriginal[0], true, NULL);
275    if (!g_CheckRender->PPMvsPPM(sOriginal[0], sReference[0], MAX_EPSILON_ERROR)) {
276        g_TotalErrors++;
277    }
278    */
279}
280
281
282void idle(void) {
283    glutPostRedisplay();
284}
285
286void initParticles(cData *p, int dx, int dy) {
287    int i, j;
288    for (i = 0; i < dy; i++) {
289        for (j = 0; j < dx; j++) {
290            p[i*dx+j].x = ((j+0.5)/dx) +
291                          (rand() / (float)RAND_MAX - 0.5f) / dx;
292            p[i*dx+j].y = ((i+0.5)/dy) +
293                          (rand() / (float)RAND_MAX - 0.5f) / dy;
294        }
295    }
296}
297
298void keyboard( unsigned char key, int x, int y) {
299    switch( key) {
300        case 27:
301#if defined(SAGE_APP)
302                sageInf.shutdown();
303#endif
304                exit (0);
305        case 'r':
306            memset(hvfield, 0, sizeof(cData) * DS);
307            cudaMemcpy(dvfield, hvfield, sizeof(cData) * DS,
308                       cudaMemcpyHostToDevice);
309
310            initParticles(particles, DIM, DIM);
311
312            cudaGLUnregisterBufferObject(vbo);
313            cutilCheckMsg("cudaGLUnregisterBufferObject failed");
314   
315            glBindBufferARB(GL_ARRAY_BUFFER_ARB, vbo);
316            glBufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(cData) * DS,
317                            particles, GL_DYNAMIC_DRAW_ARB);
318            glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0);
319
320            cudaGLRegisterBufferObject(vbo);
321            cutilCheckMsg("cudaGLRegisterBufferObject failed");
322            break;
323        default: break;
324    }
325}
326
327void click(int button, int updown, int x, int y) {
328    lastx = x; lasty = y;
329    clicked = !clicked;
330}
331
332void motion (int x, int y) {
333    // Convert motion coordinates to domain
334    float fx = (lastx / (float)wWidth);       
335    float fy = (lasty / (float)wHeight);
336    int nx = (int)(fx * DIM);       
337    int ny = (int)(fy * DIM);   
338   
339    if (clicked && nx < DIM-FR && nx > FR-1 && ny < DIM-FR && ny > FR-1) {
340        int ddx = x - lastx;
341        int ddy = y - lasty;
342        fx = ddx / (float)wWidth;
343        fy = ddy / (float)wHeight;
344        int spy = ny-FR;
345        int spx = nx-FR;
346        addForces(dvfield, DIM, DIM, spx, spy, FORCE * DT * fx, FORCE * DT * fy, FR);
347        lastx = x; lasty = y;
348    }
349    glutPostRedisplay();
350}
351
352void reshape(int x, int y) {
353    // no resize for SAGE
354    //wWidth = x; wHeight = y;
355
356    glViewport(0, 0, x, y);
357    glMatrixMode(GL_PROJECTION);
358    glLoadIdentity();
359    glOrtho(0, 1, 1, 0, 0, 1);
360    glMatrixMode(GL_MODELVIEW);
361    glLoadIdentity();
362    glutPostRedisplay();
363}
364
365void cleanup(void) {
366    cudaGLUnregisterBufferObject(vbo);
367    cutilCheckMsg("cudaGLUnregisterBufferObject failed");
368
369    unbindTexture();
370    deleteTexture();
371
372    // Free all host and device resources
373    free(hvfield); free(particles);
374    cudaFree(dvfield);
375    cudaFree(vxfield); cudaFree(vyfield);
376    cufftDestroy(planr2c);
377    cufftDestroy(planc2r);
378
379    glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0);
380    glDeleteBuffersARB(1, &vbo);
381   
382    cutilCheckError(cutDeleteTimer(timer)); 
383}
384
385int initGL(int argc, char **argv)
386{
387    glutInit(&argc, argv);
388    glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE);
389    glutInitWindowSize(wWidth, wHeight);
390    glutCreateWindow("Compute Stable Fluids");
391    glutDisplayFunc(display);
392    glutKeyboardFunc(keyboard);
393    glutMouseFunc(click);
394    glutMotionFunc(motion);
395    glutReshapeFunc(reshape);
396    glutIdleFunc(idle);
397
398#if defined(SAGE_APP)
399         sageRect cudaMap;
400         cudaMap.left = 0.0;
401         cudaMap.right = 1.0;
402         cudaMap.bottom = 0.0;
403         cudaMap.top = 1.0;
404
405         sailConfig scfg;
406         scfg.init("cuda.conf");
407         scfg.setAppName("cuda");
408         scfg.rank = 0;
409
410         scfg.resX = wWidth;
411         scfg.resY = wHeight;
412         scfg.winX = 0;
413         scfg.winY = 0;
414         scfg.winWidth = 2*wWidth;
415         scfg.winHeight = 2*wHeight;
416         scfg.imageMap = cudaMap;
417         scfg.pixFmt = PIXFMT_888;
418         scfg.rowOrd = BOTTOM_TO_TOP;
419         scfg.master = true;
420
421         sageInf.init(scfg);
422
423        rgbBuffer = (GLubyte *)sageInf.getBuffer();
424
425         fprintf(stderr, "sail initialized\n");
426#endif
427
428    glewInit();
429    if (! glewIsSupported(
430        "GL_ARB_vertex_buffer_object"
431                )) {
432        fprintf( stderr, "ERROR: Support for necessary OpenGL extensions missing.");
433        fflush( stderr);
434        return CUTFalse;
435    }
436    return CUTTrue;
437}
438
439
440int main(int argc, char** argv)
441{
442    // First initialize OpenGL context, so we can properly set the GL for CUDA.
443    // This is necessary in order to achieve optimal performance with OpenGL/CUDA interop.
444    if (CUTFalse == initGL(argc, argv)) {
445        return CUTFalse;
446    }
447
448    // use command-line specified CUDA device, otherwise use device with highest Gflops/s
449    if( cutCheckCmdLineFlag(argc, (const char**)argv, "device") )
450        cutilGLDeviceInit(argc, argv);
451    else {
452        cudaGLSetGLDevice( cutGetMaxGflopsDeviceId() );
453    }
454
455        // automatied build testing harness
456    if (cutCheckCmdLineFlag(argc, (const char **)argv, "qatest") ||
457                cutCheckCmdLineFlag(argc, (const char **)argv, "noprompt"))
458    {
459        g_bQAReadback = true;
460    }
461
462    // Allocate and initialize host data
463    GLint bsize;
464
465    cutilCheckError(cutCreateTimer(&timer));
466    cutilCheckError(cutResetTimer(timer)); 
467   
468    hvfield = (cData*)malloc(sizeof(cData) * DS);
469    memset(hvfield, 0, sizeof(cData) * DS);
470 
471    // Allocate and initialize device data
472    cudaMallocPitch((void**)&dvfield, &tPitch, sizeof(cData)*DIM, DIM);
473   
474    cudaMemcpy(dvfield, hvfield, sizeof(cData) * DS,
475               cudaMemcpyHostToDevice);
476    // Temporary complex velocity field data     
477    cudaMalloc((void**)&vxfield, sizeof(cData) * PDS);
478    cudaMalloc((void**)&vyfield, sizeof(cData) * PDS);
479   
480    setupTexture(DIM, DIM);
481    bindTexture();
482   
483    // Create particle array
484    particles = (cData*)malloc(sizeof(cData) * DS);
485    memset(particles, 0, sizeof(cData) * DS);   
486   
487    initParticles(particles, DIM, DIM);
488
489    // Create CUFFT transform plan configuration
490    cufftPlan2d(&planr2c, DIM, DIM, CUFFT_R2C);
491    cufftPlan2d(&planc2r, DIM, DIM, CUFFT_C2R);
492#if 0   
493    glutInit(&argc, argv);
494    glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE);
495    glutInitWindowSize(wWidth, wHeight);
496    glutCreateWindow("Compute Stable Fluids");
497    glutDisplayFunc(display);
498    glutKeyboardFunc(keyboard);
499    glutMouseFunc(click);
500    glutMotionFunc(motion);
501    glutReshapeFunc(reshape);
502    glutIdleFunc(idle);
503
504    glewInit();
505    if (! glewIsSupported(
506        "GL_ARB_vertex_buffer_object"
507                )) {
508        fprintf( stderr, "ERROR: Support for necessary OpenGL extensions missing.");
509        fflush( stderr);
510        return CUTFalse;
511    }
512#endif
513    glGenBuffersARB(1, &vbo);
514    glBindBufferARB(GL_ARRAY_BUFFER_ARB, vbo);
515    glBufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(cData) * DS,
516                    particles, GL_DYNAMIC_DRAW_ARB);
517
518    glGetBufferParameterivARB(GL_ARRAY_BUFFER_ARB, GL_BUFFER_SIZE_ARB, &bsize);
519    if (bsize != (sizeof(cData) * DS))
520        goto EXTERR;
521    glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0);
522
523    cudaGLRegisterBufferObject(vbo);
524    cutilCheckMsg("cudaGLRegisterBufferObject failed");
525
526    if (g_bQAReadback)
527    {
528/*      g_CheckRender = new CheckBackBuffer(wWidth, wHeight, 4);
529        g_CheckRender->setPixelFormat(GL_RGBA);
530        g_CheckRender->setExecPath(argv[0]);
531        g_CheckRender->EnableQAReadback(true);
532
533        autoTest();
534*/
535    } else {
536        atexit(cleanup);
537        glutMainLoop();
538    }
539
540    cudaThreadExit();
541    return 0;
542
543EXTERR:
544    printf("Failed to initialize GL extensions.\n");
545
546    cudaThreadExit();
547    return 1;
548}
Note: See TracBrowser for help on using the repository browser.