[4] | 1 | ################################################################################ |
---|
| 2 | # |
---|
| 3 | # Copyright 1993-2006 NVIDIA Corporation. All rights reserved. |
---|
| 4 | # |
---|
| 5 | # NOTICE TO USER: |
---|
| 6 | # |
---|
| 7 | # This source code is subject to NVIDIA ownership rights under U.S. and |
---|
| 8 | # international Copyright laws. |
---|
| 9 | # |
---|
| 10 | # NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE |
---|
| 11 | # CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR |
---|
| 12 | # IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH |
---|
| 13 | # REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF |
---|
| 14 | # MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. |
---|
| 15 | # IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, |
---|
| 16 | # OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS |
---|
| 17 | # OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE |
---|
| 18 | # OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE |
---|
| 19 | # OR PERFORMANCE OF THIS SOURCE CODE. |
---|
| 20 | # |
---|
| 21 | # U.S. Government End Users. This source code is a "commercial item" as |
---|
| 22 | # that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of |
---|
| 23 | # "commercial computer software" and "commercial computer software |
---|
| 24 | # documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) |
---|
| 25 | # and is provided to the U.S. Government only as a commercial end item. |
---|
| 26 | # Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through |
---|
| 27 | # 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the |
---|
| 28 | # source code with only those rights set forth herein. |
---|
| 29 | # |
---|
| 30 | ################################################################################ |
---|
| 31 | # |
---|
| 32 | # Common build script |
---|
| 33 | # |
---|
| 34 | ################################################################################ |
---|
| 35 | |
---|
# Register the file suffixes used by the CUDA SDK build with make.
.SUFFIXES : .cu .cu_dbg.o .c_dbg.o .cpp_dbg.o .cu_rel.o .c_rel.o .cpp_rel.o .cubin .ptx

# Compute-capability targets for which per-architecture object rules are
# generated further down; append new sm_XX names as new devices are released.
SM_VERSIONS := sm_10 sm_11 sm_12 sm_13

# CUDA toolkit location: /usr/local/cuda unless CUDA_INSTALL_PATH is already
# set.  A non-empty `cuda-install` variable takes precedence over both.
CUDA_INSTALL_PATH ?= /usr/local/cuda
ifdef cuda-install
  CUDA_INSTALL_PATH := $(cuda-install)
endif
---|
| 46 | |
---|
# Detect the host operating system once, at parse time.  OSUPPER/OSLOWER hold
# the `uname -s` kernel name in upper/lower case ('linux' on Linux, 'darwin'
# on OS X); both are empty if uname fails.
#  * `:=` is used so make runs each pipeline exactly once instead of on every
#    reference to the variable.
#  * The tr character classes are quoted so the shell cannot glob-expand
#    them against single-character file names in the current directory.
OSUPPER := $(shell uname -s 2>/dev/null | tr '[:lower:]' '[:upper:]')
OSLOWER := $(shell uname -s 2>/dev/null | tr '[:upper:]' '[:lower:]')

# Non-empty (the string DARWIN) only when OSUPPER contains DARWIN, i.e. OS X.
DARWIN := $(strip $(findstring DARWIN, $(OSUPPER)))

# Detect a 64-bit host: non-empty when the `uname -m` machine name contains
# "64" (for example x86_64), empty otherwise.
HP_64 := $(shell uname -m | grep 64)
---|
| 55 | |
---|
# SDK directory layout.  Each `?=` setting below yields to a value already
# supplied by the including Makefile, the environment or the command line.
#   SRCDIR      prefix prepended to source file names in the pattern rules
#   ROOTDIR     root of the SDK tree
#   ROOTBINDIR  executable output root, derived from SAGE_DIRECTORY (expands
#               to /bin when that variable is unset)
#   BINDIR      directory the executable is linked into
#   ROOTOBJDIR  root of the per-configuration object directories
SRCDIR     ?=
ROOTDIR    ?= $(HOME)/NVIDIA_GPU_Computing_SDK/C
ROOTBINDIR ?= $(SAGE_DIRECTORY)/bin
BINDIR     ?= $(ROOTBINDIR)
ROOTOBJDIR ?= obj

# Derived locations; these are always recomputed from ROOTDIR.
LIBDIR    := $(ROOTDIR)/lib
COMMONDIR := $(ROOTDIR)/common
---|
| 65 | |
---|
# Toolchain.  nvcc is taken from the same location on 32- and 64-bit hosts,
# so no word-size switch is needed for it.
NVCC := $(CUDA_INSTALL_PATH)/bin/nvcc

# Host compilers and the link driver.
CC   := gcc
CXX  := g++
LINK := g++ -fPIC
---|
| 76 | |
---|
# Header search path: the current directory, the CUDA toolkit headers and the
# SDK's shared headers.
INCLUDES += -I. -I$(CUDA_INSTALL_PATH)/include -I$(COMMONDIR)/inc

# Architecture flag handed to nvcc by the cubin/ptx rules (empty by default).
CUBIN_ARCH_FLAG :=

# Warnings enabled for C++ sources.
CXXWARN_FLAGS := -W -Wall -Wimplicit -Wswitch -Wformat \
                 -Wchar-subscripts -Wparentheses -Wmultichar -Wtrigraphs \
                 -Wpointer-arith -Wcast-align -Wreturn-type \
                 -Wno-unused-function \
                 $(SPACE)

# C sources get the C++ set plus the C-only prototype/declaration checks.
CWARN_FLAGS := $(CXXWARN_FLAGS) \
               -Wstrict-prototypes -Wmissing-prototypes \
               -Wmissing-declarations -Wnested-externs -Wmain

# Per-compiler flag variables; the sections below append to them.
NVCCFLAGS :=
CXXFLAGS  := $(CXXWARN_FLAGS)
CFLAGS    := $(CWARN_FLAGS)
---|
| 110 | |
---|
# Flags shared by every compiler: the include path plus the UNIX define.
COMMONFLAGS += $(INCLUDES) -DUNIX

# Build configuration, selected by setting dbg=1.
#   debug   : -g, _DEBUG defined, BINSUBDIR "debug", libraries suffixed "D"
#   release : -O2, -fno-strict-aliasing, BINSUBDIR "release", no suffix
ifneq ($(dbg),1)
  BINSUBDIR   := release
  LIBSUFFIX   :=
  COMMONFLAGS += -O2
  NVCCFLAGS   += --compiler-options -fno-strict-aliasing
  CXXFLAGS    += -fno-strict-aliasing
  CFLAGS      += -fno-strict-aliasing
else
  BINSUBDIR   := debug
  LIBSUFFIX   := D
  COMMONFLAGS += -g
  NVCCFLAGS   += -D_DEBUG
  CXXFLAGS    += -D_DEBUG
  CFLAGS      += -D_DEBUG
endif

# Optional arch/SM version flags (such as -arch sm_11) are deliberately not
# appended to NVCCFLAGS here; $(SMVERSIONFLAGS) is passed directly on the
# nvcc command lines of the pattern rules instead.

# Architecture flag for cubin builds, reset to empty; the OpenGL section
# below may override it.
CUBIN_ARCH_FLAG :=
---|
| 136 | |
---|
# OpenGL support, enabled with USEGLLIB=1.  GLEW is linked alongside GL/GLU.
ifeq ($(USEGLLIB),1)
  ifeq ($(DARWIN),)
    # Non-Darwin hosts: X11-based GL stack, with the GLEW build and X11
    # library directory chosen by the host word size.
    OPENGLLIB := -lGL -lGLU -lX11 -lXi -lXmu
    ifneq ($(strip $(HP_64)),)
      OPENGLLIB += -lGLEW_x86_64 -L/usr/X11R6/lib64
    else
      OPENGLLIB += -lGLEW -L/usr/X11R6/lib
    endif
  else
    # OS X: GL/GLU from the OpenGL framework plus the SDK's static GLEW.
    OPENGLLIB := -L/System/Library/Frameworks/OpenGL.framework/Libraries -lGL -lGLU $(COMMONDIR)/lib/$(OSLOWER)/libGLEW.a
  endif

  # NOTE(review): -m64 is applied whenever OpenGL is enabled, independent of
  # HP_64 -- confirm this is intended on 32-bit hosts.
  CUBIN_ARCH_FLAG := -m64
endif
---|
| 154 | |
---|
# GLUT (USEGLUT=1): a framework on OS X, a regular library elsewhere.
ifeq ($(USEGLUT),1)
  ifeq ($(DARWIN),)
    OPENGLLIB += -lglut
  else
    OPENGLLIB += -framework GLUT
  endif
endif

# Optional SDK helper libraries; $(LIBSUFFIX) selects the debug variant.
ifeq ($(USEPARAMGL),1)
  PARAMGLLIB := -lparamgl$(LIBSUFFIX)
endif

ifeq ($(USERENDERCHECKGL),1)
  RENDERCHECKGLLIB := -lrendercheckgl$(LIBSUFFIX)
endif

# CUDPP (USECUDPP=1): the library name is assembled as
#   cudpp[64][$(LIBSUFFIX)][_emu]
# from the host word size, the build configuration and emu=1.
ifeq ($(USECUDPP), 1)
  ifneq ($(strip $(HP_64)),)
    CUDPPLIB := -lcudpp64$(LIBSUFFIX)
  else
    CUDPPLIB := -lcudpp$(LIBSUFFIX)
  endif
  ifeq ($(emu), 1)
    CUDPPLIB := $(CUDPPLIB)_emu
  endif
endif
---|
| 184 | |
---|
# Library search path: the CUDA toolkit (lib or lib64 depending on the host
# word size), the SDK library directory and the SDK's per-OS common libraries.
ifeq ($(strip $(HP_64)),)
  LIB := -L$(CUDA_INSTALL_PATH)/lib -L$(LIBDIR) -L$(COMMONDIR)/lib/$(OSLOWER)
else
  LIB := -L$(CUDA_INSTALL_PATH)/lib64 -L$(LIBDIR) -L$(COMMONDIR)/lib/$(OSLOWER)
endif

# Libraries selected by the USE* switches.  LIB is only appended to: the -L
# directories already in it apply to every -l option regardless of position,
# so LIB is not re-appended to itself (which merely duplicated every -L flag
# on the link line).
ifeq ($(USECUDADYNLIB),1)
  # CUDA and CUDART are loaded dynamically at run time, so neither library
  # is named here; -ldl/-rdynamic are added instead.
  LIB += ${OPENGLLIB} $(PARAMGLLIB) $(RENDERCHECKGLLIB) $(CUDPPLIB) -ldl -rdynamic
else
  # Link directly against the driver API (USEDRVAPI=1) or the runtime.
  ifeq ($(USEDRVAPI),1)
    LIB += -lcuda ${OPENGLLIB} $(PARAMGLLIB) $(RENDERCHECKGLLIB) $(CUDPPLIB)
  else
    LIB += -lcudart ${OPENGLLIB} $(PARAMGLLIB) $(RENDERCHECKGLLIB) $(CUDPPLIB)
  endif
endif

# CUFFT (USECUFFT=1); emu=1 selects the emulation build of the library.
ifeq ($(USECUFFT),1)
  ifeq ($(emu),1)
    LIB += -lcufftemu
  else
    LIB += -lcufft
  endif
endif

# CUBLAS (USECUBLAS=1); emu=1 selects the emulation build of the library.
ifeq ($(USECUBLAS),1)
  ifeq ($(emu),1)
    LIB += -lcublasemu
  else
    LIB += -lcublas
  endif
endif
---|
| 219 | |
---|
# Lib/exe configuration: a non-empty STATIC_LIB selects a static-library
# build, otherwise $(EXECUTABLE) is linked.  LINKLINE is a recursive (`=`)
# variable because OBJS is only filled in further down.
ifneq ($(STATIC_LIB),)
  # Archive the objects into $(LIBDIR).  LIBSUFFIX is inserted before the
  # trailing ".a" of the library name only: patsubst is anchored to the end
  # of the word, so a ".a" elsewhere in the name or in the LIBDIR path is
  # left untouched (subst over the whole path rewrote every occurrence).
  TARGETDIR := $(LIBDIR)
  TARGET    := $(LIBDIR)/$(patsubst %.a,%$(LIBSUFFIX).a,$(STATIC_LIB))
  LINKLINE   = ar rucv $(TARGET) $(OBJS)
else
  # cutil is linked unless the including Makefile sets OMIT_CUTIL_LIB=1.
  ifneq ($(OMIT_CUTIL_LIB),1)
    LIB += -lcutil$(LIBSUFFIX)
  endif

  # Device emulation configuration (emu=1): objects go to emu<config> and
  # the host compilers see the same __DEVICE_EMULATION__ define, for
  # consistency.
  ifeq ($(emu), 1)
    NVCCFLAGS   += -deviceemu
    CUDACCFLAGS +=
    BINSUBDIR   := emu$(BINSUBDIR)
    CXXFLAGS    += -D__DEVICE_EMULATION__
    CFLAGS      += -D__DEVICE_EMULATION__
  endif

  # The executable is linked into $(BINDIR) against SAGE's libsail.
  TARGETDIR := $(BINDIR)
  TARGET    := $(TARGETDIR)/$(EXECUTABLE)
  LINKLINE   = $(LINK) -o $(TARGET) $(OBJS) $(LIB) -L${SAGE_DIRECTORY}/lib -lsail
endif
---|
| 242 | |
---|
# Recipe echo control: every recipe line is prefixed with $(VERBOSE), which
# is "@" (do not echo the command) unless verbose=1 is given.
ifneq ($(verbose), 1)
  VERBOSE := @
else
  VERBOSE :=
endif

################################################################################
# Translate user-supplied switches into compiler flags
################################################################################
# fastmath=1: pass -use_fast_math to nvcc.
ifeq ($(fastmath), 1)
  NVCCFLAGS += -use_fast_math
endif

# keep=1: pass -keep to nvcc; the listed file patterns are what `clean`
# removes afterwards.
ifeq ($(keep), 1)
  NVCCFLAGS       += -keep
  NVCC_KEEP_CLEAN := *.i* *.cubin *.cu.c *.cudafe* *.fatbin.c *.ptx
endif

# maxregisters=N: pass -maxrregcount N to nvcc.
ifdef maxregisters
  NVCCFLAGS += -maxrregcount $(maxregisters)
endif

# Extra nvcc flags supplied by the including Makefile.
NVCCFLAGS += $(CUDACCFLAGS)

# Flags common to all three compilers.
NVCCFLAGS += $(COMMONFLAGS)
CXXFLAGS  += $(COMMONFLAGS)
CFLAGS    += $(COMMONFLAGS)

# nvcc_warn_verbose=1: forward each C++ warning flag to nvcc's host compiler,
# one --compiler-options per flag.
ifeq ($(nvcc_warn_verbose),1)
  NVCCFLAGS += $(addprefix --compiler-options ,$(CXXWARN_FLAGS))
  NVCCFLAGS += --compiler-options -fno-strict-aliasing
endif
---|
| 278 | |
---|
################################################################################
# Output locations
################################################################################
# Objects go to a per-configuration directory; cubin and PTX outputs share
# the "data" directory under $(SRCDIR).
OBJDIR   := $(ROOTOBJDIR)/$(BINSUBDIR)
CUBINDIR := $(SRCDIR)data
PTXDIR   := $(SRCDIR)data

################################################################################
# Output file lists
################################################################################
# One object per source file.  Directory parts of the source names are
# dropped, and the full source file name is kept in the object name
# (foo.cu -> $(OBJDIR)/foo.cu.o).
OBJS += $(patsubst %.cpp,$(OBJDIR)/%.cpp.o,$(notdir $(CCFILES)))
OBJS += $(patsubst %.c,$(OBJDIR)/%.c.o,$(notdir $(CFILES)))
OBJS += $(patsubst %.cu,$(OBJDIR)/%.cu.o,$(notdir $(CUFILES)))

# One .cubin per entry in CUBINFILES and one .ptx per entry in PTXFILES.
CUBINS  += $(patsubst %.cu,$(CUBINDIR)/%.cubin,$(notdir $(CUBINFILES)))
PTXBINS += $(patsubst %.cu,$(PTXDIR)/%.ptx,$(notdir $(PTXFILES)))
---|
| 298 | |
---|
################################################################################
# Rules
################################################################################
# Compile rules.  $< is the source file and $@ the output.
#
# The directory-creating targets are listed after `|` as order-only
# prerequisites: they are run before the compile (so the output directory
# exists even under `make -j` or when a single object is requested), but
# they never make an up-to-date output look stale.  As normal prerequisites
# `cubindirectory`/`ptxdirectory` forced every cubin/ptx file to be rebuilt
# on every invocation.

# C, C++ and CUDA objects.
$(OBJDIR)/%.c.o : $(SRCDIR)%.c $(C_DEPS) | makedirectories
	$(VERBOSE)$(CC) $(CFLAGS) -o $@ -c $<

$(OBJDIR)/%.cpp.o : $(SRCDIR)%.cpp $(C_DEPS) | makedirectories
	$(VERBOSE)$(CXX) $(CXXFLAGS) -o $@ -c $<

$(OBJDIR)/%.cu.o : $(SRCDIR)%.cu $(CU_DEPS) | makedirectories
	$(VERBOSE)$(NVCC) $(NVCCFLAGS) $(SMVERSIONFLAGS) -o $@ -c $<

# Stand-alone cubin and PTX outputs.
$(CUBINDIR)/%.cubin : $(SRCDIR)%.cu | cubindirectory
	$(VERBOSE)$(NVCC) $(CUBIN_ARCH_FLAG) $(NVCCFLAGS) $(SMVERSIONFLAGS) -o $@ -cubin $<

$(PTXDIR)/%.ptx : $(SRCDIR)%.cu | ptxdirectory
	$(VERBOSE)$(NVCC) $(CUBIN_ARCH_FLAG) $(NVCCFLAGS) $(SMVERSIONFLAGS) -o $@ -ptx $<
---|
| 316 | |
---|
| 317 | # |
---|
| 318 | # The following definition is a template that gets instantiated for each SM |
---|
| 319 | # version (sm_10, sm_13, etc.) stored in SM_VERSIONS. It does 2 things: |
---|
| 320 | # 1. It adds to OBJS a .cu_sm_XX.o for each .cu file it finds in CUFILES_sm_XX. |
---|
| 321 | # 2. It generates a rule for building .cu_sm_XX.o files from the corresponding |
---|
| 322 | # .cu file. |
---|
| 323 | # |
---|
| 324 | # The intended use for this is to allow Makefiles that use common.mk to compile |
---|
| 325 | # files to different Compute Capability targets (aka SM arch version). To do |
---|
| 326 | # so, in the Makefile, list files for each SM arch separately, like so: |
---|
| 327 | # |
---|
| 328 | # CUFILES_sm_10 := mycudakernel_sm10.cu app.cu |
---|
| 329 | # CUFILES_sm_12 := anothercudakernel_sm12.cu |
---|
| 330 | # |
---|
# Per-architecture template; argument 1 is an SM version name such as sm_10.
# It appends a .cu_<sm>.o object to OBJS for every file in CUFILES_<sm> and
# emits the pattern rule that builds such objects with `-arch <sm>`.
# The automatic variables are written with a doubled dollar sign so they
# survive the expansion done by `call` and are only expanded when the recipe
# runs.  `makedirectories` is an order-only prerequisite so the object
# directory exists before nvcc writes into it, including under `make -j`.
define SMVERSION_template
OBJS += $(patsubst %.cu,$(OBJDIR)/%.cu_$(1).o,$(notdir $(CUFILES_$(1))))
$(OBJDIR)/%.cu_$(1).o : $(SRCDIR)%.cu $(CU_DEPS) | makedirectories
	$(VERBOSE)$(NVCC) -o $$@ -c $$< $(NVCCFLAGS) -arch $(1)
endef

# Instantiate the template once for each arch version stored in SM_VERSIONS.
# The call function expands the template for that version, and the eval
# function interprets the result as makefile text.
$(foreach smver,$(SM_VERSIONS),$(eval $(call SMVERSION_template,$(smver))))
---|
| 341 | |
---|
# Final link (or archive) step, performed by $(LINKLINE).
# `makedirectories` is listed first so the output directories exist before
# anything else is built in a serial run.  Its recipe never creates a file of
# that name, so this target is re-linked on every invocation.  Editing the
# Makefile also forces a re-link.
$(TARGET): makedirectories $(OBJS) $(CUBINS) $(PTXBINS) Makefile
	$(VERBOSE)$(LINKLINE)

# `install` simply builds the target.  It is declared phony so that a file
# named "install" (or "INSTALL" on a case-insensitive file system) in the
# build directory cannot make it look up to date.
.PHONY: install
install: $(TARGET)
---|
| 346 | |
---|
# Directory-creation helpers.  They are commands rather than files, so they
# are declared phony: a stray file or directory with one of these names can
# then never make them look up to date.
.PHONY: cubindirectory ptxdirectory makedirectories

# Output directory for .cubin files.
cubindirectory:
	$(VERBOSE)mkdir -p $(CUBINDIR)

# Output directory for .ptx files.
ptxdirectory:
	$(VERBOSE)mkdir -p $(PTXDIR)

# Library, object and target directories needed by the main build.
makedirectories:
	$(VERBOSE)mkdir -p $(LIBDIR)
	$(VERBOSE)mkdir -p $(OBJDIR)
	$(VERBOSE)mkdir -p $(TARGETDIR)
---|
| 357 | |
---|
| 358 | |
---|
# Housekeeping targets are commands, not files.
.PHONY: tidy clean clobber

# Remove editor backup/autosave files: every non-directory below the current
# directory whose path contains '#' or '~'.  A single find with -exec is used
# instead of `find | egrep | xargs rm`, so names containing whitespace are
# handled correctly, directories are never passed to rm, and the obsolescent
# egrep is not needed.
tidy :
	$(VERBOSE)find . ! -type d \( -path '*#*' -o -path '*~*' \) -exec rm -f {} +

# Remove everything the build produced for the current configuration, plus
# any nvcc intermediates listed in NVCC_KEEP_CLEAN.
clean : tidy
	$(VERBOSE)rm -f $(OBJS)
	$(VERBOSE)rm -f $(CUBINS)
	$(VERBOSE)rm -f $(PTXBINS)
	$(VERBOSE)rm -f $(TARGET)
	$(VERBOSE)rm -f $(NVCC_KEEP_CLEAN)

# Additionally remove the whole object tree (all configurations).
clobber : clean
	$(VERBOSE)rm -rf $(ROOTOBJDIR)
---|
| 372 | |
---|