1 | ################################################################################ |
---|
2 | # |
---|
3 | # Copyright 1993-2006 NVIDIA Corporation. All rights reserved. |
---|
4 | # |
---|
5 | # NOTICE TO USER: |
---|
6 | # |
---|
7 | # This source code is subject to NVIDIA ownership rights under U.S. and |
---|
8 | # international Copyright laws. |
---|
9 | # |
---|
10 | # NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE |
---|
11 | # CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR |
---|
12 | # IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH |
---|
13 | # REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF |
---|
14 | # MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. |
---|
15 | # IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, |
---|
16 | # OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS |
---|
17 | # OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE |
---|
18 | # OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE |
---|
19 | # OR PERFORMANCE OF THIS SOURCE CODE. |
---|
20 | # |
---|
21 | # U.S. Government End Users. This source code is a "commercial item" as |
---|
22 | # that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of |
---|
23 | # "commercial computer software" and "commercial computer software |
---|
24 | # documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) |
---|
25 | # and is provided to the U.S. Government only as a commercial end item. |
---|
26 | # Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through |
---|
27 | # 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the |
---|
28 | # source code with only those rights set forth herein. |
---|
29 | # |
---|
30 | ################################################################################ |
---|
31 | # |
---|
32 | # Common build script |
---|
33 | # |
---|
34 | ################################################################################ |
---|
35 | |
---|
# Register the extra file suffixes used by CUDA builds.  Successive .SUFFIXES
# lines are cumulative, so the list is simply grouped by category here.
.SUFFIXES : .cu
.SUFFIXES : .cu_dbg.o .c_dbg.o .cpp_dbg.o
.SUFFIXES : .cu_rel.o .c_rel.o .cpp_rel.o
.SUFFIXES : .cubin .ptx

# Compute Capability targets for which per-arch object rules are generated.
# Extend the number list when devices with a new Compute Capability appear.
SM_VERSIONS := $(addprefix sm_,10 11 12 13)

# Location of the CUDA toolkit.  `cuda-install=<path>` takes precedence;
# otherwise an already defined CUDA_INSTALL_PATH is kept, and /usr/local/cuda
# is the fallback.
ifdef cuda-install
  CUDA_INSTALL_PATH := $(cuda-install)
else
  CUDA_INSTALL_PATH ?= /usr/local/cuda
endif
---|
46 | |
---|
# Detect the host OS.
# The probes use `:=` so each shell command runs exactly once at parse time
# instead of being re-forked on every expansion, and the tr character classes
# are quoted so the shell cannot glob-expand them against files in the
# current directory.
OSUPPER := $(shell uname -s 2>/dev/null | tr '[:lower:]' '[:upper:]')
OSLOWER := $(shell uname -s 2>/dev/null | tr '[:upper:]' '[:lower:]')
# OSLOWER is 'linux' on Linux systems and 'darwin' on OS X.
# DARWIN is non-empty ("DARWIN") only when OSUPPER contains that word.
DARWIN  := $(strip $(findstring DARWIN,$(OSUPPER)))

# Detect if 32 bit or 64 bit system: HP_64 is non-empty when the machine name
# printed by `uname -m` contains "64".
HP_64   := $(shell uname -m | grep 64)
---|
55 | |
---|
# SDK directory layout.
# Everything assigned with `?=` is only a default: a value already defined by
# the including Makefile, the environment or the command line is kept.
SRCDIR     ?=
ROOTDIR    ?= $(HOME)/NVIDIA_GPU_Computing_SDK/C
ROOTOBJDIR ?= obj
# NOTE(review): SAGE_DIRECTORY is not defined in this file; if it is empty
# this default expands to "/bin" -- confirm the environment always sets it.
ROOTBINDIR ?= $(SAGE_DIRECTORY)/bin
BINDIR     ?= $(ROOTBINDIR)

# Derived from ROOTDIR; always recomputed here.
LIBDIR     := $(ROOTDIR)/lib
COMMONDIR  := $(ROOTDIR)/common
---|
65 | |
---|
# Compilers
# nvcc is found at the same path on 32- and 64-bit hosts, so no HP_64 switch
# is needed (the former conditional had two identical branches).
NVCC := $(CUDA_INSTALL_PATH)/bin/nvcc

CXX  := g++
CC   := gcc
# Link with the C++ driver.  LINK follows CXX so that a command-line override
# of CXX also changes the linker; with the default CXX this is "g++ -fPIC".
LINK := $(CXX) -fPIC
---|
76 | |
---|
# Header search path, appended to whatever the including Makefile already put
# in INCLUDES: the current directory, the CUDA toolkit headers and the SDK
# common headers.
INCLUDES += -I.
INCLUDES += -I$(CUDA_INSTALL_PATH)/include
INCLUDES += -I$(COMMONDIR)/inc

# Architecture flag handed to nvcc by the cubin/ptx rules; empty by default.
CUBIN_ARCH_FLAG :=
---|
82 | |
---|
# Warning flags
#
# CXXWARN_FLAGS holds the warnings shared by the C and C++ compilers (it is
# also what nvcc_warn_verbose=1 forwards to nvcc's host compiler).  The list
# ends on its last flag: there is no trailing backslash and no reference to
# an undefined $(SPACE) variable to terminate the continuation.
CXXWARN_FLAGS := \
    -W -Wall \
    -Wswitch \
    -Wformat \
    -Wchar-subscripts \
    -Wparentheses \
    -Wmultichar \
    -Wtrigraphs \
    -Wpointer-arith \
    -Wcast-align \
    -Wreturn-type \
    -Wno-unused-function

# CWARN_FLAGS adds the C-only warnings.  -Wimplicit lives here because GCC
# documents it as "C and Objective-C only"; passing it to g++ is not valid.
CWARN_FLAGS := $(CXXWARN_FLAGS) \
    -Wimplicit \
    -Wstrict-prototypes \
    -Wmissing-prototypes \
    -Wmissing-declarations \
    -Wnested-externs \
    -Wmain

---|
# Per-compiler flag variables start from the warning sets; nvcc starts empty.
CFLAGS    := $(CWARN_FLAGS)
CXXFLAGS  := $(CXXWARN_FLAGS)
NVCCFLAGS :=

# Flags shared by all three compilers (merged into each of them later on).
COMMONFLAGS += $(INCLUDES) -DUNIX

# Build configuration: release unless dbg=1 is given.
#   release : -O2, -fno-strict-aliasing, objects under "release", no lib suffix
#   debug   : -g, -D_DEBUG, objects under "debug", libraries get a "D" suffix
ifneq ($(dbg),1)
  BINSUBDIR   := release
  LIBSUFFIX   :=
  COMMONFLAGS += -O2
  # nvcc needs the flag forwarded to its host compiler
  NVCCFLAGS   += --compiler-options -fno-strict-aliasing
  CXXFLAGS    += -fno-strict-aliasing
  CFLAGS      += -fno-strict-aliasing
else
  BINSUBDIR   := debug
  LIBSUFFIX   := D
  COMMONFLAGS += -g
  NVCCFLAGS   += -D_DEBUG
  CXXFLAGS    += -D_DEBUG
  CFLAGS      += -D_DEBUG
endif
---|
130 | |
---|
131 | # append optional arch/SM version flags (such as -arch sm_11) |
---|
132 | #NVCCFLAGS += $(SMVERSIONFLAGS) |
---|
133 | |
---|
# architecture flag for cubin build (empty unless set below)
CUBIN_ARCH_FLAG :=

# OpenGL is used or not (if it is used, then it is necessary to include GLEW)
ifeq ($(USEGLLIB),1)

  ifneq ($(DARWIN),)
    # OS X: system OpenGL framework libraries plus the static GLEW from the SDK
    OPENGLLIB := -L/System/Library/Frameworks/OpenGL.framework/Libraries -lGL -lGLU $(COMMONDIR)/lib/$(OSLOWER)/libGLEW.a
  else
    OPENGLLIB := -lGL -lGLU -lX11 -lXi -lXmu

    # GLEW library name and X11 library directory depend on the host word size
    ifeq "$(strip $(HP_64))" ""
      OPENGLLIB += -lGLEW -L/usr/X11R6/lib
    else
      OPENGLLIB += -lGLEW_x86_64 -L/usr/X11R6/lib64
    endif
  endif

  # Request 64-bit cubin/ptx output only on a 64-bit host.  This used to be
  # set unconditionally inside the USEGLLIB branch, which also passed -m64 to
  # nvcc on 32-bit machines.
  ifneq "$(strip $(HP_64))" ""
    CUBIN_ARCH_FLAG := -m64
  endif
endif
---|
154 | |
---|
# USEGLUT=1: link GLUT -- as a framework when DARWIN is non-empty, as a plain
# library otherwise.
ifeq ($(USEGLUT),1)
  OPENGLLIB += $(if $(DARWIN),-framework GLUT,-lglut)
endif

# USEPARAMGL=1: link paramgl (name carries LIBSUFFIX)
ifeq ($(USEPARAMGL),1)
  PARAMGLLIB := $(addsuffix $(LIBSUFFIX),-lparamgl)
endif

# USERENDERCHECKGL=1: link rendercheckgl (name carries LIBSUFFIX)
ifeq ($(USERENDERCHECKGL),1)
  RENDERCHECKGLLIB := $(addsuffix $(LIBSUFFIX),-lrendercheckgl)
endif
---|
170 | |
---|
# USECUDPP=1: link CUDPP.  The library name is assembled from
#   -lcudpp  [64 when HP_64 is non-empty]  $(LIBSUFFIX)  [_emu when emu=1]
ifeq ($(USECUDPP), 1)
  CUDPPLIB := -lcudpp$(if $(strip $(HP_64)),64)$(LIBSUFFIX)

  ifeq ($(emu), 1)
    CUDPPLIB := $(CUDPPLIB)_emu
  endif
endif
---|
184 | |
---|
# Libs
# Library search path: the toolkit library directory (lib64 when HP_64 is
# non-empty), then the SDK lib directory and the SDK per-OS common libraries.
ifeq "$(strip $(HP_64))" ""
  LIB := -L$(CUDA_INSTALL_PATH)/lib
else
  LIB := -L$(CUDA_INSTALL_PATH)/lib64
endif
LIB += -L$(LIBDIR) -L$(COMMONDIR)/lib/$(OSLOWER)

# Libraries to link.  LIB is appended to directly; the former "... ${LIB}" at
# the end of each line re-appended LIB to itself and only duplicated every -L
# option.
ifeq ($(USECUDADYNLIB),1)
  # CUDA/CUDART are loaded dynamically at run time, so neither is listed here
  LIB += $(OPENGLLIB) $(PARAMGLLIB) $(RENDERCHECKGLLIB) $(CUDPPLIB) -ldl -rdynamic
else
  # Link against the driver API (USEDRVAPI=1) or the CUDA runtime
  ifeq ($(USEDRVAPI),1)
    LIB += -lcuda $(OPENGLLIB) $(PARAMGLLIB) $(RENDERCHECKGLLIB) $(CUDPPLIB)
  else
    LIB += -lcudart $(OPENGLLIB) $(PARAMGLLIB) $(RENDERCHECKGLLIB) $(CUDPPLIB)
  endif
endif
---|
203 | |
---|
# USECUFFT=1: link CUFFT, or its emulation build when emu=1
ifeq ($(USECUFFT),1)
  ifneq ($(emu),1)
    LIB += -lcufft
  else
    LIB += -lcufftemu
  endif
endif

# USECUBLAS=1: link CUBLAS, or its emulation build when emu=1
ifeq ($(USECUBLAS),1)
  ifneq ($(emu),1)
    LIB += -lcublas
  else
    LIB += -lcublasemu
  endif
endif
---|
219 | |
---|
# Lib/exe configuration
# Sets TARGETDIR, TARGET and LINKLINE for either a static library (when
# STATIC_LIB is non-empty) or an executable.  LINKLINE is deliberately
# recursive (=) because OBJS is still being extended further down.
ifneq ($(STATIC_LIB),)
  TARGETDIR := $(LIBDIR)
  # Insert LIBSUFFIX in front of the final ".a" of the library name only.
  # (A plain $(subst .a,...) over the whole path also rewrote every other
  # ".a" it contained, e.g. inside a directory name of LIBDIR.)
  TARGET    := $(LIBDIR)/$(patsubst %.a,%$(LIBSUFFIX).a,$(STATIC_LIB))
  LINKLINE   = $(AR) rucv $(TARGET) $(OBJS)
else
  ifneq ($(OMIT_CUTIL_LIB),1)
    LIB += -lcutil$(LIBSUFFIX)
  endif
  # Device emulation configuration (only applied to executable builds)
  ifeq ($(emu), 1)
    NVCCFLAGS   += -deviceemu
    CUDACCFLAGS +=
    BINSUBDIR   := emu$(BINSUBDIR)
    # consistency, makes developing easier
    CXXFLAGS    += -D__DEVICE_EMULATION__
    CFLAGS      += -D__DEVICE_EMULATION__
  endif
  TARGETDIR := $(BINDIR)
  TARGET    := $(TARGETDIR)/$(EXECUTABLE)
  # Executables are additionally linked against libsail from SAGE_DIRECTORY
  LINKLINE   = $(LINK) -o $(TARGET) $(OBJS) $(LIB) -L${SAGE_DIRECTORY}/lib -lsail
endif
---|
242 | |
---|
# Command echoing: every recipe line is prefixed with $(VERBOSE).  It is "@"
# (do not echo the command) unless verbose=1 is given, in which case it is
# empty and make prints each command.
ifneq ($(verbose), 1)
  VERBOSE := @
else
  VERBOSE :=
endif
---|
249 | |
---|
################################################################################
# Translate command-line switches into compiler flags
################################################################################
# fastmath=1 : pass -use_fast_math to nvcc
ifeq ($(fastmath), 1)
  NVCCFLAGS += -use_fast_math
endif

# keep=1 : pass -keep to nvcc and record the file patterns that the clean
# target removes through NVCC_KEEP_CLEAN
ifeq ($(keep), 1)
  NVCCFLAGS       += -keep
  NVCC_KEEP_CLEAN := *.i* *.cubin *.cu.c *.cudafe* *.fatbin.c *.ptx
endif

# maxregisters=<n> : forwarded to nvcc as -maxrregcount <n>
ifdef maxregisters
  NVCCFLAGS += -maxrregcount $(maxregisters)
endif

# Merge the shared flags into the host compilers ...
CFLAGS    += $(COMMONFLAGS)
CXXFLAGS  += $(COMMONFLAGS)

# ... and the cudacc flags followed by the shared flags into nvcc
NVCCFLAGS += $(CUDACCFLAGS)
NVCCFLAGS += $(COMMONFLAGS)

# nvcc_warn_verbose=1 : forward each C++ warning flag to nvcc's host compiler,
# one "--compiler-options <flag>" pair per flag
ifeq ($(nvcc_warn_verbose),1)
  NVCCFLAGS += $(foreach warnflag,$(CXXWARN_FLAGS),--compiler-options $(warnflag))
  NVCCFLAGS += --compiler-options -fno-strict-aliasing
endif
---|
278 | |
---|
################################################################################
# Derived file lists
################################################################################
# Base names (directory part removed) of each kind of input file.  These stay
# recursive (=) so they are expanded exactly when the lists below are.
cpp_names   = $(notdir $(CCFILES))
c_names     = $(notdir $(CFILES))
cu_names    = $(notdir $(CUFILES))
cubin_names = $(notdir $(CUBINFILES))
ptx_names   = $(notdir $(PTXFILES))

# Object files: <OBJDIR>/<name>.<ext>.o for every .cpp, .c and .cu source
OBJDIR := $(ROOTOBJDIR)/$(BINSUBDIR)
OBJS   += $(cpp_names:%.cpp=$(OBJDIR)/%.cpp.o)
OBJS   += $(c_names:%.c=$(OBJDIR)/%.c.o)
OBJS   += $(cu_names:%.cu=$(OBJDIR)/%.cu.o)

# cubin outputs: <SRCDIR>data/<name>.cubin for every file in CUBINFILES
CUBINDIR := $(SRCDIR)data
CUBINS   += $(cubin_names:%.cu=$(CUBINDIR)/%.cubin)

# PTX outputs: <SRCDIR>data/<name>.ptx for every file in PTXFILES
PTXDIR  := $(SRCDIR)data
PTXBINS += $(ptx_names:%.cu=$(PTXDIR)/%.ptx)
---|
298 | |
---|
299 | ################################################################################ |
---|
300 | # Rules |
---|
301 | ################################################################################ |
---|
302 | $(OBJDIR)/%.c.o : $(SRCDIR)%.c $(C_DEPS) |
---|
303 | $(VERBOSE)$(CC) $(CFLAGS) -o $@ -c $< |
---|
304 | |
---|
305 | $(OBJDIR)/%.cpp.o : $(SRCDIR)%.cpp $(C_DEPS) |
---|
306 | $(VERBOSE)$(CXX) $(CXXFLAGS) -o $@ -c $< |
---|
307 | |
---|
308 | $(OBJDIR)/%.cu.o : $(SRCDIR)%.cu $(CU_DEPS) |
---|
309 | $(VERBOSE)$(NVCC) $(NVCCFLAGS) $(SMVERSIONFLAGS) -o $@ -c $< |
---|
310 | |
---|
311 | $(CUBINDIR)/%.cubin : $(SRCDIR)%.cu cubindirectory |
---|
312 | $(VERBOSE)$(NVCC) $(CUBIN_ARCH_FLAG) $(NVCCFLAGS) $(SMVERSIONFLAGS) -o $@ -cubin $< |
---|
313 | |
---|
314 | $(PTXDIR)/%.ptx : $(SRCDIR)%.cu ptxdirectory |
---|
315 | $(VERBOSE)$(NVCC) $(CUBIN_ARCH_FLAG) $(NVCCFLAGS) $(SMVERSIONFLAGS) -o $@ -ptx $< |
---|
316 | |
---|
#
# The following definition is a template that gets instantiated for each SM
# version (sm_10, sm_13, etc.) stored in SM_VERSIONS. It does 2 things:
# 1. It adds to OBJS a .cu_sm_XX.o for each .cu file it finds in CUFILES_sm_XX.
# 2. It generates a rule for building .cu_sm_XX.o files from the corresponding
#    .cu file.
#
# The intended use for this is to allow Makefiles that use common.mk to compile
# files to different Compute Capability targets (aka SM arch version). To do
# so, in the Makefile, list files for each SM arch separately, like so:
#
# CUFILES_sm_10 := mycudakernel_sm10.cu app.cu
# CUFILES_sm_12 := anothercudakernel_sm12.cu
#
# $(1) is the SM version. $$@ and $$< are doubled so that they survive the
# $(call) expansion and are only resolved when the generated recipe runs.
# makedirectories is an order-only prerequisite so that the object directory
# exists before nvcc writes into it, without forcing rebuilds.
define SMVERSION_template
OBJS += $(patsubst %.cu,$(OBJDIR)/%.cu_$(1).o,$(notdir $(CUFILES_$(1))))
$(OBJDIR)/%.cu_$(1).o : $(SRCDIR)%.cu $(CU_DEPS) | makedirectories
	$(VERBOSE)$(NVCC) -o $$@ -c $$< $(NVCCFLAGS) -arch $(1)
endef

# This line invokes the above template for each arch version stored in
# SM_VERSIONS. The call function invokes the template, and the eval
# function interprets it as make commands.
$(foreach smver,$(SM_VERSIONS),$(eval $(call SMVERSION_template,$(smver))))
---|
341 | |
---|
# Every object needs the output directories to exist before it is compiled.
# Order-only (after "|"): the directories are created first, but they never
# make an object look out of date.  A rule line without a recipe only adds
# prerequisites; the recipes still come from the pattern rules.
$(OBJS): | makedirectories

# Link the executable / archive the static library.
# makedirectories is order-only here as well: it is not a file, so as a normal
# prerequisite it made $(TARGET) out of date -- and relinked -- on every run.
$(TARGET): $(OBJS) $(CUBINS) $(PTXBINS) Makefile | makedirectories
	$(VERBOSE)$(LINKLINE)

# install only makes sure the target is built; it has no recipe of its own.
.PHONY: install
install: $(TARGET)
---|
346 | |
---|
# Directory-creating helper targets.  They are commands, not files, so they
# are declared phony: a stray file or directory with one of these names can
# no longer stop the recipe from running.
.PHONY: cubindirectory ptxdirectory makedirectories

# Output directory for .cubin files
cubindirectory:
	$(VERBOSE)mkdir -p $(CUBINDIR)

# Output directory for .ptx files
ptxdirectory:
	$(VERBOSE)mkdir -p $(PTXDIR)

# Library, object and target directories used by the build
makedirectories:
	$(VERBOSE)mkdir -p $(LIBDIR)
	$(VERBOSE)mkdir -p $(OBJDIR)
	$(VERBOSE)mkdir -p $(TARGETDIR)
---|
357 | |
---|
358 | |
---|
# Housekeeping targets are commands, not files.
.PHONY: tidy clean clobber

# Remove editor leftovers below the current directory: regular files whose
# *name* contains '#' (e.g. #foo#, .#foo) or ends in '~'.
# Matching is done by find on the file name and deletion by -exec, so files
# that merely live under a directory with '#' or '~' in its name are no
# longer deleted, and names containing whitespace are handled correctly.
tidy :
	$(VERBOSE)find . -type f \( -name '*#*' -o -name '*~' \) -exec rm -f {} +

# Remove everything this makefile builds for the current configuration, plus
# the nvcc intermediates listed in NVCC_KEEP_CLEAN (only set when keep=1).
clean : tidy
	$(VERBOSE)rm -f $(OBJS)
	$(VERBOSE)rm -f $(CUBINS)
	$(VERBOSE)rm -f $(PTXBINS)
	$(VERBOSE)rm -f $(TARGET)
	$(VERBOSE)rm -f $(NVCC_KEEP_CLEAN)

# clean, then remove the whole object tree
clobber : clean
	$(VERBOSE)rm -rf $(ROOTOBJDIR)
---|
372 | |
---|