# Root of the CUDA Toolkit installation. Adjust this if the toolkit lives
# somewhere else on your system.
CUDA_DIR = /usr/local/cuda
# Header search path handed to both gcc and nvcc.
CUDA_INCLUDE = -I${CUDA_DIR}/include/
# Library search path handed to the linker.
CUDA_LIB = -L${CUDA_DIR}/lib64/

# Host-only (.c) compilation: compiler and its flags.
CC = gcc
# Warnings and optimisation level first ...
CFLAGS = -Wall -Wextra -O2
# ... then the CUDA headers, which the .c files are compiled against ...
CFLAGS += $(CUDA_INCLUDE)
# ... and finally the alignment option.
CFLAGS += -malign-double

# Compiler settings for .cu files (CPU/GPU)
NVCC = nvcc
# --ptxas-options=-v makes ptxas report per-kernel resource usage during the build.
NVCCFLAGS = $(CUDA_INCLUDE) --ptxas-options=-v
# Compile the device code to PTX for the compute_30 (Kepler) virtual architecture.
NVCCFLAGS += --gpu-architecture=compute_30
# Select which code images are embedded in the binary:
#   compute_30 - the PTX itself, so the driver can JIT-assemble it for any GPU
#                from Kepler on (without this entry no PTX is embedded and the
#                binary only carries the sm_75 image),
#   sm_75      - PTX assembled and optimized ahead of time for the Turing GPU architecture.
# Additional GPU architectures can be appended to this comma-separated list.
# See the GPU Compilation documentation on the NVIDIA site.
# NOTE(review): recent CUDA toolkits no longer accept compute_30; raise both
# occurrences of compute_30 if your nvcc rejects it.
NVCCFLAGS += --gpu-code=compute_30,sm_75
# Pass some options to the C host compiler (e.g. gcc on Linux)
NVCCFLAGS += --compiler-options=-Wall
# Alternatively, try a line like the following to use an older version of gcc (required by some CUDA SDKs)
# NVCCFLAGS += --compiler-options="-Wall -B/usr/lib/gcc/x86_64-linux-gnu/4.5.3/"

# Final link step: gcc is used as the link driver.
LD = gcc
LDFLAGS = -fPIC
# Search the CUDA library directory for the runtime library.
LDFLAGS += $(CUDA_LIB)
# Libraries appended after the object files on the link line.
MMFFLIB = -lcudart -lstdc++ -lm

##############################################################################

# C sources, compiled with $(CC).
CSRC = \
  timer.c parse.c read_config.c mfaktc.c \
  checkpoint.c signal_handler.c output.c
# CUDA sources, compiled with $(NVCC).
CUSRC = \
  tf_barrett96_gs.cu gpusieve.cu

# Each source file maps to an object file of the same base name.
COBJS  = $(patsubst %.c,%.o,$(CSRC))
CUOBJS = $(patsubst %.cu,%.o,$(CUSRC))

##############################################################################

# 'all' and 'clean' are commands, not files. Declaring them phony keeps them
# working even if a file with one of these names appears in this directory.
.PHONY: all clean

# Default goal: build the executable one directory above the sources.
all: ../mmff.exe

# Link every C and CUDA object into the final executable. The libraries are
# listed after the objects so the linker can resolve their references.
../mmff.exe : $(COBJS) $(CUOBJS)
	$(LD) $(LDFLAGS) $^ $(MMFFLIB) -o $@

# Remove object files and editor backup files from this directory.
# The executable itself is left in place.
clean :
	rm -f *.o *~

# Compile a CUDA source file into an object file with nvcc.
%.o : %.cu
	$(NVCC) $(NVCCFLAGS) -c $< -o $@

# Compile a C source file into an object file with the host compiler.
%.o : %.c
	$(CC) $(CFLAGS) -c $< -o $@

##############################################################################

# Per-object header dependencies (generated by cpp -MM).
# These rules carry no recipe; they only add prerequisites, so an object is
# rebuilt by the pattern rules whenever one of the listed files changes.

checkpoint.o: \
 checkpoint.c params.h

mfaktc.o: \
 mfaktc.c params.h my_types.h compatibility.h read_config.h parse.h \
 timer.h checkpoint.h signal_handler.h output.h

output.o: \
 output.c params.h my_types.h output.h compatibility.h tf_validate.h

parse.o: \
 parse.c compatibility.h parse.h

read_config.o: \
 read_config.c params.h my_types.h

signal_handler.o: \
 signal_handler.c params.h my_types.h compatibility.h

timer.o: \
 timer.c timer.h compatibility.h

# tf_barrett96_gs.cu also depends on two further .cu files and the tf_*.h headers.
tf_barrett96_gs.o: \
 tf_barrett96_gs.cu params.h my_types.h compatibility.h \
 gpusieve.h my_intrinsics.h timer.h output.h \
 tf_barrett96_div.cu tf_common_gs.cu \
 tf_192.h tf_224.h tf_256.h tf_160.h tf_128.h tf_96.h \
 tf_m127.h tf_m107.h tf_m89.h tf_m61.h tf_m31.h \
 tf_f0_31.h tf_f32_63.h tf_f64_95.h tf_f96_127.h \
 tf_f128_159.h tf_f160_191.h tf_f192_223.h

gpusieve.o: \
 gpusieve.cu gpusieve.h params.h my_types.h

