Commit 0f6ebe56 authored by Thomas White's avatar Thomas White
Browse files

Use OpenCL for much faster diffraction calculation

parent 89bd4cc2
......@@ -30,13 +30,24 @@ AC_ARG_WITH(gsl,
GSL_LIBS="-L$withval/lib -lgsl -lgslcblas"],
[GSL_LIBS="-lgsl -lgslcblas"])
AC_MSG_CHECKING([whether to use OpenCL])
AC_ARG_ENABLE(opencl,
[AS_HELP_STRING([--enable-opencl], [Enable the use of OpenCL])],
[OPENCL_CFLAGS=""
OPENCL_LIBS="-lOpenCL"
AC_MSG_RESULT([yes])
AC_DEFINE([HAVE_OPENCL], [1], [Define to 1 if OpenCL is available])
have_opencl=true],
[AC_MSG_RESULT([no])])
havegtk=false
AM_PATH_GTK_2_0(2.0.0, havegtk=true,
AC_MSG_WARN([GTK not found. hdfsee will not be built.]))
AM_CONDITIONAL([HAVE_GTK], test x$havegtk = xtrue)
AM_CONDITIONAL([HAVE_OPENCL], test x$have_opencl = xtrue)
CFLAGS="$CFLAGS $HDF5_CFLAGS $GTK_CFLAGS $GSL_CFLAGS"
LIBS="$LIBS $HDF5_LIBS -lm -lz $GSL_LIBS $GTK_LIBS -lgthread-2.0 -lutil"
CFLAGS="$CFLAGS $HDF5_CFLAGS $GTK_CFLAGS $GSL_CFLAGS $OPENCL_CFLAGS"
LIBS="$LIBS $HDF5_LIBS -lm -lz $GSL_LIBS $GTK_LIBS $OPENCL_LIBS -lgthread-2.0 -lutil"
AC_OUTPUT(Makefile src/Makefile data/Makefile)
......@@ -2,4 +2,4 @@ hdfseedir = $(datadir)/hdfsee
hdfsee_DATA = displaywindow.ui
crystfeldir = $(datadir)/crystfel
crystfel_DATA = sfac/*
crystfel_DATA = sfac/* diffraction.cl
/*
* diffraction.cl
*
* GPU calculation kernel for truncated lattice diffraction
*
* (c) 2006-2010 Thomas White <taw@physics.org>
*
* Part of CrystFEL - crystallography with a FEL
*
*/
#define INDMAX 30
#define IDIM (INDMAX*2 +1)
float4 get_q(int x, int y, float cx, float cy, float res, float clen, float k,
float *ttp)
{
float rx, ry, r;
float ttx, tty, tt;
rx = ((float)x - cx)/res;
ry = ((float)y - cy)/res;
r = sqrt(pow(rx, 2.0) + pow(ry, 2.0));
ttx = atan2(rx, clen);
tty = atan2(ry, clen);
tt = atan2(r, clen);
*ttp = tt;
return (float4)(k*sin(ttx), k*sin(tty), k-k*cos(tt), 0.0);
}
float lattice_factor(float16 cell, float4 q)
{
float f1, f2, f3;
float4 Udotq;
const int na = 8;
const int nb = 8;
const int nc = 8;
Udotq.x = cell.s0*q.x + cell.s1*q.y + cell.s2*q.z;
Udotq.y = cell.s3*q.x + cell.s4*q.y + cell.s5*q.z;
Udotq.z = cell.s6*q.x + cell.s7*q.y + cell.s8*q.z;
/* At exact Bragg condition, f1 = na */
f1 = sin(M_PI*(float)na*Udotq.x) / sin(M_PI*Udotq.x);
/* At exact Bragg condition, f2 = nb */
f2 = sin(M_PI*(float)nb*Udotq.y) / sin(M_PI*Udotq.y);
/* At exact Bragg condition, f3 = nc */
f3 = sin(M_PI*(float)nc*Udotq.z) / sin(M_PI*Udotq.z);
/* At exact Bragg condition, this will multiply the molecular
* part of the structure factor by the number of unit cells,
* as desired (more scattering from bigger crystal!) */
return f1 * f2 * f3;
}
float2 get_sfac(global float2 *sfacs, float16 cell, float4 q)
{
signed int h, k, l;
int idx;
h = rint(cell.s0*q.x + cell.s1*q.y + cell.s2*q.z); /* h */
k = rint(cell.s3*q.x + cell.s4*q.y + cell.s5*q.z); /* k */
l = rint(cell.s6*q.x + cell.s7*q.y + cell.s8*q.z); /* l */
if ( (abs(h) > INDMAX) || (abs(k) > INDMAX) || (abs(l) > INDMAX) ) {
return 100.0;
}
if ( h < 0 ) h += IDIM;
if ( k < 0 ) k += IDIM;
if ( l < 0 ) l += IDIM;
idx = h + (IDIM*k) + (IDIM*IDIM*l);
return sfacs[idx];
}
kernel void diffraction(global float2 *diff, global float *tt, float k,
int w, float cx, float cy,
float res, float clen, float16 cell,
global float2 *sfacs)
{
float ttv;
const int x = get_global_id(0);
const int y = get_global_id(1);
float f_lattice;
float2 f_molecule;
float4 q = get_q(x, y, cx, cy, res, clen, k, &ttv);
f_lattice = lattice_factor(cell, q);
f_molecule = get_sfac(sfacs, cell, q);
diff[x+w*y] = f_molecule*f_lattice;
tt[x+w*y] = ttv;
}
......@@ -10,6 +10,10 @@ AM_CPPFLAGS = -DDATADIR=\""$(datadir)"\"
pattern_sim_SOURCES = pattern_sim.c diffraction.c utils.c image.c cell.c \
hdf5-file.c detector.c sfac.c intensities.c \
reflections.c
if HAVE_OPENCL
pattern_sim_SOURCES += diffraction-gpu.c
endif
pattern_sim_LDADD = @LIBS@
process_hkl_SOURCES = process_hkl.c sfac.c statistics.c cell.c utils.c \
......
/*
* diffraction-gpu.c
*
* Calculate diffraction patterns by Fourier methods (GPU version)
*
* (c) 2006-2010 Thomas White <taw@physics.org>
*
* Part of CrystFEL - crystallography with a FEL
*
*/
#include <stdlib.h>
#include <math.h>
#include <stdio.h>
#include <string.h>
#include <complex.h>
#include <CL/cl.h>
#include "image.h"
#include "utils.h"
#include "cell.h"
#include "diffraction.h"
#include "sfac.h"
#define SAMPLING (5)
#define BWSAMPLING (10)
#define BANDWIDTH (1.0 / 100.0)
static cl_device_id get_first_dev(cl_context ctx)
{
cl_device_id dev;
cl_int r;
r = clGetContextInfo(ctx, CL_CONTEXT_DEVICES, sizeof(dev), &dev, NULL);
if ( r != CL_SUCCESS ) {
ERROR("Couldn't get device\n");
return 0;
}
return dev;
}
static void show_build_log(cl_program prog, cl_device_id dev)
{
cl_int r;
char log[4096];
size_t s;
r = clGetProgramBuildInfo(prog, dev, CL_PROGRAM_BUILD_LOG, 4096, log,
&s);
STATUS("%s\n", log);
}
static cl_program load_program(const char *filename, cl_context ctx,
cl_device_id dev, cl_int *err)
{
FILE *fh;
cl_program prog;
char *source;
size_t len;
cl_int r;
fh = fopen(filename, "r");
if ( fh == NULL ) {
ERROR("Couldn't open '%s'\n", filename);
*err = CL_INVALID_PROGRAM;
return 0;
}
source = malloc(16384);
len = fread(source, 1, 16383, fh);
fclose(fh);
source[len] = '\0';
prog = clCreateProgramWithSource(ctx, 1, (const char **)&source,
NULL, err);
if ( *err != CL_SUCCESS ) {
ERROR("Couldn't load source\n");
return 0;
}
r = clBuildProgram(prog, 0, NULL, "-Werror", NULL, NULL);
if ( r != CL_SUCCESS ) {
ERROR("Couldn't build program\n");
show_build_log(prog, dev);
*err = r;
return 0;
}
*err = CL_SUCCESS;
return prog;
}
void get_diffraction_gpu(struct image *image, int na, int nb, int nc,
int no_sfac)
{
cl_uint nplat;
cl_platform_id platforms[8];
cl_context_properties prop[3];
cl_context ctx;
cl_int err;
cl_command_queue cq;
cl_program prog;
cl_device_id dev;
cl_kernel kern;
double ax, ay, az;
double bx, by, bz;
double cx, cy, cz;
double a, b, c, d;
float kc;
const size_t dims[2] = {1024, 1024};
cl_mem sfacs;
size_t sfac_size;
float *sfac_ptr;
cl_mem tt;
size_t tt_size;
float *tt_ptr;
int x, y;
cl_float16 cell;
cl_mem diff;
size_t diff_size;
float *diff_ptr;
int i;
if ( image->molecule == NULL ) return;
/* Generate structure factors if required */
if ( !no_sfac ) {
if ( image->molecule->reflections == NULL ) {
get_reflections_cached(image->molecule,
ph_lambda_to_en(image->lambda));
}
}
cell_get_cartesian(image->molecule->cell, &ax, &ay, &az,
&bx, &by, &bz,
&cx, &cy, &cz);
cell[0] = ax; cell[1] = ay; cell[2] = az;
cell[3] = bx; cell[4] = by; cell[5] = bz;
cell[6] = cx; cell[7] = cy; cell[8] = cz;
cell_get_parameters(image->molecule->cell,
&a, &b, &c, &d, &d, &d);
STATUS("Particle size = %i x %i x %i (=%5.2f x %5.2f x %5.2f nm)\n",
na, nb, nc, na*a/1.0e-9, nb*b/1.0e-9, nc*c/1.0e-9);
err = clGetPlatformIDs(8, platforms, &nplat);
if ( err != CL_SUCCESS ) {
ERROR("Couldn't get platform IDs: %i\n", err);
return;
}
STATUS("%i platforms\n", nplat);
prop[0] = CL_CONTEXT_PLATFORM;
prop[1] = (cl_context_properties)platforms[0];
prop[2] = 0;
ctx = clCreateContextFromType(prop, CL_DEVICE_TYPE_GPU, NULL, NULL, &err);
if ( err != CL_SUCCESS ) {
ERROR("Couldn't create OpenCL context: %i\n", err);
switch ( err ) {
case CL_INVALID_PLATFORM :
ERROR("Invalid platform\n");
break;
case CL_OUT_OF_HOST_MEMORY :
ERROR("Out of memory\n");
break;
}
return;
}
dev = get_first_dev(ctx);
cq = clCreateCommandQueue(ctx, dev, 0, &err);
if ( err != CL_SUCCESS ) {
ERROR("Couldn't create OpenCL command queue\n");
return;
}
/* Create buffer for the picture */
diff_size = image->width*image->height*sizeof(cl_float)*2; /* complex */
diff = clCreateBuffer(ctx, CL_MEM_READ_WRITE, diff_size, NULL, &err);
if ( err != CL_SUCCESS ) {
ERROR("Couldn't allocate diffraction memory\n");
return;
}
/* Create a single-precision version of the scattering factors */
sfac_size = IDIM*IDIM*sizeof(cl_float)*2; /* complex */
sfac_ptr = malloc(IDIM*IDIM*sizeof(cl_float)*2);
for ( i=0; i<IDIM*IDIM; i++ ) {
sfac_ptr[2*i+0] = creal(image->molecule->reflections[i]);
sfac_ptr[2*i+1] = cimag(image->molecule->reflections[i]);
}
sfacs = clCreateBuffer(ctx, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
sfac_size, sfac_ptr, &err);
if ( err != CL_SUCCESS ) {
ERROR("Couldn't allocate sfac memory\n");
return;
}
tt_size = image->width*image->height*sizeof(cl_float);
tt = clCreateBuffer(ctx, CL_MEM_READ_WRITE, tt_size, NULL, &err);
if ( err != CL_SUCCESS ) {
ERROR("Couldn't allocate twotheta memory\n");
return;
}
prog = load_program(DATADIR"/crystfel/diffraction.cl", ctx, dev, &err);
if ( err != CL_SUCCESS ) {
return;
}
kern = clCreateKernel(prog, "diffraction", &err);
if ( err != CL_SUCCESS ) {
ERROR("Couldn't create kernel\n");
return;
}
/* Calculate wavelength */
kc = 1.0/image->lambda; /* Centre value */
clSetKernelArg(kern, 0, sizeof(cl_mem), &diff);
clSetKernelArg(kern, 1, sizeof(cl_mem), &tt);
clSetKernelArg(kern, 2, sizeof(cl_float), &kc);
clSetKernelArg(kern, 3, sizeof(cl_int), &image->width);
clSetKernelArg(kern, 4, sizeof(cl_float), &image->det.panels[0].cx);
clSetKernelArg(kern, 5, sizeof(cl_float), &image->det.panels[0].cy);
clSetKernelArg(kern, 6, sizeof(cl_float), &image->resolution);
clSetKernelArg(kern, 7, sizeof(cl_float), &image->camera_len);
clSetKernelArg(kern, 8, sizeof(cl_float16), &cell);
clSetKernelArg(kern, 9, sizeof(cl_mem), &sfacs);
STATUS("Running...\n");
err = clEnqueueNDRangeKernel(cq, kern, 2, NULL, dims, NULL,
0, NULL, NULL);
if ( err != CL_SUCCESS ) {
ERROR("Couldn't enqueue kernel\n");
return;
}
diff_ptr = clEnqueueMapBuffer(cq, diff, CL_TRUE, CL_MAP_READ, 0,
diff_size, 0, NULL, NULL, &err);
if ( err != CL_SUCCESS ) {
ERROR("Couldn't map sfac buffer\n");
return;
}
tt_ptr = clEnqueueMapBuffer(cq, tt, CL_TRUE, CL_MAP_READ, 0,
tt_size, 0, NULL, NULL, &err);
if ( err != CL_SUCCESS ) {
ERROR("Couldn't map tt buffer\n");
return;
}
STATUS("Done!\n");
image->sfacs = calloc(image->width * image->height,
sizeof(double complex));
image->twotheta = calloc(image->width * image->height, sizeof(double));
for ( x=0; x<image->width; x++ ) {
for ( y=0; y<image->height; y++ ) {
float re, im, tt;
re = diff_ptr[2*(x + image->width*y)+0];
im = diff_ptr[2*(x + image->width*y)+1];
tt = tt_ptr[x + image->width*y];
image->sfacs[x + image->width*y] = re + I*im;
image->twotheta[x + image->width*y] = tt;
}
}
clReleaseProgram(prog);
clReleaseMemObject(sfacs);
clReleaseCommandQueue(cq);
clReleaseContext(ctx);
}
/*
* diffraction-gpu.h
*
* Calculate diffraction patterns by Fourier methods (GPU version)
*
* (c) 2006-2010 Thomas White <taw@physics.org>
*
* Part of CrystFEL - crystallography with a FEL
*
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#ifndef DIFFRACTION_GPU_H
#define DIFFRACTION_GPU_H
#include "image.h"
#include "cell.h"
#if HAVE_OPENCL
extern void get_diffraction_gpu(struct image *image, int na, int nb, int nc,
int nosfac);
#else
static void get_diffraction_gpu(struct image *image, int na, int nb, int nc,
int nosfac)
{
/* Do nothing */
ERROR("This copy of CrystFEL was not compiled with OpenCL support.\n");
}
#endif
#endif /* DIFFRACTION_GPU_H */
......@@ -23,9 +23,9 @@
#include "sfac.h"
#define SAMPLING (5)
#define BWSAMPLING (10)
#define BANDWIDTH (1.0 / 100.0)
#define SAMPLING (1)
#define BWSAMPLING (1)
#define BANDWIDTH (0.0 / 100.0)
static double lattice_factor(struct rvec q, double ax, double ay, double az,
......
......@@ -88,8 +88,8 @@ struct image {
* If FORMULATION_PIXELSIZE, then pixel_size only is needed.*/
FormulationMode fmode;
double pixel_size;
double camera_len;
double resolution; /* pixels per metre */
float camera_len;
float resolution; /* pixels per metre */
/* Wavelength must always be given */
double lambda; /* Wavelength in m */
......
......@@ -21,6 +21,7 @@
#include "image.h"
#include "diffraction.h"
#include "diffraction-gpu.h"
#include "cell.h"
#include "utils.h"
#include "hdf5-file.h"
......@@ -38,6 +39,7 @@ static void show_help(const char *s)
"\n"
" -h, --help Display this help message.\n"
" --simulation-details Show technical details of the simulation.\n"
" --gpu Use the GPU to speed up the calculation.\n"
"\n"
" --near-bragg Output h,k,l,I near Bragg conditions.\n"
" -n, --number=<N> Generate N images. Default 1.\n"
......@@ -157,6 +159,7 @@ int main(int argc, char *argv[])
int config_nonoise = 0;
int config_nobloom = 0;
int config_nosfac = 0;
int config_gpu = 0;
int ndone = 0; /* Number of simulations done (images or not) */
int number = 1; /* Number used for filename of image */
int n_images = 1; /* Generate one image by default */
......@@ -166,6 +169,7 @@ int main(int argc, char *argv[])
const struct option longopts[] = {
{"help", 0, NULL, 'h'},
{"simulation-details", 0, &config_simdetails, 1},
{"gpu", 0, &config_gpu, 1},
{"near-bragg", 0, &config_nearbragg, 1},
{"random-orientation", 0, NULL, 'r'},
{"number", 1, NULL, 'n'},
......@@ -274,11 +278,20 @@ int main(int argc, char *argv[])
image.twotheta = NULL;
image.hdr = NULL;
get_diffraction(&image, na, nb, nc, config_nosfac);
if ( config_gpu ) {
get_diffraction_gpu(&image, na, nb, nc, config_nosfac);
} else {
get_diffraction(&image, na, nb, nc, config_nosfac);
}
if ( image.molecule == NULL ) {
ERROR("Couldn't open molecule.pdb\n");
return 1;
}
if ( image.sfacs == NULL ) {
ERROR("Diffraction calculation failed.\n");
goto skip;
}
record_image(&image, !config_nowater, !config_nonoise,
!config_nobloom);
......@@ -305,6 +318,7 @@ int main(int argc, char *argv[])
free(image.sfacs);
free(image.twotheta);
skip:
ndone++;
if ( n_images && (ndone >= n_images) ) done = 1;
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment