Commit c708b216 authored by Thomas White's avatar Thomas White
Browse files

Fix GPU code for new geometry, and tidy up some detector stuff (needs debugging)

parent 356b10b5
......@@ -18,6 +18,9 @@
/* Define to 1 if Cairo is available */
#undef HAVE_CAIRO
/* Define to 1 if CL/cl.h should be used */
#undef HAVE_CL_CL_H
/* Define to 1 if you have the declaration of `strndup', and to 0 if you
don't. */
#undef HAVE_DECL_STRNDUP
......@@ -157,10 +160,6 @@
/* Define if you have the 'wchar_t' type. */
#undef HAVE_WCHAR_T
/* Define to 1 if you have the
</System/Library/Frameworks/OpenCL.framework/Headers/cl.h> header file. */
#undef HAVE__SYSTEM_LIBRARY_FRAMEWORKS_OPENCL_FRAMEWORK_HEADERS_CL_H
/* Define to a substitute value for mmap()'s MAP_ANONYMOUS flag. */
#undef MAP_ANONYMOUS
......
......@@ -809,7 +809,7 @@ enable_silent_rules
with_hdf5
with_gsl
enable_gsl_fudge
enable_opencl
with_opencl
enable_gtk
enable_gtktest
enable_png
......@@ -1464,7 +1464,6 @@ Optional Features:
--disable-silent-rules verbose build output (undo: `make V=0')
--enable-gsl-fudge Use this option if you get lots of linker errors
concerning GSL and CBLAS
--enable-opencl Enable the use of OpenCL
--disable-gtk Disable GTK+/GLib
--disable-gtktest do not try to compile and run a test GTK+ program
--disable-png Disable the use of libPNG
......@@ -1477,6 +1476,7 @@ Optional Packages:
--with-hdf5 specify location of HDF5 library
--with-gsl specify location of GSL (instead of using
pkg-config)
--with-opencl Use OpenCL
--with-libtiff specify location of libTIFF library
Some influential environment variables:
......@@ -6442,37 +6442,56 @@ $as_echo "no" >&6; }
fi
# Check whether --enable-opencl was given.
if test "${enable_opencl+set}" = set; then :
enableval=$enable_opencl;
# Check whether --with-opencl was given.
if test "${with_opencl+set}" = set; then :
withval=$with_opencl;
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to use OpenCL" >&5
$as_echo_n "checking whether to use OpenCL... " >&6; }
if test "x$enable_opencl" == "xyes"; then :
if test "x$with_opencl" == "xyes"; then :
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
for ac_header in /System/Library/Frameworks/OpenCL.framework/Headers/cl.h
do :
ac_fn_c_check_header_mongrel "$LINENO" "/System/Library/Frameworks/OpenCL.framework/Headers/cl.h" "ac_cv_header__System_Library_Frameworks_OpenCL_framework_Headers_cl_h" "$ac_includes_default"
if test "x$ac_cv_header__System_Library_Frameworks_OpenCL_framework_Headers_cl_h" = x""yes; then :
cat >>confdefs.h <<_ACEOF
#define HAVE__SYSTEM_LIBRARY_FRAMEWORKS_OPENCL_FRAMEWORK_HEADERS_CL_H 1
_ACEOF
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking cl.h" >&5
$as_echo_n "checking cl.h... " >&6; }
if test -f /System/Library/Frameworks/OpenCL.framework/Headers/cl.h; then :
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: /System/Library/Frameworks/OpenCL.framework/Headers/cl.h" >&5
$as_echo "/System/Library/Frameworks/OpenCL.framework/Headers/cl.h" >&6; }
OPENCL_CFLAGS="-I/System/Library/Frameworks/OpenCL.framework/Headers"
OPENCL_LIBS="-framework OpenCL"
else
fi
if test -f /usr/local/cuda/cuda/include/CL/cl.h; then :
OPENCL_CFLAGS="-I/usr/include/CL"
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: /usr/local/cuda/cuda/include/CL/cl.h" >&5
$as_echo "/usr/local/cuda/cuda/include/CL/cl.h" >&6; }
OPENCL_CFLAGS="-I/usr/local/cuda/cuda/include"
OPENCL_LIBS="-lOpenCL"
cl_cl_h=true
fi
if test -f /usr/local/cuda/include/CL/cl.h; then :
done
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: /usr/local/cuda/include/CL/cl.h" >&5
$as_echo "/usr/local/cuda/include/CL/cl.h" >&6; }
OPENCL_CFLAGS="-I/usr/local/cuda/include"
OPENCL_LIBS="-lOpenCL"
cl_cl_h=true
else
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: not found, assuming /usr/include/CL/cl.h" >&5
$as_echo "not found, assuming /usr/include/CL/cl.h" >&6; }
OPENCL_CFLAGS=""
OPENCL_LIBS="-lOpenCL"
cl_cl_h=true
fi
$as_echo "#define HAVE_OPENCL 1" >>confdefs.h
......@@ -6492,8 +6511,14 @@ else
HAVE_OPENCL_FALSE=
fi
if test "x$cl_cl_h" == "xtrue"; then :
$as_echo "#define HAVE_CL_CL_H 1" >>confdefs.h
fi
# Check whether --enable-gtk was given.
if test "${enable_gtk+set}" = set; then :
enableval=$enable_gtk;
......
......@@ -53,28 +53,49 @@ AS_IF([test "x$enable_gsl_fudge" == "xyes"],
AC_MSG_RESULT([no])
])
AC_ARG_ENABLE(opencl, AS_HELP_STRING([--enable-opencl], [Enable the use of OpenCL]))
AC_ARG_WITH(opencl, AS_HELP_STRING([--with-opencl], [Use OpenCL]))
AC_MSG_CHECKING([whether to use OpenCL])
AS_IF([test "x$enable_opencl" == "xyes"],
AS_IF([test "x$with_opencl" == "xyes"],
[
AC_MSG_RESULT([yes])
AC_CHECK_HEADERS([/System/Library/Frameworks/OpenCL.framework/Headers/cl.h],
AC_MSG_CHECKING([cl.h])
AS_IF([test -f /System/Library/Frameworks/OpenCL.framework/Headers/cl.h],
[
AC_MSG_RESULT([/System/Library/Frameworks/OpenCL.framework/Headers/cl.h])
OPENCL_CFLAGS="-I/System/Library/Frameworks/OpenCL.framework/Headers"
OPENCL_LIBS="-framework OpenCL"
],
])
AS_IF([test -f /usr/local/cuda/cuda/include/CL/cl.h],
[
OPENCL_CFLAGS="-I/usr/include/CL"
AC_MSG_RESULT([/usr/local/cuda/cuda/include/CL/cl.h])
OPENCL_CFLAGS="-I/usr/local/cuda/cuda/include"
OPENCL_LIBS="-lOpenCL"
cl_cl_h=true
])
AS_IF([test -f /usr/local/cuda/include/CL/cl.h],
[
AC_MSG_RESULT([/usr/local/cuda/include/CL/cl.h])
OPENCL_CFLAGS="-I/usr/local/cuda/include"
OPENCL_LIBS="-lOpenCL"
cl_cl_h=true
], [
AC_MSG_RESULT([not found, assuming /usr/include/CL/cl.h])
OPENCL_CFLAGS=""
OPENCL_LIBS="-lOpenCL"
cl_cl_h=true
])
AC_DEFINE([HAVE_OPENCL], [1], [Define to 1 if OpenCL is available])
have_opencl=true
],
[
], [
AC_MSG_RESULT([no])
])
AM_CONDITIONAL([HAVE_OPENCL], test x$have_opencl = xtrue)
AS_IF([test "x$cl_cl_h" == "xtrue"],
[
AC_DEFINE([HAVE_CL_CL_H], [1], [Define to 1 if CL/cl.h should be used])
])
AC_ARG_ENABLE(gtk, AS_HELP_STRING([--disable-gtk], [Disable GTK+/GLib]))
havegtk=false
......
......@@ -24,15 +24,20 @@ const sampler_t sampler_c = CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_REPEAT
| CLK_FILTER_LINEAR;
float4 get_q(int x, int y, float cx, float cy, float res, float clen, float k,
float *ttp, int sampling)
float4 get_q(int fs, int ss, float res, float clen, float k,
float *ttp, float corner_x, float corner_y,
float fsx, float fsy, float ssx, float ssy)
{
float rx, ry, r;
float az, tt;
float4 q;
float xs, ys;
rx = ((float)x - sampling*cx)/(res*sampling);
ry = ((float)y - sampling*cy)/(res*sampling);
xs = fs*fsx + ss*ssx;
ys = fs*fsy + ss*ssy;
rx = (xs + corner_x) / res;
ry = (ys + corner_y) / res;
r = sqrt(pow(rx, 2.0f) + pow(ry, 2.0f));
......@@ -185,19 +190,19 @@ float molecule_factor(global float *intensities, global float *flags,
kernel void diffraction(global float *diff, global float *tt, float klow,
int w, float cx, float cy,
int w, float corner_x, float corner_y,
float res, float clen, float16 cell,
global float *intensities,
int xmin, int ymin, int sampling, local float *tmp,
int min_fs, int min_ss, int sampling, local float *tmp,
float kstep,
read_only image2d_t func_a,
read_only image2d_t func_b,
read_only image2d_t func_c,
global float *flags)
global float *flags,
float fsx, float fsy, float ssx, float ssy)
{
float ttv;
const int x = get_global_id(0) + (xmin*sampling);
const int y = get_global_id(1) + (ymin*sampling);
float fs, ss;
float f_lattice, I_lattice;
float I_molecule;
float4 q;
......@@ -205,12 +210,21 @@ kernel void diffraction(global float *diff, global float *tt, float klow,
const int ly = get_local_id(1);
const int lb = get_local_id(2);
float k = klow + kstep * get_local_id(2);
const int ax = x / sampling;
const int ay = y / sampling;
const int afs = fs / sampling;
const int ass = ss / sampling; /* Array index of target pixel */
float intensity;
/* Calculate value */
q = get_q(x, y, cx, cy, res, clen, k, &ttv, sampling);
/* Calculate fractional coordinates in fs/ss */
fs = convert_float(get_global_id(0) + (min_fs*sampling))
/ convert_float(sampling);
ss = convert_float(get_global_id(1) + (min_ss*sampling))
/ convert_float(sampling);
/* Get the scattering vector */
q = get_q(fs, ss, res, clen, k, &ttv,
corner_x, corner_y, fsx, fsy, ssx, ssy);
/* Calculate the diffraction */
f_lattice = lattice_factor(cell, q, func_a, func_b, func_c);
I_molecule = molecule_factor(intensities, flags, cell, q);
I_lattice = pow(f_lattice, 2.0f);
......@@ -228,16 +242,19 @@ kernel void diffraction(global float *diff, global float *tt, float klow,
int i;
float sum = 0.0;
float val;
int idx;
idx = convert_int_rtz(fs)+w*convert_int_rtz(ss);
for ( i=0; i<sampling*sampling*get_local_size(2); i++ )
sum += tmp[i];
val = sum / (float)(sampling*sampling*get_local_size(2));
diff[ax+w*ay] = val;
val = sum / convert_float(sampling*sampling*get_local_size(2));
diff[idx] = val;
/* Leader thread also records the 2theta value.
* This should really be averaged across all pixels, but
* I strongly suspect this would be a waste of time. */
tt[ax+w*ay] = ttv;
tt[idx] = ttv;
}
}
......@@ -3,17 +3,25 @@
*
* OpenCL utility functions
*
* (c) 2006-2010 Thomas White <taw@physics.org>
* (c) 2006-2011 Thomas White <taw@physics.org>
*
* Part of CrystFEL - crystallography with a FEL
*
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#ifdef HAVE_CL_CL_H
#include <CL/cl.h>
#else
#include <cl.h>
#endif
#include "utils.h"
......
......@@ -129,6 +129,12 @@ void record_image(struct image *image, int do_poisson)
double ph_per_e;
double area;
double max_tt = 0.0;
int n_inf1 = 0;
int n_neg1 = 0;
int n_nan1 = 0;
int n_inf2 = 0;
int n_neg2 = 0;
int n_nan2 = 0;
/* How many photons are scattered per electron? */
area = M_PI*pow(image->beam->beam_radius, 2.0);
......@@ -152,15 +158,9 @@ void record_image(struct image *image, int do_poisson)
struct panel *p;
intensity = (double)image->data[x + image->width*y];
if ( isinf(intensity) ) {
ERROR("Infinity at %i,%i\n", x, y);
}
if ( intensity < 0.0 ) {
ERROR("Negative at %i,%i\n", x, y);
}
if ( isnan(intensity) ) {
ERROR("NaN at %i,%i\n", x, y);
}
if ( isinf(intensity) ) n_inf1++;
if ( intensity < 0.0 ) n_neg1++;
if ( isnan(intensity) ) n_nan1++;
p = find_panel(image->det, x, y);
......@@ -191,15 +191,11 @@ void record_image(struct image *image, int do_poisson)
image->data[x + image->width*y] = counts
* image->beam->adu_per_photon;
if ( isinf(image->data[x+image->width*y]) ) {
ERROR("Processed infinity at %i,%i\n", x, y);
}
if ( isnan(image->data[x+image->width*y]) ) {
ERROR("Processed NaN at %i,%i\n", x, y);
}
if ( image->data[x+image->width*y] < 0.0 ) {
ERROR("Processed negative at %i,%i %f\n", x, y, counts);
}
/* Sanity checks */
if ( isinf(image->data[x+image->width*y]) ) n_inf2++;
if ( isnan(image->data[x+image->width*y]) ) n_nan2++;
if ( image->data[x+image->width*y] < 0.0 ) n_neg2++;
if ( image->twotheta[x + image->width*y] > max_tt ) {
max_tt = image->twotheta[x + image->width*y];
......@@ -221,6 +217,15 @@ void record_image(struct image *image, int do_poisson)
rad2deg(tt_side), (image->lambda/(2.0*sin(tt_side/2.0)))/1e-9);
STATUS("Halve the d values to get the voxel size for a synthesis.\n");
if ( n_neg1 + n_inf1 + n_nan1 + n_neg2 + n_inf2 + n_nan2 ) {
ERROR("WARNING: The raw calculation produced %i negative"
" values, %i infinities and %i NaNs.\n",
n_neg1, n_inf1, n_nan1);
ERROR("WARNING: After processing, there were %i negative"
" values, %i infinities and %i NaNs.\n",
n_neg2, n_inf2, n_nan2);
}
}
......
......@@ -3,19 +3,27 @@
*
* Calculate diffraction patterns by Fourier methods (GPU version)
*
* (c) 2006-2010 Thomas White <taw@physics.org>
* (c) 2006-2011 Thomas White <taw@physics.org>
*
* Part of CrystFEL - crystallography with a FEL
*
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <stdlib.h>
#include <math.h>
#include <stdio.h>
#include <string.h>
#include <complex.h>
#ifdef HAVE_CL_CL_H
#include <CL/cl.h>
#else
#include <cl.h>
#endif
#include "image.h"
#include "utils.h"
......@@ -107,6 +115,50 @@ static void check_sinc_lut(struct gpu_context *gctx, int n)
}
static int sfloat(struct gpu_context *gctx, int idx, float val)
{
cl_int err;
err = clSetKernelArg(gctx->kern, idx, sizeof(cl_float), &val);
if ( err != CL_SUCCESS ) {
ERROR("Couldn't set kernel argument %i: %s\n",
idx, clError(err));
return 1;
}
return 0;
}
static int setint(struct gpu_context *gctx, int idx, int val)
{
cl_int err;
err = clSetKernelArg(gctx->kern, idx, sizeof(cl_int), &val);
if ( err != CL_SUCCESS ) {
ERROR("Couldn't set kernel argument %i: %s\n",
idx, clError(err));
return 1;
}
return 0;
}
static int setmem(struct gpu_context *gctx, int idx, cl_mem val)
{
cl_int err;
err = clSetKernelArg(gctx->kern, idx, sizeof(cl_mem), &val);
if ( err != CL_SUCCESS ) {
ERROR("Couldn't set kernel argument %i: %s\n",
idx, clError(err));
return 1;
}
return 0;
}
void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
int na, int nb, int nc, UnitCell *ucell)
{
......@@ -124,6 +176,10 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
cl_int4 ncells;
const int sampling = SAMPLING;
cl_float bwstep;
int n_inf = 0;
int n_neg = 0;
int n_nan = 0;
if ( gctx == NULL ) {
ERROR("GPU setup failed.\n");
......@@ -150,82 +206,34 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
check_sinc_lut(gctx, nb);
check_sinc_lut(gctx, nc);
err = clSetKernelArg(gctx->kern, 0, sizeof(cl_mem), &gctx->diff);
if ( err != CL_SUCCESS ) {
ERROR("Couldn't set arg 0: %s\n", clError(err));
return;
}
clSetKernelArg(gctx->kern, 1, sizeof(cl_mem), &gctx->tt);
if ( err != CL_SUCCESS ) {
ERROR("Couldn't set arg 1: %s\n", clError(err));
return;
}
clSetKernelArg(gctx->kern, 2, sizeof(cl_float), &klow);
if ( err != CL_SUCCESS ) {
ERROR("Couldn't set arg 2: %s\n", clError(err));
return;
}
clSetKernelArg(gctx->kern, 3, sizeof(cl_int), &image->width);
if ( err != CL_SUCCESS ) {
ERROR("Couldn't set arg 3: %s\n", clError(err));
return;
}
if ( setmem(gctx, 0, gctx->diff) ) return;
if ( setmem(gctx, 1, gctx->tt) ) return;
if ( setmem(gctx, 9, gctx->intensities) ) return;
if ( setmem(gctx, 15, gctx->sinc_luts[na-1]) ) return;
if ( setmem(gctx, 16, gctx->sinc_luts[nb-1]) ) return;
if ( setmem(gctx, 17, gctx->sinc_luts[nc-1]) ) return;
if ( setmem(gctx, 18, gctx->flags) ) return;
/* Unit cell */
clSetKernelArg(gctx->kern, 8, sizeof(cl_float16), &cell);
if ( err != CL_SUCCESS ) {
ERROR("Couldn't set arg 8: %s\n", clError(err));
return;
}
clSetKernelArg(gctx->kern, 9, sizeof(cl_mem), &gctx->intensities);
if ( err != CL_SUCCESS ) {
ERROR("Couldn't set arg 9: %s\n", clError(err));
return;
}
clSetKernelArg(gctx->kern, 12, sizeof(cl_int), &sampling);
if ( err != CL_SUCCESS ) {
ERROR("Couldn't set arg 12: %s\n", clError(err));
ERROR("Couldn't set unit cell: %s\n", clError(err));
return;
}
/* Local memory for reduction */
clSetKernelArg(gctx->kern, 13,
BWSAMPLING*SAMPLING*SAMPLING*sizeof(cl_float), NULL);
if ( err != CL_SUCCESS ) {
ERROR("Couldn't set arg 13: %s\n", clError(err));
return;
}
/* Bandwidth sampling step */
clSetKernelArg(gctx->kern, 14, sizeof(cl_float), &bwstep);
if ( err != CL_SUCCESS ) {
ERROR("Couldn't set arg 14: %s\n", clError(err));
return;
}
/* LUT in 'a' direction */
clSetKernelArg(gctx->kern, 15, sizeof(cl_mem), &gctx->sinc_luts[na-1]);
if ( err != CL_SUCCESS ) {
ERROR("Couldn't set arg 15: %s\n", clError(err));
ERROR("Couldn't set local memory: %s\n", clError(err));
return;
}
/* LUT in 'b' direction */
clSetKernelArg(gctx->kern, 16, sizeof(cl_mem), &gctx->sinc_luts[nb-1]);
if ( err != CL_SUCCESS ) {
ERROR("Couldn't set arg 16: %s\n", clError(err));
return;
}
/* LUT in 'c' direction */
clSetKernelArg(gctx->kern, 17, sizeof(cl_mem), &gctx->sinc_luts[nc-1]);
if ( err != CL_SUCCESS ) {
ERROR("Couldn't set arg 17: %s\n", clError(err));
return;
}
/* Flag array */
clSetKernelArg(gctx->kern, 18, sizeof(cl_mem), &gctx->flags);
if ( err != CL_SUCCESS ) {
ERROR("Couldn't set flag array: %s\n", clError(err));
return;
}
if ( sfloat(gctx, 2, klow) ) return;
if ( setint(gctx, 3, image->width) ) return;
if ( setint(gctx, 12, sampling) ) return;
if ( sfloat(gctx, 14, bwstep) ) return;
/* Iterate over panels */
event = malloc(image->det->n_panels * sizeof(cl_event));
......@@ -236,48 +244,24 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
/* In a future version of OpenCL, this could be done
* with a global work offset. But not yet... */
dims[0] = 1+image->det->panels[0].max_x-image->det->panels[0].min_x;
dims[1] = 1+image->det->panels[0].max_y-image->det->panels[0].min_y;
dims[0] = 1+image->det->panels[p].max_fs
-image->det->panels[p].min_fs;
dims[1] = 1+image->det->panels[p].max_ss
-image->det->panels[p].min_ss;
dims[0] *= SAMPLING;
dims[1] *= SAMPLING;
dims[2] = BWSAMPLING;
clSetKernelArg(gctx->kern, 4, sizeof(cl_float),
&image->det->panels[p].cx);
if ( err != CL_SUCCESS ) {
ERROR("Couldn't set arg 4: %s\n", clError(err));
return;
}
clSetKernelArg(gctx->kern, 5, sizeof(cl_float),
&image->det->panels[p].cy);
if ( err != CL_SUCCESS ) {
ERROR("Couldn't set arg 5: %s\n", clError(err));
return;
}
clSetKernelArg(gctx->kern, 6, sizeof(cl_float),
&image->det->panels[p].res);
if ( err != CL_SUCCESS ) {
ERROR("Couldn't set arg 6: %s\n", clError(err));
return;
}
clSetKernelArg(gctx->kern, 7, sizeof(cl_float),
&image->det->panels[p].clen);
if ( err != CL_SUCCESS ) {
ERROR("Couldn't set arg 7: %s\n", clError(err));
return;
}
clSetKernelArg(gctx->kern, 10, sizeof(cl_int),
&image->det->panels[p].min_x);
if ( err != CL_SUCCESS ) {
ERROR("Couldn't set arg 10: %s\n", clError(err));
return;
}
clSetKernelArg(gctx->kern, 11, sizeof(cl_int),
&image->det->panels[p].min_y);
if ( err != CL_SUCCESS ) {
ERROR("Couldn't set arg 11: %s\n", clError(err));
return;
}
if ( sfloat(gctx, 4, image->det->panels[p].cnx) ) return;
if ( sfloat(gctx, 5, image->det->panels[p].cny) ) return;
if ( sfloat(gctx, 6