Commit 9aafea1b authored by Thomas White's avatar Thomas White
Browse files

Use symmetry when simulating on the GPU

parent c25120f4
......@@ -70,21 +70,12 @@ float lattice_factor(float16 cell, float4 q,
}
float get_intensity(global float *intensities, float16 cell, float4 q)
float lookup_intensity(global float *intensities,
signed int h, signed int k, signed int l)
{
float hf, kf, lf;
int h, k, l;
int idx;
hf = cell.s0*q.x + cell.s1*q.y + cell.s2*q.z; /* h */
kf = cell.s3*q.x + cell.s4*q.y + cell.s5*q.z; /* k */
lf = cell.s6*q.x + cell.s7*q.y + cell.s8*q.z; /* l */
h = round(hf);
k = round(kf);
l = round(lf);
/* Return a silly value if indices are out of range */
/* Out of range? */
if ( (abs(h) > INDMAX) || (abs(k) > INDMAX) || (abs(l) > INDMAX) ) {
return 0.0;
}
......@@ -93,14 +84,106 @@ float get_intensity(global float *intensities, float16 cell, float4 q)
k = (k>=0) ? k : k+IDIM;
l = (l>=0) ? l : l+IDIM;
if ( (h>=IDIM) || (k>=IDIM) || (l>=IDIM) ) return 0.0;
idx = h + (IDIM*k) + (IDIM*IDIM*l);
return intensities[idx];
}
float lookup_flagged_intensity(global float *intensities, global float *flags,
signed int h, signed int k, signed int l)
{
return lookup_intensity(intensities, h, k, l)
* lookup_intensity(flags, h, k, l);
}
float molecule_factor(global float *intensities, global float *flags,
float16 cell, float4 q)
{
float hf, kf, lf;
int h, k, l;
float val = 0.0;
signed int i;
#ifdef FLAT_INTENSITIES
return 1.0e5;
#else
hf = cell.s0*q.x + cell.s1*q.y + cell.s2*q.z; /* h */
kf = cell.s3*q.x + cell.s4*q.y + cell.s5*q.z; /* k */
lf = cell.s6*q.x + cell.s7*q.y + cell.s8*q.z; /* l */
h = round(hf);
k = round(kf);
l = round(lf);
i = -h-k;
#ifdef PG1
val += lookup_flagged_intensity(intensities, flags, h, k, l);
#endif /* PG1 */
#ifdef PG1BAR
val += lookup_flagged_intensity(intensities, flags, h, k, l);
val += lookup_flagged_intensity(intensities, flags, -h, -k, -l);
#endif /* PG1BAR */
#ifdef PG6
val += lookup_flagged_intensity(intensities, flags, h, k, l);
val += lookup_flagged_intensity(intensities, flags, i, h, l);
val += lookup_flagged_intensity(intensities, flags, k, i, l);
val += lookup_flagged_intensity(intensities, flags, -h, -k, l);
val += lookup_flagged_intensity(intensities, flags, -i, -h, l);
val += lookup_flagged_intensity(intensities, flags, -k, -i, l);
#endif /* PG6 */
#ifdef PG6M
val += lookup_flagged_intensity(intensities, flags, h, k, l);
val += lookup_flagged_intensity(intensities, flags, i, h, l);
val += lookup_flagged_intensity(intensities, flags, k, i, l);
val += lookup_flagged_intensity(intensities, flags, -h, -k, l);
val += lookup_flagged_intensity(intensities, flags, -i, -h, l);
val += lookup_flagged_intensity(intensities, flags, -k, -i, l);
val += lookup_flagged_intensity(intensities, flags, -h, -k, -l);
val += lookup_flagged_intensity(intensities, flags, -i, -h, -l);
val += lookup_flagged_intensity(intensities, flags, -k, -i, -l);
val += lookup_flagged_intensity(intensities, flags, h, k, -l);
val += lookup_flagged_intensity(intensities, flags, i, h, -l);
val += lookup_flagged_intensity(intensities, flags, k, i, -l);
#endif /* PG6M */
#ifdef PG6MMM
val += lookup_flagged_intensity(intensities, flags, h, k, l);
val += lookup_flagged_intensity(intensities, flags, i, h, l);
val += lookup_flagged_intensity(intensities, flags, k, i, l);
val += lookup_flagged_intensity(intensities, flags, -h, -k, l);
val += lookup_flagged_intensity(intensities, flags, -i, -h, l);
val += lookup_flagged_intensity(intensities, flags, -k, -i, l);
val += lookup_flagged_intensity(intensities, flags, k, h, -l);
val += lookup_flagged_intensity(intensities, flags, h, i, -l);
val += lookup_flagged_intensity(intensities, flags, i, k, -l);
val += lookup_flagged_intensity(intensities, flags, -k, -h, -l);
val += lookup_flagged_intensity(intensities, flags, -h, -i, -l);
val += lookup_flagged_intensity(intensities, flags, -i, -k, -l);
val += lookup_flagged_intensity(intensities, flags, -h, -k, -l);
val += lookup_flagged_intensity(intensities, flags, -i, -h, -l);
val += lookup_flagged_intensity(intensities, flags, -k, i, -l);
val += lookup_flagged_intensity(intensities, flags, h, k, -l);
val += lookup_flagged_intensity(intensities, flags, i, h, -l);
val += lookup_flagged_intensity(intensities, flags, k, i, -l);
val += lookup_flagged_intensity(intensities, flags, -k, -h, l);
val += lookup_flagged_intensity(intensities, flags, -h, -i, l);
val += lookup_flagged_intensity(intensities, flags, -i, -k, l);
val += lookup_flagged_intensity(intensities, flags, k, h, l);
val += lookup_flagged_intensity(intensities, flags, h, i, l);
val += lookup_flagged_intensity(intensities, flags, i, k, l);
#endif /* PG6MMM */
return val;
#endif /* FLAT_INTENSITIIES */
}
kernel void diffraction(global float *diff, global float *tt, float klow,
int w, float cx, float cy,
float res, float clen, float16 cell,
......@@ -109,7 +192,8 @@ kernel void diffraction(global float *diff, global float *tt, float klow,
float kstep,
read_only image2d_t func_a,
read_only image2d_t func_b,
read_only image2d_t func_c)
read_only image2d_t func_c,
global float *flags)
{
float ttv;
const int x = get_global_id(0) + (xmin*sampling);
......@@ -128,7 +212,7 @@ kernel void diffraction(global float *diff, global float *tt, float klow,
/* Calculate value */
q = get_q(x, y, cx, cy, res, clen, k, &ttv, sampling);
f_lattice = lattice_factor(cell, q, func_a, func_b, func_c);
I_molecule = get_intensity(intensities, cell, q);
I_molecule = molecule_factor(intensities, flags, cell, q);
I_lattice = pow(f_lattice, 2.0f);
/* Write the value to local memory */
......
......@@ -146,13 +146,14 @@ static void show_build_log(cl_program prog, cl_device_id dev)
cl_program load_program(const char *filename, cl_context ctx,
cl_device_id dev, cl_int *err)
cl_device_id dev, cl_int *err, const char *extra_cflags)
{
FILE *fh;
cl_program prog;
char *source;
size_t len;
cl_int r;
char cflags[1024] = "";
fh = fopen(filename, "r");
if ( fh == NULL ) {
......@@ -172,9 +173,12 @@ cl_program load_program(const char *filename, cl_context ctx,
return 0;
}
r = clBuildProgram(prog, 0, NULL,
"-Werror -I"DATADIR"/crystfel/ -cl-no-signed-zeros",
NULL, NULL);
snprintf(cflags, 1023, "-Werror ");
strncat(cflags, "-I"DATADIR"/crystfel/ ", 1023-strlen(cflags));
strncat(cflags, "-cl-no-signed-zeros ", 1023-strlen(cflags));
strncat(cflags, extra_cflags, 1023-strlen(cflags));
r = clBuildProgram(prog, 0, NULL, cflags, NULL, NULL);
if ( r != CL_SUCCESS ) {
ERROR("Couldn't build program '%s'\n", filename);
show_build_log(prog, dev);
......
......@@ -20,7 +20,8 @@
extern const char *clError(cl_int err);
extern cl_device_id get_cl_dev(cl_context ctx, int n);
extern cl_program load_program(const char *filename, cl_context ctx,
cl_device_id dev, cl_int *err);
cl_device_id dev, cl_int *err,
const char *extra_cflags);
#endif /* CLUTILS_H */
......@@ -39,6 +39,7 @@ struct gpu_context
cl_program prog;
cl_kernel kern;
cl_mem intensities;
cl_mem flags;
cl_mem tt;
size_t tt_size;
......@@ -219,6 +220,13 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
return;
}
/* Flag array */
clSetKernelArg(gctx->kern, 18, sizeof(cl_mem), &gctx->flags);
if ( err != CL_SUCCESS ) {
ERROR("Couldn't set flag array: %s\n", clError(err));
return;
}
/* Iterate over panels */
event = malloc(image->det->n_panels * sizeof(cl_event));
for ( p=0; p<image->det->n_panels; p++ ) {
......@@ -331,7 +339,7 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
/* Setup the OpenCL stuff, create buffers, load the structure factor table */
struct gpu_context *setup_gpu(int no_sfac, struct image *image,
const double *intensities, unsigned char *flags,
int dev_num)
const char *sym, int dev_num)
{
struct gpu_context *gctx;
cl_uint nplat;
......@@ -341,8 +349,11 @@ struct gpu_context *setup_gpu(int no_sfac, struct image *image,
cl_device_id dev;
size_t intensities_size;
float *intensities_ptr;
size_t flags_size;
float *flags_ptr;
size_t maxwgsize;
int i;
char cflags[512] = "";
STATUS("Setting up GPU...\n");
......@@ -396,8 +407,9 @@ struct gpu_context *setup_gpu(int no_sfac, struct image *image,
}
} else {
for ( i=0; i<IDIM*IDIM*IDIM; i++ ) {
intensities_ptr[i] = 1e10;
intensities_ptr[i] = 1e5;
}
strncat(cflags, "-DFLAT_INTENSITIES ", 511-strlen(cflags));
}
gctx->intensities = clCreateBuffer(gctx->ctx,
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
......@@ -409,6 +421,44 @@ struct gpu_context *setup_gpu(int no_sfac, struct image *image,
}
free(intensities_ptr);
if ( strcmp(sym, "1") == 0 ) {
strncat(cflags, "-DPG1 ", 511-strlen(cflags));
} else if ( strcmp(sym, "-1") == 0 ) {
strncat(cflags, "-DPG1BAR ", 511-strlen(cflags));
} else if ( strcmp(sym, "6/mmm") == 0 ) {
strncat(cflags, "-DPG6MMM ", 511-strlen(cflags));
} else if ( strcmp(sym, "6") == 0 ) {
strncat(cflags, "-DPG6 ", 511-strlen(cflags));
} else if ( strcmp(sym, "6/m") == 0 ) {
strncat(cflags, "-DPG6M ", 511-strlen(cflags));
} else {
ERROR("Sorry! Point group '%s' is not currently supported"
" on the GPU. I'm using '1' instead.\n", sym);
strncat(cflags, "-DPG1 ", 511-strlen(cflags));
}
/* Create a flag array */
flags_size = IDIM*IDIM*IDIM*sizeof(cl_float);
flags_ptr = malloc(flags_size);
if ( flags != NULL ) {
for ( i=0; i<IDIM*IDIM*IDIM; i++ ) {
flags_ptr[i] = flags[i];
}
} else {
for ( i=0; i<IDIM*IDIM*IDIM; i++ ) {
flags_ptr[i] = 1.0;
}
}
gctx->flags = clCreateBuffer(gctx->ctx,
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
flags_size, flags_ptr, &err);
if ( err != CL_SUCCESS ) {
ERROR("Couldn't allocate flag buffer\n");
free(gctx);
return NULL;
}
free(flags_ptr);
gctx->tt_size = image->width*image->height*sizeof(cl_float);
gctx->tt = clCreateBuffer(gctx->ctx, CL_MEM_WRITE_ONLY, gctx->tt_size,
NULL, &err);
......@@ -419,7 +469,7 @@ struct gpu_context *setup_gpu(int no_sfac, struct image *image,
}
gctx->prog = load_program(DATADIR"/crystfel/diffraction.cl", gctx->ctx,
dev, &err);
dev, &err, cflags);
if ( err != CL_SUCCESS ) {
free(gctx);
return NULL;
......
......@@ -27,7 +27,8 @@ extern void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
int na, int nb, int nc, UnitCell *ucell);
extern struct gpu_context *setup_gpu(int no_sfac, struct image *image,
const double *intensities,
const unsigned char *flags, int dev_num);
const unsigned char *flags,
const char *sym, int dev_num);
extern void cleanup_gpu(struct gpu_context *gctx);
#else
......@@ -41,7 +42,8 @@ static void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
static struct gpu_context *setup_gpu(int no_sfac, struct image *image,
const double *intensities,
const unsigned char *flags, int dev_num)
const unsigned char *flags,
const char *sym, int dev_num)
{
return NULL;
}
......
......@@ -286,7 +286,7 @@ static void simulate_and_write(struct image *simage, struct gpu_context **gctx,
* Unfortunately, setup has to go here since until now we don't know
* enough about the situation. */
if ( (gctx != NULL) && (*gctx == NULL) ) {
*gctx = setup_gpu(0, simage, intensities, flags, gpu_dev);
*gctx = setup_gpu(0, simage, intensities, flags, sym, gpu_dev);
}
if ( (gctx != NULL) && (*gctx != NULL) ) {
......
......@@ -539,7 +539,8 @@ int main(int argc, char *argv[])
if ( config_gpu ) {
if ( gctx == NULL ) {
gctx = setup_gpu(config_nosfac, &image,
intensities, flags, gpu_dev);
intensities, flags, sym,
gpu_dev);
}
get_diffraction_gpu(gctx, &image, na, nb, nc, cell);
} else {
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment