process_hkl.c 12.8 KB
Newer Older
1
2
3
4
5
/*
 * process_hkl.c
 *
 * Assemble and process FEL Bragg intensities
 *
6
 * (c) 2006-2011 Thomas White <taw@physics.org>
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
 *
 * Part of CrystFEL - crystallography with a FEL
 *
 */


#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <stdarg.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <getopt.h>

#include "utils.h"
#include "statistics.h"
#include "sfac.h"
27
#include "reflist-utils.h"
28
#include "symmetry.h"
29
#include "stream.h"
30
#include "beam-parameters.h"
31
32
#include "reflist.h"
#include "image.h"
33
34


35
/* Number of divisions for intensity histograms */
36
#define NBINS (50)
Thomas White's avatar
Thomas White committed
37
38


39
40
41
static void show_help(const char *s)
{
	printf("Syntax: %s [options]\n\n", s);
Thomas White's avatar
Thomas White committed
42
43
44
	printf(
"Assemble and process FEL Bragg intensities.\n"
"\n"
45
46
47
"  -h, --help                Display this help message.\n"
"  -i, --input=<filename>    Specify input filename (\"-\" for stdin).\n"
"  -o, --output=<filename>   Specify output filename for merged intensities\n"
48
"                             Default: processed.hkl).\n"
49
"  -p, --pdb=<filename>      PDB file to use (default: molecule.pdb).\n"
50
"  -b, --beam=<file>         Get beam parameters from file (used for sigmas).\n"
Thomas White's avatar
Thomas White committed
51
"\n"
52
53
54
55
56
57
58
59
"      --max-only            Take the integrated intensity to be equal to the\n"
"                             maximum intensity measured for that reflection.\n"
"                             The default is to use the mean value from all\n"
"                             measurements.\n"
"      --sum                 Sum (rather than average) the intensities for the\n"
"                             final output list.  This is useful for comparing\n"
"                             results to radially summed powder patterns, but\n"
"                             will break R-factor analysis.\n"
60
"      --start-after=<n>     Skip n patterns at the start of the stream.\n"
61
62
63
"      --stop-after=<n>      Stop after processing n patterns.  Zero means\n"
"                             keep going until the end of the input, and is\n"
"                             the default.\n"
64
65
"  -g, --histogram=<h,k,l>   Calculate the histogram of measurements for this\n"
"                             reflection.\n"
66
"      --rmerge              Calculate and report Rmerge and Rpim\n"
67
"\n"
68
69
"      --scale               Scale each pattern for best fit with the current\n"
"                             model.\n"
Thomas White's avatar
Thomas White committed
70
"  -y, --symmetry=<sym>      Merge according to point group <sym>.\n"
71
72
"      --reference=<file>    Compare against intensities from <file> when\n"
"                             scaling or resolving ambiguities.\n"
73
74
"                             The symmetry of the reference list must be the\n"
"                             same as that given with '-y'.\n"
75
);
76
77
78
}


79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
static void plot_histogram(double *vals, int n)
{
	int i;
	double max = -INFINITY;
	double min = +INFINITY;
	double step;
	int histo[NBINS];
	FILE *fh;

	fh = fopen("histogram.dat", "w");
	if ( fh == NULL ) {
		ERROR("Couldn't open 'histogram.dat'\n");
		return;
	}

	for ( i=0; i<n; i++ ) {
		if ( vals[i] > max ) max = vals[i];
		if ( vals[i] < min ) min = vals[i];
	}
	STATUS("%f %f\n", min, max);
99
	min--;  max++;
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120

	for ( i=0; i<NBINS; i++ ) {
		histo[i] = 0;
	}

	step = (max-min)/NBINS;

	for ( i=0; i<n; i++ ) {
		int bin;
		bin = (vals[i]-min)/step;
		histo[bin]++;
	}

	for ( i=0; i<NBINS; i++ ) {
		fprintf(fh, "%f %i\n", min+step*i, histo[i]);
	}

	fclose(fh);
}


121
122
123
124
static void merge_pattern(RefList *model, RefList *new, int max_only,
                          const char *sym,
                          double *hist_vals, signed int hist_h,
                          signed int hist_k, signed int hist_l, int *hist_n)
125
{
126
127
	Reflection *refl;
	RefListIterator *iter;
128

129
130
131
	for ( refl = first_refl(new, &iter);
	      refl != NULL;
	      refl = next_refl(refl, iter) ) {
132
133

		double intensity;
134
		signed int h, k, l;
135
136
		Reflection *model_version;
		double model_int;
137

138
139
140
141
142
		get_indices(refl, &h, &k, &l);
		model_version = find_refl(model, h, k, l);
		if ( model_version == NULL ) {
			model_version = add_refl(model, h, k, l);
		}
143

Thomas White's avatar
Thomas White committed
144
		/* Put into the asymmetric unit for the target group */
145
		get_asymm(h, k, l, &h, &k, &l, sym);
146

147
148
		/* Read the intensity from the original location
		 * (i.e. before screwing around with symmetry) */
149
150
151
152
		intensity = get_intensity(refl);

		/* Get the current model intensity */
		model_int = get_intensity(model_version);
153

Thomas White's avatar
Thomas White committed
154
		/* User asked for max only? */
155
156
		if ( !max_only ) {
			set_int(model_version, model_int + intensity);
157
		} else {
158
159
			if ( intensity > get_intensity(model_version) ) {
				set_int(model_version, intensity);
160
161
162
			}
		}

163
164
165
		double dev = get_sum_squared_dev(model_version);
		set_sum_squared_dev(model_version,
		                    dev + pow(intensity-model_int, 2.0));
166

167
168
169
		/* Increase redundancy */
		int cur_redundancy = get_redundancy(model_version);
		set_redundancy(model_version, cur_redundancy+1);
Thomas White's avatar
Thomas White committed
170

171
172
173
174
175
176
177
178
		if ( hist_vals != NULL ) {
			int p = *hist_n;
			if ( (h==hist_h) && (k==hist_k) && (l==hist_l) ) {
				hist_vals[p] = intensity;
				*hist_n = p+1;
			}
		}

179
180
181
182
	}
}


183
184
185
enum {
	SCALE_NONE,
	SCALE_CONSTINT,
186
187
	SCALE_INTPERBRAGG,
	SCALE_TWOPASS,
188
189
190
};


191
static void scale_intensities(RefList *model, RefList *new, const char *sym)
192
193
{
	double s;
194
195
	double top = 0.0;
	double bot = 0.0;
196
	const int scaling = SCALE_INTPERBRAGG;
197
198
199
	Reflection *refl;
	RefListIterator *iter;
	Reflection *model_version;
200

201
202
203
	for ( refl = first_refl(new, &iter);
	      refl != NULL;
	      refl = next_refl(refl, iter) ) {
204
205
206

		double i1, i2;
		signed int hu, ku, lu;
207
		signed int h, k, l;
208

209
		get_indices(refl, &h, &k, &l);
210

211
212
213
		switch ( scaling ) {
		case SCALE_TWOPASS :

214
215
216
217
			model_version = find_refl(model, h, k, l);
			if ( model_version == NULL ) continue;

			get_asymm(h, k, l, &hu, &ku, &lu, sym);
218

219
220
			i1 = get_intensity(model_version);
			i2 = get_intensity(refl);
221
222
223
224

			/* Calculate LSQ estimate of scaling factor */
			top += i1 * i2;
			bot += i2 * i2;
225

226
			break;
227

228
229
230
		case SCALE_CONSTINT :

			/* Sum up the intensity in the pattern */
231
			i2 = get_intensity(refl);
232
233
234
235
			top += i2;

			break;

236
237
238
		case SCALE_INTPERBRAGG :

			/* Sum up the intensity in the pattern */
239
			i2 = get_intensity(refl);
240
241
242
243
244
			top += i2;
			bot += 1.0;

			break;

245
		}
246
247
248

	}

249
250
251
252
253
254
255
	switch ( scaling ) {
	case SCALE_TWOPASS :
		s = top / bot;
		break;
	case SCALE_CONSTINT :
		s = 1000.0 / top;
		break;
256
257
258
	case SCALE_INTPERBRAGG :
		s = 1000.0 / (top/bot);
		break;
259
	}
260
261

	/* Multiply the new pattern up by "s" */
262
263
264
265
266
267
268
	for ( refl = first_refl(new, &iter);
	      refl != NULL;
	      refl = next_refl(refl, iter) ) {

		double intensity = get_intensity(refl);
		set_int(refl, intensity*s);

269
270
271
272
	}
}


273
static void merge_all(FILE *fh, RefList *model,
274
                      int config_maxonly, int config_scale, int config_sum,
275
                      int config_startafter, int config_stopafter,
276
                      const char *sym,
277
                      int n_total_patterns,
278
279
280
                      double *hist_vals, signed int hist_h,
                      signed int hist_k, signed int hist_l,
                      int *hist_i)
281
{
282
	int rval;
Thomas White's avatar
Thomas White committed
283
	int n_patterns = 0;
284
285
286
	int n_used = 0;
	Reflection *refl;
	RefListIterator *iter;
287

288
289
290
	if ( skip_some_files(fh, config_startafter) ) {
		ERROR("Failed to skip first %i files.\n", config_startafter);
		return;
291
292
	}

293
294
	do {

295
		struct image image;
296

297
298
299
		/* Get data from next chunk */
		rval = read_chunk(fh, &image);
		if ( rval ) continue;
300

301
302
303
		n_patterns++;

		if ( (image.reflections != NULL) && (image.indexed_cell) ) {
304
305
306

			/* Scale if requested */
			if ( config_scale ) {
307
308
				scale_intensities(model, image.reflections,
				                  sym);
309
310
			}

311
312
313
			merge_pattern(model, image.reflections, config_maxonly,
			              sym, hist_vals, hist_h, hist_k, hist_l,
			              hist_i);
314

315
			n_used++;
316
317
318

		}

Thomas White's avatar
Thomas White committed
319
		free(image.filename);
320
321
322
		reflist_free(image.reflections);
		image_feature_list_free(image.features);
		cell_free(image.indexed_cell);
323

324
325
		progress_bar(n_patterns, n_total_patterns-config_startafter,
		             "Merging");
326

327
	} while ( rval == 0 );
328

329
330
	/* Divide by counts to get mean intensity if necessary */
	if ( !config_sum && !config_maxonly ) {
331

332
333
		Reflection *refl;
		RefListIterator *iter;
334

335
336
337
		for ( refl = first_refl(model, &iter);
		      refl != NULL;
		      refl = next_refl(refl, iter) ) {
338

339
340
341
			double intensity = get_intensity(refl);
			double sum_squared_dev = get_sum_squared_dev(refl);
			int red = get_redundancy(refl);
342

343
344
			set_int(refl, intensity / (double)red);
			set_sum_squared_dev(refl, sum_squared_dev/(double)red);
345

Thomas White's avatar
Thomas White committed
346
		}
347

Thomas White's avatar
Thomas White committed
348
349
	}

350
351
352
353
354
355
356
357
358
359
360
	/* Calculate ESDs */
	for ( refl = first_refl(model, &iter);
	      refl != NULL;
	      refl = next_refl(refl, iter) ) {

		double sum_squared_dev = get_sum_squared_dev(refl);
		int red = get_redundancy(refl);

		set_esd_intensity(refl, sum_squared_dev/(double)red);

	}
Thomas White's avatar
Thomas White committed
361

362
	STATUS("%i of the patterns could be used.\n", n_used);
363
364
365
}


366
367
368
369
int main(int argc, char *argv[])
{
	int c;
	char *filename = NULL;
370
	char *output = NULL;
371
	FILE *fh;
372
	RefList *model;
373
	UnitCell *cell = NULL;
Thomas White's avatar
Thomas White committed
374
	int config_maxonly = 0;
375
	int config_startafter = 0;
Thomas White's avatar
Thomas White committed
376
	int config_stopafter = 0;
377
	int config_sum = 0;
378
	int config_scale = 0;
379
	unsigned int n_total_patterns;
380
	char *sym = NULL;
381
	char *pdb = NULL;
382
383
384
385
	char *histo = NULL;
	signed int hist_h, hist_k, hist_l;
	double *hist_vals = NULL;
	int hist_i;
386
	struct beam_params *beam = NULL;
387
	int space_for_hist = 0;
388
389
390
391
392

	/* Long options */
	const struct option longopts[] = {
		{"help",               0, NULL,               'h'},
		{"input",              1, NULL,               'i'},
Thomas White's avatar
Thomas White committed
393
		{"output",             1, NULL,               'o'},
Thomas White's avatar
Thomas White committed
394
		{"max-only",           0, &config_maxonly,     1},
Thomas White's avatar
Thomas White committed
395
		{"output-every",       1, NULL,               'e'},
Thomas White's avatar
Thomas White committed
396
		{"stop-after",         1, NULL,               's'},
397
		{"start-after",        1, NULL,               'f'},
398
		{"sum",                0, &config_sum,         1},
399
		{"scale",              0, &config_scale,       1},
400
		{"symmetry",           1, NULL,               'y'},
401
		{"pdb",                1, NULL,               'p'},
402
		{"histogram",          1, NULL,               'g'},
403
		{"beam",               1, NULL,               'b'},
404
405
406
407
		{0, 0, NULL, 0}
	};

	/* Short options */
408
	while ((c = getopt_long(argc, argv, "hi:e:o:p:y:g:f:b:",
409
	                        longopts, NULL)) != -1) {
410
411

		switch (c) {
Thomas White's avatar
Thomas White committed
412
		case 'h' :
413
414
415
			show_help(argv[0]);
			return 0;

Thomas White's avatar
Thomas White committed
416
		case 'i' :
417
418
419
			filename = strdup(optarg);
			break;

Thomas White's avatar
Thomas White committed
420
		case 'o' :
421
422
423
			output = strdup(optarg);
			break;

Thomas White's avatar
Thomas White committed
424
		case 's' :
Thomas White's avatar
Thomas White committed
425
426
427
			config_stopafter = atoi(optarg);
			break;

428
429
430
431
		case 'f' :
			config_startafter = atoi(optarg);
			break;

Thomas White's avatar
Thomas White committed
432
		case 'p' :
433
434
435
			pdb = strdup(optarg);
			break;

436
437
438
439
		case 'y' :
			sym = strdup(optarg);
			break;

440
441
442
443
		case 'g' :
			histo = strdup(optarg);
			break;

444
445
446
447
448
449
450
451
452
		case 'b' :
			beam = get_beam_parameters(optarg);
			if ( beam == NULL ) {
				ERROR("Failed to load beam parameters"
				      " from '%s'\n", optarg);
				return 1;
			}
			break;

Thomas White's avatar
Thomas White committed
453
		case 0 :
454
455
			break;

Thomas White's avatar
Thomas White committed
456
		default :
457
458
459
460
461
462
463
464
465
466
			return 1;
		}

	}

	if ( filename == NULL ) {
		ERROR("Please specify filename using the -i option\n");
		return 1;
	}

467
468
469
470
	if ( output == NULL ) {
		output = strdup("processed.hkl");
	}

471
472
473
474
	if ( pdb == NULL ) {
		pdb = strdup("molecule.pdb");
	}

Thomas White's avatar
Thomas White committed
475
476
477
	cell = load_cell_from_pdb(pdb);
	free(pdb);

478
479
	if ( sym == NULL ) sym = strdup("1");

480
481
482
483
484
485
486
487
488
489
490
	/* Open the data stream */
	if ( strcmp(filename, "-") == 0 ) {
		fh = stdin;
	} else {
		fh = fopen(filename, "r");
	}
	free(filename);
	if ( fh == NULL ) {
		ERROR("Failed to open input file\n");
		return 1;
	}
Thomas White's avatar
Tidy-up    
Thomas White committed
491

492
493
494
495
	/* Count the number of patterns in the file */
	n_total_patterns = count_patterns(fh);
	STATUS("There are %i patterns to process\n", n_total_patterns);
	rewind(fh);
496

497
498
	model = reflist_new();

499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
	if ( histo != NULL ) {
		int r;
		r = sscanf(histo, "%i,%i,%i", &hist_h, &hist_k, &hist_l);
		if ( r != 3 ) {
			ERROR("Invalid indices for '--histogram'\n");
			return 1;
		}
		space_for_hist = n_total_patterns * num_general_equivs(sym);
		hist_vals = malloc(space_for_hist * sizeof(double));
		free(histo);
		STATUS("Histogramming %i %i %i -> ", hist_h, hist_k, hist_l);
		/* Put into the asymmetric cell for the target group */
		get_asymm(hist_h, hist_k, hist_l,
		          &hist_h, &hist_k, &hist_l, sym);
		STATUS("%i %i %i\n", hist_h, hist_k, hist_l);
	}

516
	hist_i = 0;
517
	merge_all(fh, model, config_maxonly, config_scale, config_sum,
518
	          config_startafter, config_stopafter,
519
520
                  sym, n_total_patterns,
                  hist_vals, hist_h, hist_k, hist_l, &hist_i);
521
	rewind(fh);
522
523
524
	if ( space_for_hist && (hist_i >= space_for_hist) ) {
		ERROR("Histogram array was too small!\n");
	}
525

526
527
528
529
530
531
	if ( hist_vals != NULL ) {
		STATUS("%i %i %i was seen %i times.\n", hist_h, hist_k, hist_l,
		                                        hist_i);
		plot_histogram(hist_vals, hist_i);
	}

532
533
	STATUS("Extra pass to calculate ESDs...\n");
	rewind(fh);
534
535
536
	merge_all(fh, model, config_maxonly, config_scale, 0,
	          config_startafter, config_stopafter, sym, n_total_patterns,
	          NULL, 0, 0, 0, NULL);
537

538
	write_reflist(output, model, cell);
539

540
	cell_free(cell);
541
	fclose(fh);
542
	free(sym);
543
	reflist_free(model);
Thomas White's avatar
Thomas White committed
544
	free(output);
545
	if ( cell != NULL ) cell_free(cell);
546

547
548
	return 0;
}