process_hkl.c 16.3 KB
Newer Older
1
2
3
4
5
/*
 * process_hkl.c
 *
 * Assemble and process FEL Bragg intensities
 *
Thomas White's avatar
Thomas White committed
6
 * (c) 2006-2010 Thomas White <taw@physics.org>
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
 *
 * Part of CrystFEL - crystallography with a FEL
 *
 */


#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <stdarg.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <getopt.h>

#include "utils.h"
#include "statistics.h"
#include "sfac.h"
27
#include "reflections.h"
28
#include "likelihood.h"
29
#include "symmetry.h"
30
31


32
/* Number of divisions for intensity histograms */
33
#define NBINS (50)
Thomas White's avatar
Thomas White committed
34
35


36
37
38
static void show_help(const char *s)
{
	printf("Syntax: %s [options]\n\n", s);
Thomas White's avatar
Thomas White committed
39
40
41
	printf(
"Assemble and process FEL Bragg intensities.\n"
"\n"
42
43
44
45
"  -h, --help                Display this help message.\n"
"  -i, --input=<filename>    Specify input filename (\"-\" for stdin).\n"
"  -o, --output=<filename>   Specify output filename for merged intensities\n"
"                             (don't specify for no output).\n"
46
"  -p, --pdb=<filename>      PDB file to use (default: molecule.pdb).\n"
Thomas White's avatar
Thomas White committed
47
"\n"
48
49
50
51
52
53
54
55
56
57
58
"      --max-only            Take the integrated intensity to be equal to the\n"
"                             maximum intensity measured for that reflection.\n"
"                             The default is to use the mean value from all\n"
"                             measurements.\n"
"      --sum                 Sum (rather than average) the intensities for the\n"
"                             final output list.  This is useful for comparing\n"
"                             results to radially summed powder patterns, but\n"
"                             will break R-factor analysis.\n"
"      --stop-after=<n>      Stop after processing n patterns.  Zero means\n"
"                             keep going until the end of the input, and is\n"
"                             the default.\n"
59
60
"  -g, --histogram=<h,k,l>   Calculate the histogram of measurements for this\n"
"                             reflection.\n"
61
"      --rmerge              Calculate and report Rmerge and Rpim\n"
62
"\n"
63
64
"      --scale               Scale each pattern for best fit with the current\n"
"                             model.\n"
Thomas White's avatar
Thomas White committed
65
"  -y, --symmetry=<sym>      Merge according to point group <sym>.\n"
66
);
67
68
69
}


70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
static void plot_histogram(double *vals, int n)
{
	int i;
	double max = -INFINITY;
	double min = +INFINITY;
	double step;
	int histo[NBINS];
	FILE *fh;

	fh = fopen("histogram.dat", "w");
	if ( fh == NULL ) {
		ERROR("Couldn't open 'histogram.dat'\n");
		return;
	}

	for ( i=0; i<n; i++ ) {
		if ( vals[i] > max ) max = vals[i];
		if ( vals[i] < min ) min = vals[i];
	}
	STATUS("%f %f\n", min, max);

	for ( i=0; i<NBINS; i++ ) {
		histo[i] = 0;
	}

	step = (max-min)/NBINS;

	for ( i=0; i<n; i++ ) {
		int bin;
		bin = (vals[i]-min)/step;
		histo[bin]++;
	}

	for ( i=0; i<NBINS; i++ ) {
		fprintf(fh, "%f %i\n", min+step*i, histo[i]);
	}

	fclose(fh);
}


111
112
113
114
115
/* Note "holo" needn't actually be a holohedral point group, if you want to try
 * something strange like resolving from a low-symmetry group into an even
 * lower symmetry one.
 */
static ReflItemList *get_twin_possibilities(const char *holo, const char *mero)
116
{
117
	ReflItemList *test_items;
118
	ReflItemList *twins;
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
	int np;

	np = num_general_equivs(holo) / num_general_equivs(mero);

	test_items = new_items();

	/* Some arbitrarily chosen reflections which can't be special
	 * reflections in any point group, i.e. lots of odd numbers,
	 * prime numbers and so on.  There's probably an analytical
	 * way of working these out, but this will do. */
	add_item(test_items, 1, 2, 3);
	add_item(test_items, 3, 7, 13);
	add_item(test_items, 5, 2, 1);

	twins = get_twins(test_items, holo, mero);
	delete_items(test_items);

Thomas White's avatar
Thomas White committed
136
137
	/* Idiot check.  Wouldn't be necessary if I could prove that the above
	 * set of arbitrarily chosen reflections were always general. */
138
139
140
141
142
143
144
145
146
	if ( num_items(twins) != np ) {
		ERROR("Whoops! Couldn't find all the twinning possiblities.\n");
		abort();
	}

	return twins;
}


Thomas White's avatar
Thomas White committed
147
148
149
static int resolve_twin(const double *model, ReflItemList *observed,
                        const double *patt, ReflItemList *items,
                        ReflItemList *twins, const char *holo, const char *mero)
150
{
151
152
153
154
155
156
157
158
159
160
161
162
163
	int n, i;
	double best_fom = 0.0;
	int best_op = 0;

	n = num_items(twins);

	for ( i=0; i<n; i++ ) {

		int j;
		int op;
		double *trial_ints = new_list_intensity();
		unsigned int *trial_counts = new_list_count();
		double fom;
Thomas White's avatar
Thomas White committed
164
		ReflItemList *intersection;
165
166
167
168
169
170
171
172
173
174

		op = get_item(twins, i)->op;

		for ( j=0; j<num_items(items); j++ ) {

			signed int h, k, l;
			struct refl_item *r = get_item(items, j);

			get_general_equiv(r->h, r->k, r->l, &h, &k, &l,
			                  holo, op);
175
			get_asymm(h, k, l, &h, &k, &l, mero);
176
177
178
179
180
181
182

			set_intensity(trial_ints, h, k, l,
			              lookup_intensity(patt, r->h, r->k, r->l));
			set_count(trial_counts, h, k, l, 1);

		}

Thomas White's avatar
Thomas White committed
183
184
185
		intersection = intersection_items(observed, items);
		fom = stat_pearson(trial_ints, model, intersection);
		delete_items(intersection);
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202

		free(trial_ints);
		free(trial_counts);

		//printf(" %f", fom);
		if ( fom > best_fom ) {
			best_fom = fom;
			best_op = op;
		}

	}
	//printf("\n");

	return best_op;
}


Thomas White's avatar
Thomas White committed
203
204
205
static void merge_pattern(double *model, ReflItemList *observed,
                          const double *new,  ReflItemList *items,
                          unsigned int *model_counts,  int mo,
206
                          ReflItemList *twins,
207
208
                          const char *holo, const char *mero, double *hist_vals,
                          signed int hist_h, signed int hist_k,
209
210
                          signed int hist_l, int *hist_n, double *devs,
                          double *tots, double *means)
211
{
212
	int i;
213
	int twin;
214
	ReflItemList *sym_items = new_items();
215

216
	if ( twins != NULL ) {
Thomas White's avatar
Thomas White committed
217
218
		twin = resolve_twin(model, observed, new, items,
		                    twins, holo, mero);
219
220
221
	} else {
		twin = 0;
	}
222

223
	for ( i=0; i<num_items(items); i++ ) {
224
225

		double intensity;
226
		signed int hs, ks, ls;
227
228
		signed int h, k, l;
		struct refl_item *item;
229

230
231
		item = get_item(items, i);

232
233
234
235
		hs = item->h;
		ks = item->k;
		ls = item->l;

236
237
		/* Transform into correct side of the twin law.
		 * "twin" is always zero if no de-twinning is performed. */
238
		get_general_equiv(hs, ks, ls, &h, &k, &l, holo, twin);
239
240

		/* Put into the asymmetric cell for the target group */
241
		get_asymm(h, k, l, &h, &k, &l, mero);
242
243
244

		intensity = lookup_intensity(new, h, k, l);

Thomas White's avatar
Thomas White committed
245
		/* User asked for max only? */
246
247
248
249
250
251
252
253
		if ( !mo ) {
			integrate_intensity(model, h, k, l, intensity);
		} else {
			if ( intensity > lookup_intensity(model, h, k, l) ) {
				set_intensity(model, h, k, l, intensity);
			}
		}

254
255
256
257
258
259
260
		if ( tots != NULL ) {
			double m;
			m = lookup_intensity(means, h, k, l);
			integrate_intensity(tots, h, k, l, intensity);
			integrate_intensity(devs, h, k, l, fabs(intensity-m));
		}

261
262
263
264
265
266
267
268
269
270
		/* Already seen this reflection in this pattern? Complain. */
		if ( !find_item(sym_items, h, k, l) ) {
			/* Add the asymmetric version of this reflection to our
			 * temporary list.  One reflection (in the asymmetric
			 * unit) may appear more than once per pattern if
			 * symmetrically related reflections are present.
			 * That's fine... */
		}	add_item(sym_items, h, k, l);

		/* Increase count count */
Thomas White's avatar
Thomas White committed
271
272
		integrate_count(model_counts, h, k, l, 1);

273
274
275
276
277
278
279
280
		if ( hist_vals != NULL ) {
			int p = *hist_n;
			if ( (h==hist_h) && (k==hist_k) && (l==hist_l) ) {
				hist_vals[p] = intensity;
				*hist_n = p+1;
			}
		}

281
	}
282
283
284
285
286

	/* Dump the reflections in this pattern into the overall list */
	union_items(observed, sym_items);

	delete_items(sym_items);
287
288
289
}


Thomas White's avatar
Thomas White committed
290
291
static void merge_all(FILE *fh, double **pmodel, ReflItemList **pobserved,
                      unsigned int **pcounts,
292
293
294
                      int config_maxonly, int config_scale, int config_sum,
                      int config_stopafter,
                      ReflItemList *twins, const char *holo, const char *mero,
295
296
                      int n_total_patterns, double *hist_vals,
                      signed int hist_h, signed int hist_k, signed int hist_l,
297
                      int *hist_i, double *devs, double *tots, double *means)
298
299
300
301
302
303
304
305
{
	char *rval;
	float f0;
	int n_nof0 = 0;
	int f0_valid = 0;
	int n_patterns = 0;
	double *new_pattern = new_list_intensity();
	ReflItemList *items = new_items();
Thomas White's avatar
Thomas White committed
306
307
308
309
	ReflItemList *observed = new_items();
	double *model = new_list_intensity();
	unsigned int *counts = new_list_count();
	int i;
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335

	do {

		char line[1024];
		signed int h, k, l;
		float intensity;
		int r;

		rval = fgets(line, 1023, fh);
		if ( (strncmp(line, "Reflections from indexing", 25) == 0)
		    || (strncmp(line, "New pattern", 11) == 0) ) {

			/* Start of first pattern? */
			if ( n_patterns == 0 ) {
				n_patterns++;
				continue;
			}

			/* Assume a default I0 if we don't have one by now */
			if ( config_scale && !f0_valid ) {
				n_nof0++;
				f0 = 1.0;
			}

			/* Scale if requested */
			if ( config_scale ) {
Thomas White's avatar
Thomas White committed
336
337
338
				scale_intensities(model, observed,
				                  new_pattern, items,
				                  f0, f0_valid);
339
340
341
			}

			/* Start of second or later pattern */
Thomas White's avatar
Thomas White committed
342
343
			merge_pattern(model, observed, new_pattern, items,
			              counts, config_maxonly,
344
345
			              twins, holo, mero,
			              hist_vals, hist_h, hist_k, hist_l,
346
			              hist_i, devs, tots, means);
347
348
349

			if ( n_patterns == config_stopafter ) break;

350
			/* Reset for the next pattern */
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
			n_patterns++;
			clear_items(items);

			progress_bar(n_patterns, n_total_patterns, "Merging");

			f0_valid = 0;

		}

		if ( strncmp(line, "f0 = ", 5) == 0 ) {
			r = sscanf(line, "f0 = %f", &f0);
			if ( r != 1 ) {
				f0 = 1.0;
				f0_valid = 0;
				continue;
			}
			f0_valid = 1;
		}

		r = sscanf(line, "%i %i %i %f", &h, &k, &l, &intensity);
		if ( r != 4 ) continue;

373
		/* Not interested in the central beam */
374
375
		if ( (h==0) && (k==0) && (l==0) ) continue;

376
377
		/* The same raw indices (before mapping into the asymmetric
		 * unit should not turn up twice in one pattern. */
378
379
380
381
382
		if ( find_item(items, h, k, l) != 0 ) {
			ERROR("More than one measurement for %i %i %i in"
			      " pattern number %i\n", h, k, l, n_patterns);
		}
		set_intensity(new_pattern, h, k, l, intensity);
383
384
385

		/* NB: This list contains raw indices, before working out
		 * where they belong in the asymmetric unit. */
386
387
388
389
390
391
392
		add_item(items, h, k, l);

	} while ( rval != NULL );

	delete_items(items);
	free(new_pattern);

393
394
	/* Calculate mean intensity if necessary */
	if ( !config_sum && !config_maxonly ) {
Thomas White's avatar
Thomas White committed
395
396
397
398
399
400
401
402
403
404
405
		for ( i=0; i<IDIM*IDIM*IDIM; i++ ) {
			if ( counts[i] > 0 ) {
				model[i] /= (double)counts[i];
			}
		}
	}

	*pmodel = model;
	*pcounts = counts;
	*pobserved = observed;

406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
	STATUS("%i patterns had no f0 valid value.\n", n_nof0);
}


static int count_patterns(FILE *fh)
{
	char *rval;

	int n_total_patterns = 0;
	do {
		char line[1024];

		rval = fgets(line, 1023, fh);
		if ( (strncmp(line, "Reflections from indexing", 25) == 0)
		    || (strncmp(line, "New pattern", 11) == 0) ) {
		    n_total_patterns++;
		}
	} while ( rval != NULL );

	return n_total_patterns;
}


429
430
431
432
int main(int argc, char *argv[])
{
	int c;
	char *filename = NULL;
433
	char *output = NULL;
434
	FILE *fh;
435
	double *model;
Thomas White's avatar
Thomas White committed
436
	unsigned int *counts;
437
	UnitCell *cell;
Thomas White's avatar
Thomas White committed
438
	int config_maxonly = 0;
Thomas White's avatar
Thomas White committed
439
	int config_stopafter = 0;
440
	int config_sum = 0;
441
	int config_scale = 0;
442
	int config_rmerge = 0;
443
	unsigned int n_total_patterns;
444
	char *sym = NULL;
445
	char *pdb = NULL;
446
	ReflItemList *twins;
Thomas White's avatar
Thomas White committed
447
	ReflItemList *observed;
448
	int i;
449
	const char *holo = NULL;
450
451
452
453
	char *histo = NULL;
	signed int hist_h, hist_k, hist_l;
	double *hist_vals = NULL;
	int hist_i;
454
455
456
457
458

	/* Long options */
	const struct option longopts[] = {
		{"help",               0, NULL,               'h'},
		{"input",              1, NULL,               'i'},
Thomas White's avatar
Thomas White committed
459
		{"output",             1, NULL,               'o'},
Thomas White's avatar
Thomas White committed
460
		{"max-only",           0, &config_maxonly,     1},
Thomas White's avatar
Thomas White committed
461
		{"output-every",       1, NULL,               'e'},
Thomas White's avatar
Thomas White committed
462
		{"stop-after",         1, NULL,               's'},
463
		{"sum",                0, &config_sum,         1},
464
		{"scale",              0, &config_scale,       1},
465
		{"symmetry",           1, NULL,               'y'},
466
		{"pdb",                1, NULL,               'p'},
467
		{"histogram",          1, NULL,               'g'},
468
		{"rmerge",             0, &config_rmerge,      1},
469
470
471
472
		{0, 0, NULL, 0}
	};

	/* Short options */
473
	while ((c = getopt_long(argc, argv, "hi:e:ro:p:y:g:",
474
	                        longopts, NULL)) != -1) {
475
476

		switch (c) {
Thomas White's avatar
Thomas White committed
477
		case 'h' :
478
479
480
			show_help(argv[0]);
			return 0;

Thomas White's avatar
Thomas White committed
481
		case 'i' :
482
483
484
			filename = strdup(optarg);
			break;

Thomas White's avatar
Thomas White committed
485
		case 'o' :
486
487
488
			output = strdup(optarg);
			break;

Thomas White's avatar
Thomas White committed
489
		case 's' :
Thomas White's avatar
Thomas White committed
490
491
492
			config_stopafter = atoi(optarg);
			break;

Thomas White's avatar
Thomas White committed
493
		case 'p' :
494
495
496
			pdb = strdup(optarg);
			break;

497
498
499
500
		case 'y' :
			sym = strdup(optarg);
			break;

501
502
503
504
		case 'g' :
			histo = strdup(optarg);
			break;

Thomas White's avatar
Thomas White committed
505
		case 0 :
506
507
			break;

Thomas White's avatar
Thomas White committed
508
		default :
509
510
511
512
513
			return 1;
		}

	}

514
515
516
517
518
519
	if ( config_sum && config_rmerge ) {
		ERROR("Options --sum and --rmerge do not make sense"
		      " together.\n");
		return 1;
	}

520
521
522
523
524
	if ( filename == NULL ) {
		ERROR("Please specify filename using the -i option\n");
		return 1;
	}

525
526
527
528
	if ( pdb == NULL ) {
		pdb = strdup("molecule.pdb");
	}

Thomas White's avatar
Thomas White committed
529
530
531
	cell = load_cell_from_pdb(pdb);
	free(pdb);

532
	/* Show useful symmetry information */
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
	if ( sym != NULL ) {
		holo = get_holohedral(sym);
		int np = num_general_equivs(holo) / num_general_equivs(sym);
		if ( np > 1 ) {

			STATUS("Resolving point group %s into %s "
			       "(%i possibilities)\n",
			       holo, sym, np);
			/* Get the list of twin/Bijvoet possibilities */
			twins = get_twin_possibilities(holo, sym);
			STATUS("Twin/inversion operation indices from %s are:",
			       holo);
			for ( i=0; i<num_items(twins); i++ ) {
				STATUS(" %i", get_item(twins, i)->op);
			}
			STATUS("\n");
549

550
551
552
553
		} else {
			STATUS("No twin/inversion resolution necessary.\n");
			twins = NULL;
		}
554
	} else {
555
		STATUS("Not performing any twin/inversion resolution.\n");
556
		twins = NULL;
557
558
		sym = strdup("1");
		holo = strdup("1");
559
	}
560

561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
	if ( histo != NULL ) {
		int r;
		r = sscanf(histo, "%i,%i,%i", &hist_h, &hist_k, &hist_l);
		if ( r != 3 ) {
			ERROR("Invalid indices for '--histogram'\n");
			return 1;
		}
		hist_vals = malloc(10*1024*sizeof(double));
		free(histo);
		STATUS("Histogramming %i %i %i -> ", hist_h, hist_k, hist_l);
		/* Put into the asymmetric cell for the target group */
		get_asymm(hist_h, hist_k, hist_l,
		          &hist_h, &hist_k, &hist_l, sym);
		STATUS("%i %i %i\n", hist_h, hist_k, hist_l);
	}

577
578
579
580
581
582
583
584
585
586
587
	/* Open the data stream */
	if ( strcmp(filename, "-") == 0 ) {
		fh = stdin;
	} else {
		fh = fopen(filename, "r");
	}
	free(filename);
	if ( fh == NULL ) {
		ERROR("Failed to open input file\n");
		return 1;
	}
Thomas White's avatar
Tidy-up    
Thomas White committed
588

589
590
591
592
	/* Count the number of patterns in the file */
	n_total_patterns = count_patterns(fh);
	STATUS("There are %i patterns to process\n", n_total_patterns);
	rewind(fh);
593

594
	hist_i = 0;
Thomas White's avatar
Thomas White committed
595
	merge_all(fh, &model, &observed, &counts,
596
	          config_maxonly, config_scale, config_sum, config_stopafter,
597
                  twins, holo, sym, n_total_patterns,
598
                  hist_vals, hist_h, hist_k, hist_l, &hist_i, NULL, NULL, NULL);
599
	rewind(fh);
600

601
602
603
604
605
606
	if ( hist_vals != NULL ) {
		STATUS("%i %i %i was seen %i times.\n", hist_h, hist_k, hist_l,
		                                        hist_i);
		plot_histogram(hist_vals, hist_i);
	}

607
	if ( output != NULL ) {
Thomas White's avatar
Thomas White committed
608
		write_reflections(output, observed, model, NULL, counts, cell);
609
	}
Thomas White's avatar
Thomas White committed
610

611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
	if ( config_rmerge ) {

		double *devs = new_list_intensity();
		double *tots = new_list_intensity();
		double total_dev = 0.0;
		double total_tot = 0.0;
		double total_pdev = 0.0;
		double total_rdev = 0.0;
		int i;

		STATUS("Extra pass to calculate figures of merit...\n");

		rewind(fh);
		merge_all(fh, &model, &observed, &counts,
		          config_maxonly, config_scale, 0,
		          config_stopafter, twins, holo, sym, n_total_patterns,
		          NULL, 0, 0, 0, NULL, devs, tots, model);

		for ( i=0; i<num_items(observed); i++ ) {

			struct refl_item *it;
			signed int h, k, l;
			double dev;
			unsigned int count;

			it = get_item(observed, i);
			h = it->h;  k = it->k,  l = it->l;

			dev = lookup_intensity(devs, h, k, l);
			count = lookup_count(counts, h, k, l);

			if ( count < 2 ) continue;

			total_dev += dev;
			total_pdev += sqrt(1.0/(count-1.0))*dev;
			total_rdev += sqrt(count/(count-1.0))*dev;
			total_tot += lookup_intensity(tots, h, k, l);

		}

		STATUS("Rmerge = %f%%\n", 100.0*total_dev/total_tot);
		STATUS("  Rpim = %f%%\n", 100.0*total_pdev/total_tot);
		STATUS("  Rrim = %f%%\n", 100.0*total_rdev/total_tot);

	}

	fclose(fh);
658
	free(sym);
Thomas White's avatar
Thomas White committed
659
	free(model);
Thomas White's avatar
Thomas White committed
660
	free(counts);
Thomas White's avatar
Thomas White committed
661
662
	free(output);
	free(cell);
663

664
665
	return 0;
}