/*
 * process_hkl.c
 *
 * Assemble and process FEL Bragg intensities
 *
 * (c) 2006-2010 Thomas White <taw@physics.org>
 *
 * Part of CrystFEL - crystallography with a FEL
 *
 */


#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <errno.h>
#include <getopt.h>
#include <limits.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "utils.h"
#include "statistics.h"
#include "sfac.h"
#include "reflections.h"
#include "likelihood.h"
#include "symmetry.h"


32
/* How many equal-width bins an intensity histogram is divided into */
#define NBINS 50
Thomas White's avatar
Thomas White committed
34
35


36
37
38
/* Print the usage summary to stdout.
 *
 * "s" is the program name to show on the "Syntax:" line (main() passes
 * argv[0]).  The option descriptions that follow are fixed text. */
static void show_help(const char *s)
{
	/* Contains no conversion specifiers, so it is written out verbatim */
	static const char option_text[] =
"Assemble and process FEL Bragg intensities.\n"
"\n"
"  -h, --help                Display this help message.\n"
"  -i, --input=<filename>    Specify input filename (\"-\" for stdin).\n"
"  -o, --output=<filename>   Specify output filename for merged intensities\n"
"                             (don't specify for no output).\n"
"  -p, --pdb=<filename>      PDB file to use (default: molecule.pdb).\n"
"\n"
"      --max-only            Take the integrated intensity to be equal to the\n"
"                             maximum intensity measured for that reflection.\n"
"                             The default is to use the mean value from all\n"
"                             measurements.\n"
"      --sum                 Sum (rather than average) the intensities for the\n"
"                             final output list.  This is useful for comparing\n"
"                             results to radially summed powder patterns, but\n"
"                             will break R-factor analysis.\n"
"      --stop-after=<n>      Stop after processing n patterns.  Zero means\n"
"                             keep going until the end of the input, and is\n"
"                             the default.\n"
"  -g, --histogram=<h,k,l>   Calculate the histogram of measurements for this\n"
"                             reflection.\n"
"\n"
"      --scale               Scale each pattern for best fit with the current\n"
"                             model.\n"
"  -y, --symmetry=<sym>      Merge according to point group <sym>.\n";

	printf("Syntax: %s [options]\n\n", s);
	fputs(option_text, stdout);
}


69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
static void plot_histogram(double *vals, int n)
{
	int i;
	double max = -INFINITY;
	double min = +INFINITY;
	double step;
	int histo[NBINS];
	FILE *fh;

	fh = fopen("histogram.dat", "w");
	if ( fh == NULL ) {
		ERROR("Couldn't open 'histogram.dat'\n");
		return;
	}

	for ( i=0; i<n; i++ ) {
		if ( vals[i] > max ) max = vals[i];
		if ( vals[i] < min ) min = vals[i];
	}
	STATUS("%f %f\n", min, max);

	for ( i=0; i<NBINS; i++ ) {
		histo[i] = 0;
	}

	step = (max-min)/NBINS;

	for ( i=0; i<n; i++ ) {
		int bin;
		bin = (vals[i]-min)/step;
		histo[bin]++;
	}

	for ( i=0; i<NBINS; i++ ) {
		fprintf(fh, "%f %i\n", min+step*i, histo[i]);
	}

	fclose(fh);
}


110
111
112
113
114
/* Note "holo" needn't actually be a holohedral point group, if you want to try
 * something strange like resolving from a low-symmetry group into an even
 * lower symmetry one.
 */
static ReflItemList *get_twin_possibilities(const char *holo, const char *mero)
115
{
116
	ReflItemList *test_items;
117
	ReflItemList *twins;
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
	int np;

	np = num_general_equivs(holo) / num_general_equivs(mero);

	test_items = new_items();

	/* Some arbitrarily chosen reflections which can't be special
	 * reflections in any point group, i.e. lots of odd numbers,
	 * prime numbers and so on.  There's probably an analytical
	 * way of working these out, but this will do. */
	add_item(test_items, 1, 2, 3);
	add_item(test_items, 3, 7, 13);
	add_item(test_items, 5, 2, 1);

	twins = get_twins(test_items, holo, mero);
	delete_items(test_items);

Thomas White's avatar
Thomas White committed
135
136
	/* Idiot check.  Wouldn't be necessary if I could prove that the above
	 * set of arbitrarily chosen reflections were always general. */
137
138
139
140
141
142
143
144
145
	if ( num_items(twins) != np ) {
		ERROR("Whoops! Couldn't find all the twinning possiblities.\n");
		abort();
	}

	return twins;
}


Thomas White's avatar
Thomas White committed
146
147
148
/* Pick the twin operation under which one pattern agrees best with the model.
 *
 * For each entry of "twins", every reflection listed in "items" is passed
 * through get_general_equiv() for that entry's operation (in group "holo")
 * and then through get_asymm() for group "mero".  Its intensity, read from
 * "patt" at the untransformed indices, is stored at the resulting indices in
 * a temporary list.  The temporary list is then compared with "model" using
 * stat_pearson() over the intersection of "observed" and "items".
 *
 * Returns the "op" value of the entry with the highest figure of merit, or 0
 * if no entry gave a figure of merit greater than zero. */
static int resolve_twin(const double *model, ReflItemList *observed,
                        const double *patt, ReflItemList *items,
                        ReflItemList *twins, const char *holo, const char *mero)
{
	int n, n_items, i;
	double best_fom = 0.0;
	int best_op = 0;
	ReflItemList *intersection;

	n = num_items(twins);
	n_items = num_items(items);

	/* Neither "observed" nor "items" is altered while the candidates are
	 * being tried, so the intersection only has to be worked out once. */
	intersection = intersection_items(observed, items);

	for ( i=0; i<n; i++ ) {

		int j;
		int op;
		double *trial_ints = new_list_intensity();
		unsigned int *trial_counts = new_list_count();
		double fom;

		op = get_item(twins, i)->op;

		for ( j=0; j<n_items; j++ ) {

			signed int h, k, l;
			struct refl_item *r = get_item(items, j);

			get_general_equiv(r->h, r->k, r->l, &h, &k, &l,
			                  holo, op);
			get_asymm(h, k, l, &h, &k, &l, mero);

			/* The measurement itself lives at the raw indices */
			set_intensity(trial_ints, h, k, l,
			              lookup_intensity(patt, r->h, r->k, r->l));
			set_count(trial_counts, h, k, l, 1);

		}

		fom = stat_pearson(trial_ints, model, intersection);

		free(trial_ints);
		free(trial_counts);

		if ( fom > best_fom ) {
			best_fom = fom;
			best_op = op;
		}

	}

	delete_items(intersection);

	return best_op;
}


Thomas White's avatar
Thomas White committed
202
203
204
/* Merge the reflections of one pattern into the running model.
 *
 * model         Running intensity list, updated in place.
 * observed      List of reflections seen so far; this pattern's (asymmetric)
 *               reflections are added to it with union_items().
 * new           Intensities of this pattern, stored at the raw indices.
 * items         Raw indices of the reflections present in this pattern.
 * model_counts  Per-reflection measurement counts, updated in place.
 * mo            Non-zero to keep only the largest intensity seen for each
 *               reflection instead of accumulating with
 *               integrate_intensity().
 * twins         Twin possibilities, or NULL to skip twin resolution (in which
 *               case operation 0 is used).
 * holo, mero    Point groups passed to get_general_equiv() and get_asymm().
 * hist_vals     If not NULL, intensities of reflections whose asymmetric
 *               indices equal hist_h, hist_k, hist_l are appended here and
 *               *hist_n is advanced.
 *
 * NOTE(review): writes to "hist_vals" are not bounds-checked here because the
 * capacity of the buffer is not passed in; the caller must make it large
 * enough. */
static void merge_pattern(double *model, ReflItemList *observed,
                          const double *new,  ReflItemList *items,
                          unsigned int *model_counts,  int mo,
                          ReflItemList *twins,
                          const char *holo, const char *mero, double *hist_vals,
                          signed int hist_h, signed int hist_k,
                          signed int hist_l, int *hist_n)
{
	int i;
	int twin;
	ReflItemList *sym_items = new_items();

	if ( twins != NULL ) {
		twin = resolve_twin(model, observed, new, items,
		                    twins, holo, mero);
	} else {
		twin = 0;
	}

	for ( i=0; i<num_items(items); i++ ) {

		double intensity;
		signed int hs, ks, ls;
		signed int h, k, l;
		struct refl_item *item;

		item = get_item(items, i);

		hs = item->h;
		ks = item->k;
		ls = item->l;

		/* Transform into correct side of the twin law.
		 * "twin" is always zero if no de-twinning is performed. */
		get_general_equiv(hs, ks, ls, &h, &k, &l, holo, twin);

		/* Put into the asymmetric cell for the target group */
		get_asymm(h, k, l, &h, &k, &l, mero);

		/* Read the intensity from its original location, i.e. the raw
		 * indices it was stored under before any symmetry operations
		 * were applied. */
		intensity = lookup_intensity(new, hs, ks, ls);

		/* User asked for max only? */
		if ( !mo ) {
			integrate_intensity(model, h, k, l, intensity);
		} else {
			if ( intensity > lookup_intensity(model, h, k, l) ) {
				set_intensity(model, h, k, l, intensity);
			}
		}

		/* Add the asymmetric version of this reflection to our
		 * temporary list, unless it is already there.  One reflection
		 * (in the asymmetric unit) may appear more than once per
		 * pattern if symmetrically related reflections are present.
		 * That's fine, but it only needs to be listed once. */
		if ( !find_item(sym_items, h, k, l) ) {
			add_item(sym_items, h, k, l);
		}

		/* Increase the measurement count */
		integrate_count(model_counts, h, k, l, 1);

		if ( hist_vals != NULL ) {
			int p = *hist_n;
			if ( (h==hist_h) && (k==hist_k) && (l==hist_l) ) {
				hist_vals[p] = intensity;
				*hist_n = p+1;
			}
		}

	}

	/* Dump the reflections in this pattern into the overall list */
	union_items(observed, sym_items);

	delete_items(sym_items);
}


Thomas White's avatar
Thomas White committed
281
282
/* Read every pattern from the stream "fh" and merge them into one model.
 *
 * A pattern starts at a line beginning "Reflections from indexing" or
 * "New pattern".  Within a pattern, a line "f0 = <value>" sets the value
 * handed to scale_intensities(), and a line "<h> <k> <l> <intensity>" is one
 * measurement.  All other lines are ignored.  A pattern is merged (after
 * optional scaling) when the next pattern starts or the input ends.
 *
 * pmodel, pobserved, pcounts
 *     Receive the newly created intensity list, list of observed reflections
 *     and count list.  Releasing them is up to the caller.
 * config_maxonly   Passed to merge_pattern() as its "mo" argument.
 * config_scale     Non-zero to call scale_intensities() on each pattern.
 * config_sum       Non-zero to leave the summed intensities undivided.
 * config_stopafter Stop once this many patterns have been merged.  Zero (the
 *                  default set by main()) never matches, so all are read.
 * twins, holo, mero, hist_vals, hist_h, hist_k, hist_l, hist_i
 *     Passed through to merge_pattern().
 * n_total_patterns Only used for the progress bar. */
static void merge_all(FILE *fh, double **pmodel, ReflItemList **pobserved,
                      unsigned int **pcounts,
                      int config_maxonly, int config_scale, int config_sum,
                      int config_stopafter,
                      ReflItemList *twins, const char *holo, const char *mero,
                      int n_total_patterns, double *hist_vals,
                      signed int hist_h, signed int hist_k, signed int hist_l,
                      int *hist_i)
{
	float f0 = 1.0;
	int n_nof0 = 0;
	int f0_valid = 0;
	int n_patterns = 0;
	double *new_pattern = new_list_intensity();
	ReflItemList *items = new_items();
	ReflItemList *observed = new_items();
	double *model = new_list_intensity();
	unsigned int *counts = new_list_count();
	int i;

	while ( 1 ) {

		char line[1024];
		signed int h, k, l;
		float intensity;
		int r;
		int at_eof;
		int at_header;

		/* When fgets() fails, "line" holds nothing new and must not
		 * be parsed again. */
		at_eof = (fgets(line, sizeof(line), fh) == NULL);
		at_header = !at_eof
		  && ( (strncmp(line, "Reflections from indexing", 25) == 0)
		    || (strncmp(line, "New pattern", 11) == 0) );

		if ( at_eof || at_header ) {

			/* No pattern has been started yet */
			if ( n_patterns == 0 ) {
				if ( at_eof ) break;
				n_patterns++;
				continue;
			}

			/* Assume a default I0 if we don't have one by now */
			if ( config_scale && !f0_valid ) {
				n_nof0++;
				f0 = 1.0;
			}

			/* Scale if requested */
			if ( config_scale ) {
				scale_intensities(model, observed,
				                  new_pattern, items,
				                  f0, f0_valid);
			}

			/* The pattern which has just finished is complete,
			 * whether it was ended by the start of the next one
			 * or by the end of the input. */
			merge_pattern(model, observed, new_pattern, items,
			              counts, config_maxonly,
			              twins, holo, mero,
			              hist_vals, hist_h, hist_k, hist_l,
			              hist_i);

			if ( at_eof ) break;
			if ( n_patterns == config_stopafter ) break;

			/* Reset for the next pattern */
			n_patterns++;
			clear_items(items);

			progress_bar(n_patterns, n_total_patterns, "Merging");

			f0_valid = 0;

			continue;

		}

		if ( strncmp(line, "f0 = ", 5) == 0 ) {
			r = sscanf(line, "f0 = %f", &f0);
			if ( r != 1 ) {
				f0 = 1.0;
				f0_valid = 0;
			} else {
				f0_valid = 1;
			}
			/* An "f0" line is never a reflection */
			continue;
		}

		r = sscanf(line, "%i %i %i %f", &h, &k, &l, &intensity);
		if ( r != 4 ) continue;

		/* Not interested in the central beam */
		if ( (h==0) && (k==0) && (l==0) ) continue;

		/* The same raw indices (before mapping into the asymmetric
		 * unit) should not turn up twice in one pattern. */
		if ( find_item(items, h, k, l) != 0 ) {
			ERROR("More than one measurement for %i %i %i in"
			      " pattern number %i\n", h, k, l, n_patterns);
		}
		set_intensity(new_pattern, h, k, l, intensity);

		/* NB: This list contains raw indices, before working out
		 * where they belong in the asymmetric unit. */
		add_item(items, h, k, l);

	}

	delete_items(items);
	free(new_pattern);

	/* Calculate mean intensity if necessary */
	if ( !config_sum && !config_maxonly ) {
		for ( i=0; i<IDIM*IDIM*IDIM; i++ ) {
			if ( counts[i] > 0 ) {
				model[i] /= (double)counts[i];
			}
		}
	}

	*pmodel = model;
	*pcounts = counts;
	*pobserved = observed;

	STATUS("%i patterns had no f0 valid value.\n", n_nof0);
}


/* Count the patterns in the stream "fh".
 *
 * Reads from the current position to the end of the stream and returns the
 * number of lines beginning with "Reflections from indexing" or
 * "New pattern".  The stream is left at end-of-file; the caller must rewind
 * it before reading the data again. */
static int count_patterns(FILE *fh)
{
	char line[1024];
	int n_total_patterns = 0;

	/* Only look at "line" when fgets() actually filled it.  Otherwise the
	 * final line would be examined twice, and an empty stream would read
	 * an uninitialised buffer. */
	while ( fgets(line, sizeof(line), fh) != NULL ) {
		if ( (strncmp(line, "Reflections from indexing", 25) == 0)
		    || (strncmp(line, "New pattern", 11) == 0) ) {
			n_total_patterns++;
		}
	}

	return n_total_patterns;
}


420
421
422
423
int main(int argc, char *argv[])
{
	int c;
	char *filename = NULL;
424
	char *output = NULL;
425
	FILE *fh;
426
	double *model;
Thomas White's avatar
Thomas White committed
427
	unsigned int *counts;
428
	UnitCell *cell;
Thomas White's avatar
Thomas White committed
429
	int config_maxonly = 0;
Thomas White's avatar
Thomas White committed
430
	int config_stopafter = 0;
431
	int config_sum = 0;
432
	int config_scale = 0;
433
	unsigned int n_total_patterns;
434
	char *sym = NULL;
435
	char *pdb = NULL;
436
	ReflItemList *twins;
Thomas White's avatar
Thomas White committed
437
	ReflItemList *observed;
438
	int i;
439
	const char *holo = NULL;
440
441
442
443
	char *histo = NULL;
	signed int hist_h, hist_k, hist_l;
	double *hist_vals = NULL;
	int hist_i;
444
445
446
447
448

	/* Long options */
	const struct option longopts[] = {
		{"help",               0, NULL,               'h'},
		{"input",              1, NULL,               'i'},
Thomas White's avatar
Thomas White committed
449
		{"output",             1, NULL,               'o'},
Thomas White's avatar
Thomas White committed
450
		{"max-only",           0, &config_maxonly,     1},
Thomas White's avatar
Thomas White committed
451
		{"output-every",       1, NULL,               'e'},
Thomas White's avatar
Thomas White committed
452
		{"stop-after",         1, NULL,               's'},
453
		{"sum",                0, &config_sum,         1},
454
		{"scale",              0, &config_scale,       1},
455
		{"symmetry",           1, NULL,               'y'},
456
		{"pdb",                1, NULL,               'p'},
457
		{"histogram",          1, NULL,               'g'},
458
459
460
461
		{0, 0, NULL, 0}
	};

	/* Short options */
462
	while ((c = getopt_long(argc, argv, "hi:e:ro:p:y:g:",
463
	                        longopts, NULL)) != -1) {
464
465

		switch (c) {
Thomas White's avatar
Thomas White committed
466
		case 'h' :
467
468
469
			show_help(argv[0]);
			return 0;

Thomas White's avatar
Thomas White committed
470
		case 'i' :
471
472
473
			filename = strdup(optarg);
			break;

Thomas White's avatar
Thomas White committed
474
		case 'o' :
475
476
477
			output = strdup(optarg);
			break;

Thomas White's avatar
Thomas White committed
478
		case 's' :
Thomas White's avatar
Thomas White committed
479
480
481
			config_stopafter = atoi(optarg);
			break;

Thomas White's avatar
Thomas White committed
482
		case 'p' :
483
484
485
			pdb = strdup(optarg);
			break;

486
487
488
489
		case 'y' :
			sym = strdup(optarg);
			break;

490
491
492
493
		case 'g' :
			histo = strdup(optarg);
			break;

Thomas White's avatar
Thomas White committed
494
		case 0 :
495
496
			break;

Thomas White's avatar
Thomas White committed
497
		default :
498
499
500
501
502
503
504
505
506
507
			return 1;
		}

	}

	if ( filename == NULL ) {
		ERROR("Please specify filename using the -i option\n");
		return 1;
	}

508
509
510
511
	if ( pdb == NULL ) {
		pdb = strdup("molecule.pdb");
	}

Thomas White's avatar
Thomas White committed
512
513
514
	cell = load_cell_from_pdb(pdb);
	free(pdb);

515
	/* Show useful symmetry information */
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
	if ( sym != NULL ) {
		holo = get_holohedral(sym);
		int np = num_general_equivs(holo) / num_general_equivs(sym);
		if ( np > 1 ) {

			STATUS("Resolving point group %s into %s "
			       "(%i possibilities)\n",
			       holo, sym, np);
			/* Get the list of twin/Bijvoet possibilities */
			twins = get_twin_possibilities(holo, sym);
			STATUS("Twin/inversion operation indices from %s are:",
			       holo);
			for ( i=0; i<num_items(twins); i++ ) {
				STATUS(" %i", get_item(twins, i)->op);
			}
			STATUS("\n");
532

533
534
535
536
		} else {
			STATUS("No twin/inversion resolution necessary.\n");
			twins = NULL;
		}
537
	} else {
538
		STATUS("Not performing any twin/inversion resolution.\n");
539
		twins = NULL;
540
541
		sym = strdup("1");
		holo = strdup("1");
542
	}
543

544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
	if ( histo != NULL ) {
		int r;
		r = sscanf(histo, "%i,%i,%i", &hist_h, &hist_k, &hist_l);
		if ( r != 3 ) {
			ERROR("Invalid indices for '--histogram'\n");
			return 1;
		}
		hist_vals = malloc(10*1024*sizeof(double));
		free(histo);
		STATUS("Histogramming %i %i %i -> ", hist_h, hist_k, hist_l);
		/* Put into the asymmetric cell for the target group */
		get_asymm(hist_h, hist_k, hist_l,
		          &hist_h, &hist_k, &hist_l, sym);
		STATUS("%i %i %i\n", hist_h, hist_k, hist_l);
	}

560
561
562
563
564
565
566
567
568
569
570
	/* Open the data stream */
	if ( strcmp(filename, "-") == 0 ) {
		fh = stdin;
	} else {
		fh = fopen(filename, "r");
	}
	free(filename);
	if ( fh == NULL ) {
		ERROR("Failed to open input file\n");
		return 1;
	}
Thomas White's avatar
Tidy-up    
Thomas White committed
571

572
573
574
575
	/* Count the number of patterns in the file */
	n_total_patterns = count_patterns(fh);
	STATUS("There are %i patterns to process\n", n_total_patterns);
	rewind(fh);
576

577
	hist_i = 0;
Thomas White's avatar
Thomas White committed
578
	merge_all(fh, &model, &observed, &counts,
579
	          config_maxonly, config_scale, config_sum, config_stopafter,
580
581
                  twins, holo, sym, n_total_patterns,
                  hist_vals, hist_h, hist_k, hist_l, &hist_i);
582
	rewind(fh);
583
584
585

	fclose(fh);

586
587
588
589
590
591
	if ( hist_vals != NULL ) {
		STATUS("%i %i %i was seen %i times.\n", hist_h, hist_k, hist_l,
		                                        hist_i);
		plot_histogram(hist_vals, hist_i);
	}

592
	if ( output != NULL ) {
Thomas White's avatar
Thomas White committed
593
		write_reflections(output, observed, model, NULL, counts, cell);
594
	}
Thomas White's avatar
Thomas White committed
595

596
	free(sym);
Thomas White's avatar
Thomas White committed
597
	free(model);
Thomas White's avatar
Thomas White committed
598
	free(counts);
Thomas White's avatar
Thomas White committed
599
600
	free(output);
	free(cell);
601

602
603
	return 0;
}