process_hkl.c 14.5 KB
Newer Older
1
2
3
4
5
/*
 * process_hkl.c
 *
 * Assemble and process FEL Bragg intensities
 *
Thomas White's avatar
Thomas White committed
6
 * (c) 2006-2010 Thomas White <taw@physics.org>
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
 *
 * Part of CrystFEL - crystallography with a FEL
 *
 */


#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <stdarg.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <getopt.h>

#include "utils.h"
#include "statistics.h"
#include "sfac.h"
27
#include "reflections.h"
28
#include "likelihood.h"
29
#include "symmetry.h"
30
31


Thomas White's avatar
Thomas White committed
32
33
34
35
/* Number of divisions for R vs |q| graphs */
#define RVQDV (20)


36
37
38
static void show_help(const char *s)
{
	printf("Syntax: %s [options]\n\n", s);
Thomas White's avatar
Thomas White committed
39
40
41
	printf(
"Assemble and process FEL Bragg intensities.\n"
"\n"
42
43
44
45
"  -h, --help                Display this help message.\n"
"  -i, --input=<filename>    Specify input filename (\"-\" for stdin).\n"
"  -o, --output=<filename>   Specify output filename for merged intensities\n"
"                             (don't specify for no output).\n"
46
"  -p, --pdb=<filename>      PDB file to use (default: molecule.pdb).\n"
Thomas White's avatar
Thomas White committed
47
"\n"
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
"      --max-only            Take the integrated intensity to be equal to the\n"
"                             maximum intensity measured for that reflection.\n"
"                             The default is to use the mean value from all\n"
"                             measurements.\n"
"      --sum                 Sum (rather than average) the intensities for the\n"
"                             final output list.  This is useful for comparing\n"
"                             results to radially summed powder patterns, but\n"
"                             will break R-factor analysis.\n"
"      --stop-after=<n>      Stop after processing n patterns.  Zero means\n"
"                             keep going until the end of the input, and is\n"
"                             the default.\n"
"  -c, --compare-with=<file> Compare with reflection intensities in this file\n"
"\n"
"  -e, --output-every=<n>    Analyse figures of merit after every n patterns\n"
"                             Default: 1000.  A value of zero means to do the\n"
"                             analysis only after reading all the patterns.\n"
"  -r, --rvsq                Output lists of R vs |q| (\"Luzzatti plots\")\n"
"                             when analysing figures of merit.\n"
66
67
68
"      --detwin              Correlate each new pattern with the current\n"
"                             model and choose the best fitting out of the\n"
"                             allowable twins.\n"
69
70
"      --scale               Scale each pattern for best fit with the current\n"
"                             model.\n"
Thomas White's avatar
Thomas White committed
71
"  -y, --symmetry=<sym>      Merge according to point group <sym>.\n"
72
);
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
}


static void write_RvsQ(const char *name, double *ref, double *trueref,
                       unsigned int *counts, double scale, UnitCell *cell)
{
	FILE *fh;
	double smax, sbracket;
	signed int h, k, l;

	fh = fopen(name, "w");

	smax = 0.0;
	for ( h=-INDMAX; h<INDMAX; h++ ) {
	for ( k=-INDMAX; k<INDMAX; k++ ) {
	for ( l=-INDMAX; l<INDMAX; l++ ) {
Thomas White's avatar
Thomas White committed
89
		double s = 2.0*resolution(cell, h, k, l);
90
91
92
93
94
95
96
		if ( (lookup_count(counts, h, k, l) > 0) && (s > smax) ) {
			smax = s;
		}
	}
	}
	}

Thomas White's avatar
Thomas White committed
97
	for ( sbracket=0.0; sbracket<smax; sbracket+=smax/RVQDV ) {
98
99
100

		double top = 0.0;
		double bot = 0.0;
Thomas White's avatar
Thomas White committed
101
102
103
104
		int nhits = 0;
		int nrefl = 0;
		double R;
		double hits_per_refl;
105
106
107
108
109
110
111

		for ( h=-INDMAX; h<INDMAX; h++ ) {
		for ( k=-INDMAX; k<INDMAX; k++ ) {
		for ( l=-INDMAX; l<INDMAX; l++ ) {

			double s;
			int c;
Thomas White's avatar
Thomas White committed
112
113
114

			if ( (h==0) && (k==0) && (l==0) ) continue;

115
			c = lookup_count(counts, h, k, l);
Thomas White's avatar
Thomas White committed
116
			s = 2.0*resolution(cell, h, k, l);
Thomas White's avatar
Thomas White committed
117
			if ((s>=sbracket) && (s<sbracket+smax/RVQDV) && (c>0)) {
118
119
120
121
122
123
124

				double obs, calc, obsi;

				obs = lookup_intensity(ref, h, k, l);
				calc = lookup_intensity(trueref, h, k, l);

				obsi = obs / (double)c;
Thomas White's avatar
Thomas White committed
125
126
127
128
129
				top += pow(obsi - scale*calc, 2.0);
				bot += pow(obsi, 2.0);
				nhits += c;
				nrefl++;

130
131
132
133
134
135
			}

		}
		}
		}

Thomas White's avatar
Thomas White committed
136
137
138
139
140
		R = sqrt(top/bot);
		hits_per_refl = nrefl ? (double)nhits/nrefl : 0;

		fprintf(fh, "%8.5f %8.5f %5.2f\n", sbracket+smax/(2.0*RVQDV),
		                                R, hits_per_refl);
141
142
143
144
145
146

	}
	fclose(fh);
}


147
148
149
static void process_reflections(double *ref, unsigned int *counts,
                                double *trueref, unsigned int *truecounts,
                                unsigned int n_patterns,
150
                                UnitCell *cell, int do_rvsq)
151
152
153
154
155
156
{
	int j;
	double mean_counts;
	int ctot = 0;
	int nmeas = 0;
	double R, scale;
157
	FILE *fh;
158
159
160
161
162
163
164

	for ( j=0; j<LIST_SIZE; j++ ) {
		ctot += counts[j];
		if ( counts[j] > 0 ) nmeas++;
	}
	mean_counts = (double)ctot/nmeas;

165
	R = stat_r2(ref, counts, trueref, truecounts, &scale);
Thomas White's avatar
Thomas White committed
166
	STATUS("%8u: R=%5.2f%% (sf=%7.4e)  mean meas/refl=%5.2f,"
167
168
	       " %i reflections measured\n",
	       n_patterns, R*100.0, scale, mean_counts, nmeas);
169

Thomas White's avatar
Thomas White committed
170
171
	if ( do_rvsq ) {
		/* Record graph of R against q for this N */
Thomas White's avatar
Thomas White committed
172
		char name[64];
173
		snprintf(name, 63, "R_vs_q-%u.dat", n_patterns);
Thomas White's avatar
Thomas White committed
174
175
		write_RvsQ(name, ref, trueref, counts, scale, cell);
	}
Thomas White's avatar
Thomas White committed
176

177
178
179
	fh = fopen("results/convergence.dat", "a");
	fprintf(fh, "%u %5.2f %5.2f\n", n_patterns, R*100.0, mean_counts);
	fclose(fh);
180
181
182
}


183
184
185
186
187
/* Note "holo" needn't actually be a holohedral point group, if you want to try
 * something strange like resolving from a low-symmetry group into an even
 * lower symmetry one.
 */
static ReflItemList *get_twin_possibilities(const char *holo, const char *mero)
188
{
189
	ReflItemList *test_items;
190
	ReflItemList *twins;
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
	int np;

	np = num_general_equivs(holo) / num_general_equivs(mero);

	test_items = new_items();

	/* Some arbitrarily chosen reflections which can't be special
	 * reflections in any point group, i.e. lots of odd numbers,
	 * prime numbers and so on.  There's probably an analytical
	 * way of working these out, but this will do. */
	add_item(test_items, 1, 2, 3);
	add_item(test_items, 3, 7, 13);
	add_item(test_items, 5, 2, 1);

	twins = get_twins(test_items, holo, mero);
	delete_items(test_items);

	if ( num_items(twins) != np ) {
		ERROR("Whoops! Couldn't find all the twinning possiblities.\n");
		abort();
	}

	return twins;
}


static int resolve_twin(ReflItemList *items, ReflItemList *twins,
                        const double *patt, const double *model,
                        const unsigned int *model_counts,
                        const char *holo, const char *mero)
{
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
	int n, i;
	double best_fom = 0.0;
	int best_op = 0;

	n = num_items(twins);

	for ( i=0; i<n; i++ ) {

		int j;
		int op;
		double *trial_ints = new_list_intensity();
		unsigned int *trial_counts = new_list_count();
		double fom;

		op = get_item(twins, i)->op;

		for ( j=0; j<num_items(items); j++ ) {

			signed int h, k, l;
			struct refl_item *r = get_item(items, j);

			get_general_equiv(r->h, r->k, r->l, &h, &k, &l,
			                  holo, op);
245
			get_asymm(h, k, l, &h, &k, &l, mero);
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271

			set_intensity(trial_ints, h, k, l,
			              lookup_intensity(patt, r->h, r->k, r->l));
			set_count(trial_counts, h, k, l, 1);

		}

		fom = stat_pearson(trial_ints, trial_counts,
		                   model, model_counts);

		free(trial_ints);
		free(trial_counts);

		//printf(" %f", fom);
		if ( fom > best_fom ) {
			best_fom = fom;
			best_op = op;
		}

	}
	//printf("\n");

	return best_op;
}


272
273
static void merge_pattern(double *model, const double *new,
                          unsigned int *model_counts,
274
                          ReflItemList *items, int mo, int sum,
275
276
                          ReflItemList *twins,
                          const char *holo, const char *mero)
277
{
278
	int i;
279
280
	int twin;

281
282
283
284
285
286
	if ( twins != NULL ) {
		twin = resolve_twin(items, twins, new, model,
		                     model_counts, holo, mero);
	} else {
		twin = 0;
	}
287

288
	for ( i=0; i<num_items(items); i++ ) {
289
290

		double intensity;
291
		signed int hs, ks, ls;
292
293
		signed int h, k, l;
		struct refl_item *item;
294

295
296
		item = get_item(items, i);

297
298
299
300
		hs = item->h;
		ks = item->k;
		ls = item->l;

301
		get_general_equiv(hs, ks, ls, &h, &k, &l, holo, twin);
302
		get_asymm(h, k, l, &h, &k, &l, mero);
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323

		intensity = lookup_intensity(new, h, k, l);

		if ( !mo ) {
			integrate_intensity(model, h, k, l, intensity);
			if ( sum ) {
				set_count(model_counts, h, k, l, 1);
			} else {
				integrate_count(model_counts, h, k, l, 1);
			}
		} else {
			if ( intensity > lookup_intensity(model, h, k, l) ) {
				set_intensity(model, h, k, l, intensity);
			}
			set_count(model_counts, h, k, l, 1);
		}

	}
}


324
325
326
327
int main(int argc, char *argv[])
{
	int c;
	char *filename = NULL;
328
	char *output = NULL;
329
330
	FILE *fh;
	unsigned int n_patterns;
331
332
	double *model, *trueref = NULL;
	unsigned int *model_counts;
333
	char *rval;
334
	UnitCell *cell;
Thomas White's avatar
Thomas White committed
335
	int config_maxonly = 0;
Thomas White's avatar
Thomas White committed
336
	int config_every = 1000;
Thomas White's avatar
Thomas White committed
337
	int config_rvsq = 0;
Thomas White's avatar
Thomas White committed
338
	int config_stopafter = 0;
339
	int config_sum = 0;
340
	int config_detwin = 0;
341
	int config_scale = 0;
342
	char *intfile = NULL;
343
344
	double *new_pattern = NULL;
	unsigned int n_total_patterns;
345
	unsigned int *truecounts = NULL;
346
	char *sym = NULL;
347
	char *pdb = NULL;
348
	float f0;
Thomas White's avatar
Thomas White committed
349
	int f0_valid;
350
	int n_nof0 = 0;
351
	ReflItemList *items;
352
353
	ReflItemList *twins;
	int i;
354
355
356
357
358

	/* Long options */
	const struct option longopts[] = {
		{"help",               0, NULL,               'h'},
		{"input",              1, NULL,               'i'},
Thomas White's avatar
Thomas White committed
359
		{"output",             1, NULL,               'o'},
Thomas White's avatar
Thomas White committed
360
		{"max-only",           0, &config_maxonly,     1},
Thomas White's avatar
Thomas White committed
361
		{"output-every",       1, NULL,               'e'},
Thomas White's avatar
Thomas White committed
362
		{"rvsq",               0, NULL,               'r'},
Thomas White's avatar
Thomas White committed
363
		{"stop-after",         1, NULL,               's'},
364
		{"compare-with",       0, NULL,               'c'},
365
366
		{"sum",                0, &config_sum,         1},
		{"detwin",             0, &config_detwin,      1},
367
		{"scale",              0, &config_scale,       1},
368
		{"symmetry",           1, NULL,               'y'},
369
		{"pdb",                1, NULL,               'p'},
370
371
372
373
		{0, 0, NULL, 0}
	};

	/* Short options */
374
375
	while ((c = getopt_long(argc, argv, "hi:e:ro:p:y:",
	                        longopts, NULL)) != -1) {
376
377

		switch (c) {
Thomas White's avatar
Thomas White committed
378
		case 'h' :
379
380
381
			show_help(argv[0]);
			return 0;

Thomas White's avatar
Thomas White committed
382
		case 'i' :
383
384
385
			filename = strdup(optarg);
			break;

Thomas White's avatar
Thomas White committed
386
		case 'o' :
387
388
389
			output = strdup(optarg);
			break;

Thomas White's avatar
Thomas White committed
390
391

		case 'r' :
Thomas White's avatar
Thomas White committed
392
393
394
			config_rvsq = 1;
			break;

Thomas White's avatar
Thomas White committed
395
		case 'e' :
Thomas White's avatar
Thomas White committed
396
397
398
			config_every = atoi(optarg);
			break;

Thomas White's avatar
Thomas White committed
399
		case 's' :
Thomas White's avatar
Thomas White committed
400
401
402
			config_stopafter = atoi(optarg);
			break;

Thomas White's avatar
Thomas White committed
403
		case 'c' :
404
405
406
			intfile = strdup(optarg);
			break;

Thomas White's avatar
Thomas White committed
407
		case 'p' :
408
409
410
			pdb = strdup(optarg);
			break;

411
412
413
414
		case 'y' :
			sym = strdup(optarg);
			break;

Thomas White's avatar
Thomas White committed
415
		case 0 :
416
417
			break;

Thomas White's avatar
Thomas White committed
418
		default :
419
420
421
422
423
424
425
426
427
428
			return 1;
		}

	}

	if ( filename == NULL ) {
		ERROR("Please specify filename using the -i option\n");
		return 1;
	}

429
	if ( intfile != NULL ) {
430
		truecounts = new_list_count();
431
		STATUS("Comparing against '%s'\n", intfile);
432
		trueref = read_reflections(intfile, truecounts, NULL);
433
434
435
436
437
		free(intfile);
	} else {
		trueref = NULL;
	}

438
439
440
441
	if ( pdb == NULL ) {
		pdb = strdup("molecule.pdb");
	}

442
443
444
445
	if ( sym == NULL ) {
		sym = strdup("1");
	}

446
447
448
	model = new_list_intensity();
	model_counts = new_list_count();
	new_pattern = new_list_intensity();
449
	items = new_items();
450

Thomas White's avatar
Thomas White committed
451
452
453
	cell = load_cell_from_pdb(pdb);
	free(pdb);

454
455
456
457
458
459
460
461
462
463
464
	if ( strcmp(filename, "-") == 0 ) {
		fh = stdin;
	} else {
		fh = fopen(filename, "r");
	}
	free(filename);
	if ( fh == NULL ) {
		ERROR("Failed to open input file\n");
		return 1;
	}

Thomas White's avatar
Thomas White committed
465
466
467
468
469
470
471
472
473
474
475
476
477
478
	/* Count the number of patterns in the file */
	n_total_patterns = 0;
	do {
		char line[1024];

		rval = fgets(line, 1023, fh);
		if ( (strncmp(line, "Reflections from indexing", 25) == 0)
		    || (strncmp(line, "New pattern", 11) == 0) ) {
		    n_total_patterns++;
		}
	} while ( rval != NULL );
	rewind(fh);
	STATUS("There are %i patterns to process\n", n_total_patterns);

479
480
481
	/* Show useful symmetry information */
	const char *holo = get_holohedral(sym);
	int np = num_general_equivs(holo) / num_general_equivs(sym);
482
483
484
485
486
487
488
489
490
491
492
	if ( np > 1 ) {

		STATUS("Resolving point group %s into %s (%i possibilities)\n",
		       holo, sym, np);
		/* Get the list of twin/Bijvoet possibilities */
		twins = get_twin_possibilities(holo, sym);
		STATUS("Twin/inversion operation indices (from %s) are:", holo);
		for ( i=0; i<num_items(twins); i++ ) {
			STATUS(" %i", get_item(twins, i)->op);
		}
		STATUS("\n");
493

494
495
496
497
	} else {
		STATUS("No twin/inversion resolution necessary.\n");
		twins = NULL;
	}
498

499
	n_patterns = 0;
Thomas White's avatar
Thomas White committed
500
	f0_valid = 0;
501
502
503
	do {

		char line[1024];
504
505
		signed int h, k, l;
		float intensity;
506
507
508
		int r;

		rval = fgets(line, 1023, fh);
509
510
		if ( (strncmp(line, "Reflections from indexing", 25) == 0)
		    || (strncmp(line, "New pattern", 11) == 0) ) {
Thomas White's avatar
Tidy-up    
Thomas White committed
511

512
			/* Start of first pattern? */
Thomas White's avatar
Thomas White committed
513
514
515
516
			if ( n_patterns == 0 ) {
				n_patterns++;
				continue;
			}
Thomas White's avatar
Tidy-up    
Thomas White committed
517

518
			/* Detwin before scaling */
519
520
			if ( config_detwin ) {
				detwin_intensities(model, new_pattern,
521
				                   model_counts, items);
522
523
			}

Thomas White's avatar
Thomas White committed
524
525
			/* Assume a default I0 if we don't have one by now */
			if ( config_scale && !f0_valid ) {
526
				n_nof0++;
527
528
529
				f0 = 1.0;
			}

530
531
532
			/* Scale if requested */
			if ( config_scale ) {
				scale_intensities(model, new_pattern,
533
				                  model_counts, items, f0,
Thomas White's avatar
Thomas White committed
534
				                  f0_valid);
535
536
			}

537
538
			/* Start of second or later pattern */
			merge_pattern(model, new_pattern, model_counts,
539
540
			              items, config_maxonly, config_sum, twins,
			              holo, sym);
541

542
543
			if ( (trueref != NULL) && config_every
			    && (n_patterns % config_every == 0) ) {
544
545
546
				process_reflections(model, model_counts,
				                    trueref, truecounts,
				                    n_patterns, cell,
547
				                    config_rvsq);
548
			}
Thomas White's avatar
Thomas White committed
549
550
551

			if ( n_patterns == config_stopafter ) break;

Thomas White's avatar
Thomas White committed
552
			n_patterns++;
553
			clear_items(items);
Thomas White's avatar
Thomas White committed
554
555
556

			progress_bar(n_patterns, n_total_patterns, "Merging");

Thomas White's avatar
Thomas White committed
557
			f0_valid = 0;
558
559
560
561
562
563

		}

		if ( strncmp(line, "f0 = ", 5) == 0 ) {
			r = sscanf(line, "f0 = %f", &f0);
			if ( r != 1 ) {
Thomas White's avatar
Thomas White committed
564
565
				f0 = 1.0;
				f0_valid = 0;
566
567
				continue;
			}
Thomas White's avatar
Thomas White committed
568
			f0_valid = 1;
569
570
		}

571
		r = sscanf(line, "%i %i %i %f", &h, &k, &l, &intensity);
572
573
		if ( r != 4 ) continue;

Thomas White's avatar
Tidy-up    
Thomas White committed
574
575
		if ( (h==0) && (k==0) && (l==0) ) continue;

576
		if ( find_item(items, h, k, l) != 0 ) {
577
578
			ERROR("More than one measurement for %i %i %i in"
			      " pattern number %i\n", h, k, l, n_patterns);
Thomas White's avatar
Thomas White committed
579
		}
580
		set_intensity(new_pattern, h, k, l, intensity);
581
		add_item(items, h, k, l);
582
583
584
585
586

	} while ( rval != NULL );

	fclose(fh);

587
	if ( trueref != NULL ) {
588
		process_reflections(model, model_counts, trueref, truecounts,
589
		                    n_patterns, cell, config_rvsq);
Thomas White's avatar
Thomas White committed
590
	}
591

592
	if ( output != NULL ) {
593
594
		write_reflections(output, model_counts, model, NULL,
		                  0, cell, 1);
595
	}
Thomas White's avatar
Thomas White committed
596
597

	STATUS("There were %u patterns.\n", n_patterns);
598
	STATUS("%i had no f0 valid value.\n", n_nof0);
599

600
	delete_items(items);
601
	free(sym);
Thomas White's avatar
Thomas White committed
602
603
604
605
606
	free(model);
	free(model_counts);
	free(new_pattern);
	free(output);
	free(cell);
607

608
609
	return 0;
}