process_hkl.c 13.2 KB
Newer Older
1
2
3
4
5
/*
 * process_hkl.c
 *
 * Assemble and process FEL Bragg intensities
 *
Thomas White's avatar
Thomas White committed
6
 * (c) 2006-2010 Thomas White <taw@physics.org>
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
 *
 * Part of CrystFEL - crystallography with a FEL
 *
 */


#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <stdarg.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <getopt.h>

#include "utils.h"
#include "statistics.h"
#include "sfac.h"
27
#include "reflections.h"
28
#include "likelihood.h"
29
#include "symmetry.h"
30
31


Thomas White's avatar
Thomas White committed
32
33
34
35
/* Number of divisions for R vs |q| graphs */
#define RVQDV (20)


36
37
38
static void show_help(const char *s)
{
	printf("Syntax: %s [options]\n\n", s);
Thomas White's avatar
Thomas White committed
39
40
41
	printf(
"Assemble and process FEL Bragg intensities.\n"
"\n"
42
43
44
45
"  -h, --help                Display this help message.\n"
"  -i, --input=<filename>    Specify input filename (\"-\" for stdin).\n"
"  -o, --output=<filename>   Specify output filename for merged intensities\n"
"                             (don't specify for no output).\n"
46
"  -p, --pdb=<filename>      PDB file to use (default: molecule.pdb).\n"
Thomas White's avatar
Thomas White committed
47
"\n"
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
"      --max-only            Take the integrated intensity to be equal to the\n"
"                             maximum intensity measured for that reflection.\n"
"                             The default is to use the mean value from all\n"
"                             measurements.\n"
"      --sum                 Sum (rather than average) the intensities for the\n"
"                             final output list.  This is useful for comparing\n"
"                             results to radially summed powder patterns, but\n"
"                             will break R-factor analysis.\n"
"      --stop-after=<n>      Stop after processing n patterns.  Zero means\n"
"                             keep going until the end of the input, and is\n"
"                             the default.\n"
"  -c, --compare-with=<file> Compare with reflection intensities in this file\n"
"\n"
"  -e, --output-every=<n>    Analyse figures of merit after every n patterns\n"
"                             Default: 1000.  A value of zero means to do the\n"
"                             analysis only after reading all the patterns.\n"
"  -r, --rvsq                Output lists of R vs |q| (\"Luzzatti plots\")\n"
"                             when analysing figures of merit.\n"
66
67
68
"      --detwin              Correlate each new pattern with the current\n"
"                             model and choose the best fitting out of the\n"
"                             allowable twins.\n"
69
70
"      --scale               Scale each pattern for best fit with the current\n"
"                             model.\n"
Thomas White's avatar
Thomas White committed
71
"  -y, --symmetry=<sym>      Merge according to point group <sym>.\n"
72
);
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
}


static void write_RvsQ(const char *name, double *ref, double *trueref,
                       unsigned int *counts, double scale, UnitCell *cell)
{
	FILE *fh;
	double smax, sbracket;
	signed int h, k, l;

	fh = fopen(name, "w");

	smax = 0.0;
	for ( h=-INDMAX; h<INDMAX; h++ ) {
	for ( k=-INDMAX; k<INDMAX; k++ ) {
	for ( l=-INDMAX; l<INDMAX; l++ ) {
Thomas White's avatar
Thomas White committed
89
		double s = 2.0*resolution(cell, h, k, l);
90
91
92
93
94
95
96
		if ( (lookup_count(counts, h, k, l) > 0) && (s > smax) ) {
			smax = s;
		}
	}
	}
	}

Thomas White's avatar
Thomas White committed
97
	for ( sbracket=0.0; sbracket<smax; sbracket+=smax/RVQDV ) {
98
99
100

		double top = 0.0;
		double bot = 0.0;
Thomas White's avatar
Thomas White committed
101
102
103
104
		int nhits = 0;
		int nrefl = 0;
		double R;
		double hits_per_refl;
105
106
107
108
109
110
111

		for ( h=-INDMAX; h<INDMAX; h++ ) {
		for ( k=-INDMAX; k<INDMAX; k++ ) {
		for ( l=-INDMAX; l<INDMAX; l++ ) {

			double s;
			int c;
Thomas White's avatar
Thomas White committed
112
113
114

			if ( (h==0) && (k==0) && (l==0) ) continue;

115
			c = lookup_count(counts, h, k, l);
Thomas White's avatar
Thomas White committed
116
			s = 2.0*resolution(cell, h, k, l);
Thomas White's avatar
Thomas White committed
117
			if ((s>=sbracket) && (s<sbracket+smax/RVQDV) && (c>0)) {
118
119
120
121
122
123
124

				double obs, calc, obsi;

				obs = lookup_intensity(ref, h, k, l);
				calc = lookup_intensity(trueref, h, k, l);

				obsi = obs / (double)c;
Thomas White's avatar
Thomas White committed
125
126
127
128
129
				top += pow(obsi - scale*calc, 2.0);
				bot += pow(obsi, 2.0);
				nhits += c;
				nrefl++;

130
131
132
133
134
135
			}

		}
		}
		}

Thomas White's avatar
Thomas White committed
136
137
138
139
140
		R = sqrt(top/bot);
		hits_per_refl = nrefl ? (double)nhits/nrefl : 0;

		fprintf(fh, "%8.5f %8.5f %5.2f\n", sbracket+smax/(2.0*RVQDV),
		                                R, hits_per_refl);
141
142
143
144
145
146

	}
	fclose(fh);
}


147
148
149
static void process_reflections(double *ref, unsigned int *counts,
                                double *trueref, unsigned int *truecounts,
                                unsigned int n_patterns,
150
                                UnitCell *cell, int do_rvsq)
151
152
153
154
155
156
{
	int j;
	double mean_counts;
	int ctot = 0;
	int nmeas = 0;
	double R, scale;
157
	FILE *fh;
158
159
160
161
162
163
164

	for ( j=0; j<LIST_SIZE; j++ ) {
		ctot += counts[j];
		if ( counts[j] > 0 ) nmeas++;
	}
	mean_counts = (double)ctot/nmeas;

165
	R = stat_r2(ref, counts, trueref, truecounts, &scale);
Thomas White's avatar
Thomas White committed
166
	STATUS("%8u: R=%5.2f%% (sf=%7.4e)  mean meas/refl=%5.2f,"
167
168
	       " %i reflections measured\n",
	       n_patterns, R*100.0, scale, mean_counts, nmeas);
169

Thomas White's avatar
Thomas White committed
170
171
	if ( do_rvsq ) {
		/* Record graph of R against q for this N */
Thomas White's avatar
Thomas White committed
172
		char name[64];
173
		snprintf(name, 63, "R_vs_q-%u.dat", n_patterns);
Thomas White's avatar
Thomas White committed
174
175
		write_RvsQ(name, ref, trueref, counts, scale, cell);
	}
Thomas White's avatar
Thomas White committed
176

177
178
179
	fh = fopen("results/convergence.dat", "a");
	fprintf(fh, "%u %5.2f %5.2f\n", n_patterns, R*100.0, mean_counts);
	fclose(fh);
180
181
182
}


183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
static int resolve_twin(ReflItemList *items, const char *symm,
                        const double *patt, const double *model,
                        const unsigned int *model_counts)
{
	ReflItemList *twins;
	const char *holo;
	int n, i;
	double best_fom = 0.0;
	int best_op = 0;

	/* How many possible twinned orientations are there? */
	twins = get_twins(items, symm);

	holo = get_holohedral(symm);

	n = num_items(twins);

	for ( i=0; i<n; i++ ) {

		int j;
		int op;
		double *trial_ints = new_list_intensity();
		unsigned int *trial_counts = new_list_count();
		double fom;

		op = get_item(twins, i)->op;

		for ( j=0; j<num_items(items); j++ ) {

			signed int h, k, l;
			struct refl_item *r = get_item(items, j);

			get_general_equiv(r->h, r->k, r->l, &h, &k, &l,
			                  holo, op);
			get_asymm(h, k, l, &h, &k, &l, symm);

			set_intensity(trial_ints, h, k, l,
			              lookup_intensity(patt, r->h, r->k, r->l));
			set_count(trial_counts, h, k, l, 1);

		}

		fom = stat_pearson(trial_ints, trial_counts,
		                   model, model_counts);

		free(trial_ints);
		free(trial_counts);

		//printf(" %f", fom);
		if ( fom > best_fom ) {
			best_fom = fom;
			best_op = op;
		}

	}
	//printf("\n");

	delete_items(twins);
	return best_op;
}


245
246
static void merge_pattern(double *model, const double *new,
                          unsigned int *model_counts,
247
248
                          ReflItemList *items, int mo, int sum,
                          const char *symm)
249
{
250
	int i;
251
252
253
254
	const char *holo = get_holohedral(symm);  /* Needed to look up later */
	int twin;

	twin = resolve_twin(items, symm, new, model, model_counts);
255

256
	for ( i=0; i<num_items(items); i++ ) {
257
258

		double intensity;
259
		signed int hs, ks, ls;
260
261
		signed int h, k, l;
		struct refl_item *item;
262

263
264
		item = get_item(items, i);

265
266
267
268
		hs = item->h;
		ks = item->k;
		ls = item->l;

269
270
		get_general_equiv(hs, ks, ls, &h, &k, &l, holo, twin);
		get_asymm(h, k, l, &h, &k, &l, symm);
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291

		intensity = lookup_intensity(new, h, k, l);

		if ( !mo ) {
			integrate_intensity(model, h, k, l, intensity);
			if ( sum ) {
				set_count(model_counts, h, k, l, 1);
			} else {
				integrate_count(model_counts, h, k, l, 1);
			}
		} else {
			if ( intensity > lookup_intensity(model, h, k, l) ) {
				set_intensity(model, h, k, l, intensity);
			}
			set_count(model_counts, h, k, l, 1);
		}

	}
}


292
293
294
295
int main(int argc, char *argv[])
{
	int c;
	char *filename = NULL;
296
	char *output = NULL;
297
298
	FILE *fh;
	unsigned int n_patterns;
299
300
	double *model, *trueref = NULL;
	unsigned int *model_counts;
301
	char *rval;
302
	UnitCell *cell;
Thomas White's avatar
Thomas White committed
303
	int config_maxonly = 0;
Thomas White's avatar
Thomas White committed
304
	int config_every = 1000;
Thomas White's avatar
Thomas White committed
305
	int config_rvsq = 0;
Thomas White's avatar
Thomas White committed
306
	int config_stopafter = 0;
307
	int config_sum = 0;
308
	int config_detwin = 0;
309
	int config_scale = 0;
310
	char *intfile = NULL;
311
312
	double *new_pattern = NULL;
	unsigned int n_total_patterns;
313
	unsigned int *truecounts = NULL;
314
	char *sym = NULL;
315
	char *pdb = NULL;
316
	float f0;
Thomas White's avatar
Thomas White committed
317
	int f0_valid;
318
	int n_nof0 = 0;
319
	ReflItemList *items;
320
321
322
323
324

	/* Long options */
	const struct option longopts[] = {
		{"help",               0, NULL,               'h'},
		{"input",              1, NULL,               'i'},
Thomas White's avatar
Thomas White committed
325
		{"output",             1, NULL,               'o'},
Thomas White's avatar
Thomas White committed
326
		{"max-only",           0, &config_maxonly,     1},
Thomas White's avatar
Thomas White committed
327
		{"output-every",       1, NULL,               'e'},
Thomas White's avatar
Thomas White committed
328
		{"rvsq",               0, NULL,               'r'},
Thomas White's avatar
Thomas White committed
329
		{"stop-after",         1, NULL,               's'},
330
		{"compare-with",       0, NULL,               'c'},
331
332
		{"sum",                0, &config_sum,         1},
		{"detwin",             0, &config_detwin,      1},
333
		{"scale",              0, &config_scale,       1},
334
		{"symmetry",           1, NULL,               'y'},
335
		{"pdb",                1, NULL,               'p'},
336
337
338
339
		{0, 0, NULL, 0}
	};

	/* Short options */
340
341
	while ((c = getopt_long(argc, argv, "hi:e:ro:p:y:",
	                        longopts, NULL)) != -1) {
342
343

		switch (c) {
Thomas White's avatar
Thomas White committed
344
		case 'h' :
345
346
347
			show_help(argv[0]);
			return 0;

Thomas White's avatar
Thomas White committed
348
		case 'i' :
349
350
351
			filename = strdup(optarg);
			break;

Thomas White's avatar
Thomas White committed
352
		case 'o' :
353
354
355
			output = strdup(optarg);
			break;

Thomas White's avatar
Thomas White committed
356
357

		case 'r' :
Thomas White's avatar
Thomas White committed
358
359
360
			config_rvsq = 1;
			break;

Thomas White's avatar
Thomas White committed
361
		case 'e' :
Thomas White's avatar
Thomas White committed
362
363
364
			config_every = atoi(optarg);
			break;

Thomas White's avatar
Thomas White committed
365
		case 's' :
Thomas White's avatar
Thomas White committed
366
367
368
			config_stopafter = atoi(optarg);
			break;

Thomas White's avatar
Thomas White committed
369
		case 'c' :
370
371
372
			intfile = strdup(optarg);
			break;

Thomas White's avatar
Thomas White committed
373
		case 'p' :
374
375
376
			pdb = strdup(optarg);
			break;

377
378
379
380
		case 'y' :
			sym = strdup(optarg);
			break;

Thomas White's avatar
Thomas White committed
381
		case 0 :
382
383
			break;

Thomas White's avatar
Thomas White committed
384
		default :
385
386
387
388
389
390
391
392
393
394
			return 1;
		}

	}

	if ( filename == NULL ) {
		ERROR("Please specify filename using the -i option\n");
		return 1;
	}

395
	if ( intfile != NULL ) {
396
		truecounts = new_list_count();
397
		STATUS("Comparing against '%s'\n", intfile);
398
		trueref = read_reflections(intfile, truecounts, NULL);
399
400
401
402
403
		free(intfile);
	} else {
		trueref = NULL;
	}

404
405
406
407
	if ( pdb == NULL ) {
		pdb = strdup("molecule.pdb");
	}

408
409
410
411
	if ( sym == NULL ) {
		sym = strdup("1");
	}

412
413
414
	model = new_list_intensity();
	model_counts = new_list_count();
	new_pattern = new_list_intensity();
415
	items = new_items();
416

Thomas White's avatar
Thomas White committed
417
418
419
	cell = load_cell_from_pdb(pdb);
	free(pdb);

420
421
422
423
424
425
426
427
428
429
430
	if ( strcmp(filename, "-") == 0 ) {
		fh = stdin;
	} else {
		fh = fopen(filename, "r");
	}
	free(filename);
	if ( fh == NULL ) {
		ERROR("Failed to open input file\n");
		return 1;
	}

Thomas White's avatar
Thomas White committed
431
432
433
434
435
436
437
438
439
440
441
442
443
444
	/* Count the number of patterns in the file */
	n_total_patterns = 0;
	do {
		char line[1024];

		rval = fgets(line, 1023, fh);
		if ( (strncmp(line, "Reflections from indexing", 25) == 0)
		    || (strncmp(line, "New pattern", 11) == 0) ) {
		    n_total_patterns++;
		}
	} while ( rval != NULL );
	rewind(fh);
	STATUS("There are %i patterns to process\n", n_total_patterns);

445
446
447
448
449
450
451
	/* Show useful symmetry information */
	const char *holo = get_holohedral(sym);
	int np = num_general_equivs(holo) / num_general_equivs(sym);
	STATUS("Resolving from point group %s to %s (%i possibilities)\n",
	       holo, sym, np);


452
	n_patterns = 0;
Thomas White's avatar
Thomas White committed
453
	f0_valid = 0;
454
455
456
	do {

		char line[1024];
457
458
		signed int h, k, l;
		float intensity;
459
460
461
		int r;

		rval = fgets(line, 1023, fh);
462
463
		if ( (strncmp(line, "Reflections from indexing", 25) == 0)
		    || (strncmp(line, "New pattern", 11) == 0) ) {
Thomas White's avatar
Tidy-up    
Thomas White committed
464

465
			/* Start of first pattern? */
Thomas White's avatar
Thomas White committed
466
467
468
469
			if ( n_patterns == 0 ) {
				n_patterns++;
				continue;
			}
Thomas White's avatar
Tidy-up    
Thomas White committed
470

471
			/* Detwin before scaling */
472
473
			if ( config_detwin ) {
				detwin_intensities(model, new_pattern,
474
				                   model_counts, items);
475
476
			}

Thomas White's avatar
Thomas White committed
477
478
			/* Assume a default I0 if we don't have one by now */
			if ( config_scale && !f0_valid ) {
479
				n_nof0++;
480
481
482
				f0 = 1.0;
			}

483
484
485
			/* Scale if requested */
			if ( config_scale ) {
				scale_intensities(model, new_pattern,
486
				                  model_counts, items, f0,
Thomas White's avatar
Thomas White committed
487
				                  f0_valid);
488
489
			}

490
491
			/* Start of second or later pattern */
			merge_pattern(model, new_pattern, model_counts,
492
			              items, config_maxonly, config_sum, sym);
493

494
495
			if ( (trueref != NULL) && config_every
			    && (n_patterns % config_every == 0) ) {
496
497
498
				process_reflections(model, model_counts,
				                    trueref, truecounts,
				                    n_patterns, cell,
499
				                    config_rvsq);
500
			}
Thomas White's avatar
Thomas White committed
501
502
503

			if ( n_patterns == config_stopafter ) break;

Thomas White's avatar
Thomas White committed
504
			n_patterns++;
505
			clear_items(items);
Thomas White's avatar
Thomas White committed
506
507
508

			progress_bar(n_patterns, n_total_patterns, "Merging");

Thomas White's avatar
Thomas White committed
509
			f0_valid = 0;
510
511
512
513
514
515

		}

		if ( strncmp(line, "f0 = ", 5) == 0 ) {
			r = sscanf(line, "f0 = %f", &f0);
			if ( r != 1 ) {
Thomas White's avatar
Thomas White committed
516
517
				f0 = 1.0;
				f0_valid = 0;
518
519
				continue;
			}
Thomas White's avatar
Thomas White committed
520
			f0_valid = 1;
521
522
		}

523
		r = sscanf(line, "%i %i %i %f", &h, &k, &l, &intensity);
524
525
		if ( r != 4 ) continue;

Thomas White's avatar
Tidy-up    
Thomas White committed
526
527
		if ( (h==0) && (k==0) && (l==0) ) continue;

528
		if ( find_item(items, h, k, l) != 0 ) {
529
530
			ERROR("More than one measurement for %i %i %i in"
			      " pattern number %i\n", h, k, l, n_patterns);
Thomas White's avatar
Thomas White committed
531
		}
532
		set_intensity(new_pattern, h, k, l, intensity);
533
		add_item(items, h, k, l);
534
535
536
537
538

	} while ( rval != NULL );

	fclose(fh);

539
	if ( trueref != NULL ) {
540
		process_reflections(model, model_counts, trueref, truecounts,
541
		                    n_patterns, cell, config_rvsq);
Thomas White's avatar
Thomas White committed
542
	}
543

544
	if ( output != NULL ) {
545
546
		write_reflections(output, model_counts, model, NULL,
		                  0, cell, 1);
547
	}
Thomas White's avatar
Thomas White committed
548
549

	STATUS("There were %u patterns.\n", n_patterns);
550
	STATUS("%i had no f0 valid value.\n", n_nof0);
551

552
	delete_items(items);
553
	free(sym);
Thomas White's avatar
Thomas White committed
554
555
556
557
558
	free(model);
	free(model_counts);
	free(new_pattern);
	free(output);
	free(cell);
559

560
561
	return 0;
}