process_hkl.c 10.7 KB
Newer Older
1
2
3
4
5
/*
 * process_hkl.c
 *
 * Assemble and process FEL Bragg intensities
 *
Thomas White's avatar
Thomas White committed
6
 * (c) 2006-2010 Thomas White <taw@physics.org>
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
 *
 * Part of CrystFEL - crystallography with a FEL
 *
 */


#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <stdarg.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <getopt.h>

#include "utils.h"
#include "statistics.h"
#include "sfac.h"
27
#include "reflections.h"
28
#include "likelihood.h"
29
#include "symmetry.h"
30
31


Thomas White's avatar
Thomas White committed
32
33
34
35
/* Number of divisions for R vs |q| graphs */
#define RVQDV (20)


36
37
38
static void show_help(const char *s)
{
	printf("Syntax: %s [options]\n\n", s);
Thomas White's avatar
Thomas White committed
39
40
41
	printf(
"Assemble and process FEL Bragg intensities.\n"
"\n"
42
43
44
45
"  -h, --help                Display this help message.\n"
"  -i, --input=<filename>    Specify input filename (\"-\" for stdin).\n"
"  -o, --output=<filename>   Specify output filename for merged intensities\n"
"                             (don't specify for no output).\n"
46
"  -p, --pdb=<filename>      PDB file to use (default: molecule.pdb).\n"
Thomas White's avatar
Thomas White committed
47
"\n"
48
49
50
51
52
53
54
55
56
57
58
59
"      --max-only            Take the integrated intensity to be equal to the\n"
"                             maximum intensity measured for that reflection.\n"
"                             The default is to use the mean value from all\n"
"                             measurements.\n"
"      --sum                 Sum (rather than average) the intensities for the\n"
"                             final output list.  This is useful for comparing\n"
"                             results to radially summed powder patterns, but\n"
"                             will break R-factor analysis.\n"
"      --stop-after=<n>      Stop after processing n patterns.  Zero means\n"
"                             keep going until the end of the input, and is\n"
"                             the default.\n"
"\n"
60
61
"      --scale               Scale each pattern for best fit with the current\n"
"                             model.\n"
Thomas White's avatar
Thomas White committed
62
"  -y, --symmetry=<sym>      Merge according to point group <sym>.\n"
63
);
64
65
66
}


67
68
69
70
71
/* Note "holo" needn't actually be a holohedral point group, if you want to try
 * something strange like resolving from a low-symmetry group into an even
 * lower symmetry one.
 */
static ReflItemList *get_twin_possibilities(const char *holo, const char *mero)
72
{
73
	ReflItemList *test_items;
74
	ReflItemList *twins;
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
	int np;

	np = num_general_equivs(holo) / num_general_equivs(mero);

	test_items = new_items();

	/* Some arbitrarily chosen reflections which can't be special
	 * reflections in any point group, i.e. lots of odd numbers,
	 * prime numbers and so on.  There's probably an analytical
	 * way of working these out, but this will do. */
	add_item(test_items, 1, 2, 3);
	add_item(test_items, 3, 7, 13);
	add_item(test_items, 5, 2, 1);

	twins = get_twins(test_items, holo, mero);
	delete_items(test_items);

	if ( num_items(twins) != np ) {
		ERROR("Whoops! Couldn't find all the twinning possiblities.\n");
		abort();
	}

	return twins;
}


static int resolve_twin(ReflItemList *items, ReflItemList *twins,
                        const double *patt, const double *model,
                        const unsigned int *model_counts,
                        const char *holo, const char *mero)
{
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
	int n, i;
	double best_fom = 0.0;
	int best_op = 0;

	n = num_items(twins);

	for ( i=0; i<n; i++ ) {

		int j;
		int op;
		double *trial_ints = new_list_intensity();
		unsigned int *trial_counts = new_list_count();
		double fom;

		op = get_item(twins, i)->op;

		for ( j=0; j<num_items(items); j++ ) {

			signed int h, k, l;
			struct refl_item *r = get_item(items, j);

			get_general_equiv(r->h, r->k, r->l, &h, &k, &l,
			                  holo, op);
129
			get_asymm(h, k, l, &h, &k, &l, mero);
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155

			set_intensity(trial_ints, h, k, l,
			              lookup_intensity(patt, r->h, r->k, r->l));
			set_count(trial_counts, h, k, l, 1);

		}

		fom = stat_pearson(trial_ints, trial_counts,
		                   model, model_counts);

		free(trial_ints);
		free(trial_counts);

		//printf(" %f", fom);
		if ( fom > best_fom ) {
			best_fom = fom;
			best_op = op;
		}

	}
	//printf("\n");

	return best_op;
}


156
157
static void merge_pattern(double *model, const double *new,
                          unsigned int *model_counts,
158
                          ReflItemList *items, int mo, int sum,
159
160
                          ReflItemList *twins,
                          const char *holo, const char *mero)
161
{
162
	int i;
163
164
	int twin;

165
166
167
168
169
170
	if ( twins != NULL ) {
		twin = resolve_twin(items, twins, new, model,
		                     model_counts, holo, mero);
	} else {
		twin = 0;
	}
171

172
	for ( i=0; i<num_items(items); i++ ) {
173
174

		double intensity;
175
		signed int hs, ks, ls;
176
177
		signed int h, k, l;
		struct refl_item *item;
178

179
180
		item = get_item(items, i);

181
182
183
184
		hs = item->h;
		ks = item->k;
		ls = item->l;

185
		get_general_equiv(hs, ks, ls, &h, &k, &l, holo, twin);
186
		get_asymm(h, k, l, &h, &k, &l, mero);
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207

		intensity = lookup_intensity(new, h, k, l);

		if ( !mo ) {
			integrate_intensity(model, h, k, l, intensity);
			if ( sum ) {
				set_count(model_counts, h, k, l, 1);
			} else {
				integrate_count(model_counts, h, k, l, 1);
			}
		} else {
			if ( intensity > lookup_intensity(model, h, k, l) ) {
				set_intensity(model, h, k, l, intensity);
			}
			set_count(model_counts, h, k, l, 1);
		}

	}
}


208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
static void merge_all(FILE *fh, double *model, unsigned int *model_counts,
                      int config_maxonly, int config_scale, int config_sum,
                      int config_stopafter,
                      ReflItemList *twins, const char *holo, const char *mero,
                      int n_total_patterns)
{
	char *rval;
	float f0;
	int n_nof0 = 0;
	int f0_valid = 0;
	int n_patterns = 0;
	double *new_pattern = new_list_intensity();
	ReflItemList *items = new_items();

	do {

		char line[1024];
		signed int h, k, l;
		float intensity;
		int r;

		rval = fgets(line, 1023, fh);
		if ( (strncmp(line, "Reflections from indexing", 25) == 0)
		    || (strncmp(line, "New pattern", 11) == 0) ) {

			/* Start of first pattern? */
			if ( n_patterns == 0 ) {
				n_patterns++;
				continue;
			}

			/* Assume a default I0 if we don't have one by now */
			if ( config_scale && !f0_valid ) {
				n_nof0++;
				f0 = 1.0;
			}

			/* Scale if requested */
			if ( config_scale ) {
				scale_intensities(model, new_pattern,
				                  model_counts, items, f0,
				                  f0_valid);
			}

			/* Start of second or later pattern */
			merge_pattern(model, new_pattern, model_counts,
			              items, config_maxonly, config_sum, twins,
			              holo, mero);

			if ( n_patterns == config_stopafter ) break;

			n_patterns++;
			clear_items(items);

			progress_bar(n_patterns, n_total_patterns, "Merging");

			f0_valid = 0;

		}

		if ( strncmp(line, "f0 = ", 5) == 0 ) {
			r = sscanf(line, "f0 = %f", &f0);
			if ( r != 1 ) {
				f0 = 1.0;
				f0_valid = 0;
				continue;
			}
			f0_valid = 1;
		}

		r = sscanf(line, "%i %i %i %f", &h, &k, &l, &intensity);
		if ( r != 4 ) continue;

		if ( (h==0) && (k==0) && (l==0) ) continue;

		if ( find_item(items, h, k, l) != 0 ) {
			ERROR("More than one measurement for %i %i %i in"
			      " pattern number %i\n", h, k, l, n_patterns);
		}
		set_intensity(new_pattern, h, k, l, intensity);
		add_item(items, h, k, l);

	} while ( rval != NULL );

	delete_items(items);
	free(new_pattern);

	STATUS("%i patterns had no f0 valid value.\n", n_nof0);
}


static int count_patterns(FILE *fh)
{
	char *rval;

	int n_total_patterns = 0;
	do {
		char line[1024];

		rval = fgets(line, 1023, fh);
		if ( (strncmp(line, "Reflections from indexing", 25) == 0)
		    || (strncmp(line, "New pattern", 11) == 0) ) {
		    n_total_patterns++;
		}
	} while ( rval != NULL );

	return n_total_patterns;
}


318
319
320
321
int main(int argc, char *argv[])
{
	int c;
	char *filename = NULL;
322
	char *output = NULL;
323
	FILE *fh;
324
	double *model;
325
	unsigned int *model_counts;
326
	UnitCell *cell;
Thomas White's avatar
Thomas White committed
327
	int config_maxonly = 0;
Thomas White's avatar
Thomas White committed
328
	int config_stopafter = 0;
329
	int config_sum = 0;
330
	int config_scale = 0;
331
	unsigned int n_total_patterns;
332
	char *sym = NULL;
333
	char *pdb = NULL;
334
335
	ReflItemList *twins;
	int i;
336
337
338
339
340

	/* Long options */
	const struct option longopts[] = {
		{"help",               0, NULL,               'h'},
		{"input",              1, NULL,               'i'},
Thomas White's avatar
Thomas White committed
341
		{"output",             1, NULL,               'o'},
Thomas White's avatar
Thomas White committed
342
		{"max-only",           0, &config_maxonly,     1},
Thomas White's avatar
Thomas White committed
343
		{"output-every",       1, NULL,               'e'},
Thomas White's avatar
Thomas White committed
344
		{"stop-after",         1, NULL,               's'},
345
		{"sum",                0, &config_sum,         1},
346
		{"scale",              0, &config_scale,       1},
347
		{"symmetry",           1, NULL,               'y'},
348
		{"pdb",                1, NULL,               'p'},
349
350
351
352
		{0, 0, NULL, 0}
	};

	/* Short options */
353
354
	while ((c = getopt_long(argc, argv, "hi:e:ro:p:y:",
	                        longopts, NULL)) != -1) {
355
356

		switch (c) {
Thomas White's avatar
Thomas White committed
357
		case 'h' :
358
359
360
			show_help(argv[0]);
			return 0;

Thomas White's avatar
Thomas White committed
361
		case 'i' :
362
363
364
			filename = strdup(optarg);
			break;

Thomas White's avatar
Thomas White committed
365
		case 'o' :
366
367
368
			output = strdup(optarg);
			break;

Thomas White's avatar
Thomas White committed
369
		case 's' :
Thomas White's avatar
Thomas White committed
370
371
372
			config_stopafter = atoi(optarg);
			break;

Thomas White's avatar
Thomas White committed
373
		case 'p' :
374
375
376
			pdb = strdup(optarg);
			break;

377
378
379
380
		case 'y' :
			sym = strdup(optarg);
			break;

Thomas White's avatar
Thomas White committed
381
		case 0 :
382
383
			break;

Thomas White's avatar
Thomas White committed
384
		default :
385
386
387
388
389
390
391
392
393
394
			return 1;
		}

	}

	if ( filename == NULL ) {
		ERROR("Please specify filename using the -i option\n");
		return 1;
	}

395
396
397
398
	if ( pdb == NULL ) {
		pdb = strdup("molecule.pdb");
	}

399
400
401
402
	if ( sym == NULL ) {
		sym = strdup("1");
	}

403
404
	model = new_list_intensity();
	model_counts = new_list_count();
405

Thomas White's avatar
Thomas White committed
406
407
408
	cell = load_cell_from_pdb(pdb);
	free(pdb);

409
410
411
	/* Show useful symmetry information */
	const char *holo = get_holohedral(sym);
	int np = num_general_equivs(holo) / num_general_equivs(sym);
412
413
414
415
416
417
418
419
420
421
422
	if ( np > 1 ) {

		STATUS("Resolving point group %s into %s (%i possibilities)\n",
		       holo, sym, np);
		/* Get the list of twin/Bijvoet possibilities */
		twins = get_twin_possibilities(holo, sym);
		STATUS("Twin/inversion operation indices (from %s) are:", holo);
		for ( i=0; i<num_items(twins); i++ ) {
			STATUS(" %i", get_item(twins, i)->op);
		}
		STATUS("\n");
423

424
425
426
427
	} else {
		STATUS("No twin/inversion resolution necessary.\n");
		twins = NULL;
	}
428

429
430
431
432
433
434
435
436
437
438
439
	/* Open the data stream */
	if ( strcmp(filename, "-") == 0 ) {
		fh = stdin;
	} else {
		fh = fopen(filename, "r");
	}
	free(filename);
	if ( fh == NULL ) {
		ERROR("Failed to open input file\n");
		return 1;
	}
Thomas White's avatar
Tidy-up    
Thomas White committed
440

441
442
443
444
	/* Count the number of patterns in the file */
	n_total_patterns = count_patterns(fh);
	STATUS("There are %i patterns to process\n", n_total_patterns);
	rewind(fh);
445

446
447
448
449
	merge_all(fh, model, model_counts,
	          config_maxonly, config_scale, config_sum, config_stopafter,
                  twins, holo, sym, n_total_patterns);
	rewind(fh);
450
451
452

	fclose(fh);

453
	if ( output != NULL ) {
454
455
		write_reflections(output, model_counts, model, NULL,
		                  0, cell, 1);
456
	}
Thomas White's avatar
Thomas White committed
457

458
	free(sym);
Thomas White's avatar
Thomas White committed
459
460
461
462
	free(model);
	free(model_counts);
	free(output);
	free(cell);
463

464
465
	return 0;
}