compare_hkl.c 11.6 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
/*
 * compare_hkl.c
 *
 * Compare reflection lists
 *
 * (c) 2006-2010 Thomas White <taw@physics.org>
 *
 * Part of CrystFEL - crystallography with a FEL
 *
 */


#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <stdarg.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <getopt.h>

#include "utils.h"
#include "sfac.h"
#include "reflections.h"
Thomas White's avatar
Thomas White committed
27
#include "statistics.h"
28
#include "symmetry.h"
29
30


Thomas White's avatar
Thomas White committed
31
/* Number of bins for plot of resolution shells */
32
#define NBINS (10)
Thomas White's avatar
Thomas White committed
33
34


35
36
static void show_help(const char *s)
{
37
	printf("Syntax: %s [options] <file1.hkl> <file2.hkl>\n\n", s);
38
39
40
41
42
	printf(
"Compare intensity lists.\n"
"\n"
"  -h, --help                 Display this help message.\n"
"  -o, --output=<filename>    Specify output filename for correction factor.\n"
43
"  -y, --symmetry=<sym>       The symmetry of both the input files.\n"
44
"  -p, --pdb=<filename>       PDB file to use (default: molecule.pdb).\n"
Thomas White's avatar
Thomas White committed
45
"      --shells               Plot the figures of merit by resolution.\n"
46
47
"      --rmin=<res>           Fix lower resolution limit for --shells (m^-1).\n"
"      --rmax=<res>           Fix upper resolution limit for --shells (m^-1).\n"
48
49
50
51
"\n");
}


Thomas White's avatar
Thomas White committed
52
static void plot_shells(const double *ref1, const double *ref2,
53
54
                        ReflItemList *items, double scale, UnitCell *cell,
                        const char *sym, ReflItemList *characterise,
55
56
                        unsigned int *char_counts, const double *sigma,
                        double rmin_fix, double rmax_fix)
Thomas White's avatar
Thomas White committed
57
58
{
	double num[NBINS];
Thomas White's avatar
Thomas White committed
59
	int cts[NBINS];
60
61
62
63
64
65
66
	int possible[NBINS];
	unsigned int *counted;
	unsigned int measurements[NBINS];
	unsigned int measured[NBINS];
	double total_vol, vol_per_shell;
	double rmins[NBINS];
	double rmaxs[NBINS];
67
	double snr[NBINS];
Thomas White's avatar
Thomas White committed
68
	double den;
69
70
	double rmin, rmax;
	signed int h, k, l;
Thomas White's avatar
Thomas White committed
71
	int i;
Thomas White's avatar
Thomas White committed
72
	int ctot;
Thomas White's avatar
Thomas White committed
73
	FILE *fh;
74
75
	double snr_total = 0;
	int nmeas = 0;
76
	int nmeastot = 0;
77
	int nout = 0;
Thomas White's avatar
Thomas White committed
78
79

	if ( cell == NULL ) {
Thomas White's avatar
Thomas White committed
80
		ERROR("Need the unit cell to plot resolution shells.\n");
Thomas White's avatar
Thomas White committed
81
82
83
		return;
	}

Thomas White's avatar
Thomas White committed
84
	fh = fopen("shells.dat", "w");
Thomas White's avatar
Thomas White committed
85
	if ( fh == NULL ) {
Thomas White's avatar
Thomas White committed
86
		ERROR("Couldn't open 'shells.dat'\n");
Thomas White's avatar
Thomas White committed
87
88
89
90
91
		return;
	}

	for ( i=0; i<NBINS; i++ ) {
		num[i] = 0.0;
Thomas White's avatar
Thomas White committed
92
		cts[i] = 0;
93
94
95
		possible[i] = 0;
		measured[i] = 0;
		measurements[i] = 0;
96
		snr[i] = 0;
Thomas White's avatar
Thomas White committed
97
98
99
100
101
102
103
104
	}

	rmin = +INFINITY;
	rmax = 0.0;
	for ( i=0; i<num_items(items); i++ ) {

		struct refl_item *it;
		signed int h, k, l;
105
		double d;
Thomas White's avatar
Thomas White committed
106
107
108
109

		it = get_item(items, i);
		h = it->h;  k = it->k;  l = it->l;

110
		d = resolution(cell, h, k, l) * 2.0;
111
112
		if ( d > rmax ) rmax = d;
		if ( d < rmin ) rmin = d;
Thomas White's avatar
Thomas White committed
113
114

	}
115

116
	STATUS("1/d goes from %f to %f nm^-1\n", rmin/1e9, rmax/1e9);
117

118
119
	/* Widen the range just a little bit */
	rmin -= 0.001e9;
120
121
	rmax += 0.001e9;

122
123
124
	/* Fixed resolution shells if needed */
	if ( rmin_fix > 0.0 ) rmin = rmin_fix;
	if ( rmax_fix > 0.0 ) rmax = rmax_fix;
125

126
127
128
129
130
131
132
133
134
135
	total_vol = pow(rmax, 3.0) - pow(rmin, 3.0);
	vol_per_shell = total_vol / NBINS;
	rmins[0] = rmin;
	for ( i=1; i<NBINS; i++ ) {

		double r;

		r = vol_per_shell + pow(rmins[i-1], 3.0);
		r = pow(r, 1.0/3.0);

136
		/* Shells of constant volume */
137
138
139
		rmaxs[i-1] = r;
		rmins[i] = r;

140
141
142
143
		/* Shells of constant thickness */
		//rmins[i] = rmins[i-1] + (rmax-rmin)/NBINS;
		//rmaxs[i-1] = rmins[i-1] + (rmax-rmin)/NBINS;

144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
		STATUS("Shell %i: %f to %f\n", i-1,
		       rmins[i-1]/1e9, rmaxs[i-1]/1e9);

	}
	rmaxs[NBINS-1] = rmax;
	STATUS("Shell %i: %f to %f\n", NBINS-1,
	       rmins[NBINS-1]/1e9, rmaxs[NBINS-1]/1e9);

	/* Count the number of reflections possible in each shell */
	counted = new_list_count();
	for ( h=-50; h<=+50; h++ ) {
	for ( k=-50; k<=+50; k++ ) {
	for ( l=-50; l<=+50; l++ ) {

		double d;
		signed int hs, ks, ls;
		int bin;

		get_asymm(h, k, l, &hs, &ks, &ls, sym);
		if ( lookup_count(counted, hs, ks, ls) ) continue;
		set_count(counted, hs, ks, ls, 1);

		d = resolution(cell, h, k, l) * 2.0;

		bin = -1;
		for ( i=0; i<NBINS; i++ ) {
			if ( (d>rmins[i]) && (d<=rmaxs[i]) ) {
				bin = i;
				break;
			}
		}
		if ( bin == -1 ) continue;

		possible[bin]++;

	}
	}
	}
	free(counted);

	/* Characterise the first data set (only) */
	for ( i=0; i<num_items(characterise); i++ ) {

		struct refl_item *it;
		signed int h, k, l;
		double d;
		int bin;
		int j;

		it = get_item(characterise, i);
		h = it->h;  k = it->k;  l = it->l;

		d = resolution(cell, h, k, l) * 2.0;

		bin = -1;
		for ( j=0; j<NBINS; j++ ) {
			if ( (d>rmins[j]) && (d<=rmaxs[j]) ) {
				bin = j;
				break;
			}
		}
		if ( bin == -1 ) {
206
			nout++;
207
208
209
210
211
			continue;
		}

		measured[bin]++;
		measurements[bin] += lookup_count(char_counts, h, k, l);
212
213
214
215
216
		snr[bin] += (lookup_intensity(ref1, h, k, l) /
		                              lookup_intensity(sigma, h, k, l));
		snr_total += (lookup_intensity(ref1, h, k, l) /
		                              lookup_intensity(sigma, h, k, l));
		nmeas++;
217
		nmeastot += lookup_count(char_counts, h, k, l);
218
219

	}
220
	STATUS("overall <snr> = %f\n", snr_total/(double)nmeas);
221
222
	STATUS("%i measurements in total.\n", nmeastot);
	STATUS("%i reflections in total.\n", nmeas);
Thomas White's avatar
Thomas White committed
223

224
225
226
227
228
	if ( nout ) {
		STATUS("Warning; %i reflections outside resolution range.\n",
		       nout);
	}

Thomas White's avatar
Thomas White committed
229
	den = 0.0;
Thomas White's avatar
Thomas White committed
230
	ctot = 0;
Thomas White's avatar
Thomas White committed
231
232
233
234
	for ( i=0; i<num_items(items); i++ ) {

		struct refl_item *it;
		signed int h, k, l;
235
		double d;
Thomas White's avatar
Thomas White committed
236
		int bin;
237
		double i1, i2;
238
		int j;
Thomas White's avatar
Thomas White committed
239
240
241
242

		it = get_item(items, i);
		h = it->h;  k = it->k;  l = it->l;

243
		d = resolution(cell, h, k, l) * 2.0;
Thomas White's avatar
Thomas White committed
244

245
246
247
248
249
250
251
		bin = -1;
		for ( j=0; j<NBINS; j++ ) {
			if ( (d>rmins[j]) && (d<=rmaxs[j]) ) {
				bin = j;
				break;
			}
		}
252
253
254

		/* Outside resolution range? */
		if ( bin == -1 ) continue;
Thomas White's avatar
Thomas White committed
255
256

		i1 = lookup_intensity(ref1, h, k, l);
257
		i2 = lookup_intensity(ref2, h, k, l);
Thomas White's avatar
Thomas White committed
258
		i2 *= scale;
Thomas White's avatar
Thomas White committed
259

Thomas White's avatar
Thomas White committed
260
		num[bin] += fabs(i1 - i2);
261
		den += i1;
Thomas White's avatar
Thomas White committed
262
		ctot++;
263
		cts[bin]++;
Thomas White's avatar
Thomas White committed
264
265
266
267
268

	}

	for ( i=0; i<NBINS; i++ ) {

269
		double r, cen;
270
		cen = rmins[i] + (rmaxs[i] - rmins[i])/2.0;
271
		r = (num[i]/den)*((double)ctot/cts[i]);
272
		fprintf(fh, "%f %f %i %i %i %f %f\n", cen*1.0e-9, r*100.0,
273
274
		                            measured[i],
		                            possible[i], measurements[i],
275
276
		                            (float)measurements[i]/measured[i],
		                            (snr[i]/(double)measured[i]));
Thomas White's avatar
Thomas White committed
277
278
279
280
281
282
283

	}

	fclose(fh);
}


284
285
286
287
288
int main(int argc, char *argv[])
{
	int c;
	double *ref1;
	double *ref2;
289
	double *ref2_transformed;
290
	double *out;
291
	UnitCell *cell;
292
293
294
	char *outfile = NULL;
	char *afile = NULL;
	char *bfile = NULL;
Thomas White's avatar
Thomas White committed
295
	char *sym = NULL;
296
	double scale, scale_r2, scale_rdig, R1, R2, R1i, Rdiff, pearson;
297
	double scale_rintint, scale_r1i, scale_r1, scale_r1fi;
Thomas White's avatar
Thomas White committed
298
299
	int i, ncom;
	ReflItemList *i1, *i2, *icommon;
Thomas White's avatar
Thomas White committed
300
	int config_shells = 0;
301
	char *pdb = NULL;
302
303
304
305
	double *esd1;
	double *esd2;
	int rej1 = 0;
	int rej2 = 0;
306
	unsigned int *cts1;
307
308
	float rmin_fix = -1.0;
	float rmax_fix = -1.0;
309
310
311
312
313

	/* Long options */
	const struct option longopts[] = {
		{"help",               0, NULL,               'h'},
		{"output",             1, NULL,               'o'},
314
		{"symmetry",           1, NULL,               'y'},
Thomas White's avatar
Thomas White committed
315
		{"shells",             0, &config_shells,     1},
316
		{"pdb",                1, NULL,               'p'},
317
318
		{"rmin",               1, NULL,               2},
		{"rmax",               1, NULL,               3},
319
320
321
322
		{0, 0, NULL, 0}
	};

	/* Short options */
323
	while ((c = getopt_long(argc, argv, "ho:y:p:", longopts, NULL)) != -1) {
324
325

		switch (c) {
Thomas White's avatar
Thomas White committed
326
		case 'h' :
327
328
329
			show_help(argv[0]);
			return 0;

Thomas White's avatar
Thomas White committed
330
		case 'o' :
331
332
333
			outfile = strdup(optarg);
			break;

334
335
336
337
		case 'y' :
			sym = strdup(optarg);
			break;

338
339
340
341
		case 'p' :
			pdb = strdup(optarg);
			break;

Thomas White's avatar
Thomas White committed
342
		case 0 :
343
344
			break;

345
346
347
348
349
350
351
352
353
354
355
356
357
358
		case 2 :
			if ( sscanf(optarg, "%e", &rmin_fix) != 1 ) {
				ERROR("Invalid value for --rmin\n");
				return 1;
			}
			break;

		case 3 :
			if ( sscanf(optarg, "%e", &rmax_fix) != 1 ) {
				ERROR("Invalid value for --rmax\n");
				return 1;
			}
			break;

Thomas White's avatar
Thomas White committed
359
		default :
360
361
362
363
364
			return 1;
		}

	}

365
366
367
368
369
	if ( argc != (optind+2) ) {
		ERROR("Please provide exactly two HKL files to compare.\n");
		return 1;
	}

370
371
372
373
	if ( sym == NULL ) {
		sym = strdup("1");
	}

374
375
376
	afile = strdup(argv[optind++]);
	bfile = strdup(argv[optind]);

377
378
379
380
381
382
383
	if ( pdb == NULL ) {
		pdb = strdup("molecule.pdb");
	}

	cell = load_cell_from_pdb(pdb);
	free(pdb);

Thomas White's avatar
Thomas White committed
384
	ref1 = new_list_intensity();
385
	esd1 = new_list_sigma();
386
387
	cts1 = new_list_count();
	i1 = read_reflections(afile, ref1, NULL, cts1, esd1);
Thomas White's avatar
Thomas White committed
388
	if ( i1 == NULL ) {
389
390
391
		ERROR("Couldn't open file '%s'\n", afile);
		return 1;
	}
Thomas White's avatar
Thomas White committed
392
	ref2 = new_list_intensity();
393
394
	esd2 = new_list_sigma();
	i2 = read_reflections(bfile, ref2, NULL, NULL, esd2);
Thomas White's avatar
Thomas White committed
395
	if ( i2 == NULL ) {
396
397
398
399
		ERROR("Couldn't open file '%s'\n", bfile);
		return 1;
	}

Thomas White's avatar
Thomas White committed
400
401
	/* List for output scale factor map */
	out = new_list_intensity();
402

403
404
405
406
407
	/* Find common reflections (taking symmetry into account) */
	icommon = new_items();
	ref2_transformed = new_list_intensity();
	for ( i=0; i<num_items(i1); i++ ) {

Thomas White's avatar
Thomas White committed
408
409
		struct refl_item *it;
		signed int h, k, l;
410
411
		signed int he, ke, le;
		double val1, val2;
412
413
		double sig1, sig2;
		int ig = 0;
414
		double d;
415

416
		it = get_item(i1, i);
Thomas White's avatar
Thomas White committed
417
		h = it->h;  k = it->k;  l = it->l;
418

419
		if ( !find_unique_equiv(i2, h, k, l, sym, &he, &ke, &le) ) {
Thomas White's avatar
Thomas White committed
420
421
			//STATUS("%i %i %i not matched (%f nm).\n", h, k, l,
			//       1.0/(2.0*resolution(cell, h, k, l)/1e9));
422
423
			continue;
		}
424

425
426
		val1 = lookup_intensity(ref1, h, k, l);
		val2 = lookup_intensity(ref2, he, ke, le);
427
		sig1 = lookup_sigma(esd1, h, k, l);
428
		sig2 = lookup_sigma(esd2, he, ke, le);
429
430
431
432
433
434
435
436
437

		if ( val1 < 3.0 * sig1 ) {
			rej1++;
			ig = 1;
		}
		if ( val2 < 3.0 * sig2 ) {
			rej2++;
			ig = 1;
		}
438
439
440
441
442

		d = 0.5/resolution(cell, h, k, l);
		if ( d > 55.0e-10 ) ig = 1;
		//if ( d < 15.0e-10 ) ig = 1;

443
		//if ( ig ) continue;
444

445
446
447
		set_intensity(ref2_transformed, h, k, l, val2);
		set_intensity(out, h, k, l, val1/val2);
		add_item(icommon, h, k, l);
448
449

	}
450
	ncom = num_items(icommon);
451
452
453
454
	STATUS("%i reflections with I < 3.0*sigma(I) rejected from '%s'\n",
	       rej1, afile);
	STATUS("%i reflections with I < 3.0*sigma(I) rejected from '%s'\n",
	       rej2, bfile);
455

Thomas White's avatar
Thomas White committed
456
457
	STATUS("%i,%i reflections: %i in common\n",
	       num_items(i1), num_items(i2), ncom);
Thomas White's avatar
Thomas White committed
458

459
	R1 = stat_r1_ignore(ref1, ref2_transformed, icommon, &scale_r1fi);
460
	STATUS("R1(F) = %5.4f %% (scale=%5.2e) (ignoring negative intensities)\n",
461
	       R1*100.0, scale_r1fi);
Thomas White's avatar
Thomas White committed
462

463
	R1 = stat_r1_zero(ref1, ref2_transformed, icommon, &scale_r1);
464
	STATUS("R1(F) = %5.4f %% (scale=%5.2e) (zeroing negative intensities)\n",
465
	       R1*100.0, scale_r1);
Thomas White's avatar
Thomas White committed
466

Thomas White's avatar
Thomas White committed
467
	R2 = stat_r2(ref1, ref2_transformed, icommon, &scale_r2);
468
	STATUS("R2(I) = %5.4f %% (scale=%5.2e)\n", R2*100.0, scale_r2);
Thomas White's avatar
Thomas White committed
469

470
	R1i = stat_r1_i(ref1, ref2_transformed, icommon, &scale_r1i);
471
	STATUS("R1(I) = %5.4f %% (scale=%5.2e)\n", R1i*100.0, scale_r1i);
Thomas White's avatar
Thomas White committed
472

473
	Rdiff = stat_rdiff_ignore(ref1, ref2_transformed, icommon, &scale_rdig);
Thomas White's avatar
Thomas White committed
474
	STATUS("Rint(F) = %5.4f %% (scale=%5.2e) (ignoring negative intensities)\n",
475
	       Rdiff*100.0, scale_rdig);
Thomas White's avatar
Thomas White committed
476
477

	Rdiff = stat_rdiff_zero(ref1, ref2_transformed, icommon, &scale);
Thomas White's avatar
Thomas White committed
478
479
480
481
482
483
	STATUS("Rint(F) = %5.4f %% (scale=%5.2e) (zeroing negative intensities)\n",
	       Rdiff*100.0, scale);

	Rdiff = stat_rdiff_intensity(ref1, ref2_transformed, icommon,
	                             &scale_rintint);
	STATUS("Rint(I) = %5.4f %% (scale=%5.2e)\n",
484
	       Rdiff*100.0, scale_rintint);
Thomas White's avatar
Thomas White committed
485
486
487
488
489
490
491
492
493
494
495

	pearson = stat_pearson_i(ref1, ref2_transformed, icommon);
	STATUS("Pearson r(I) = %5.4f\n", pearson);

	pearson = stat_pearson_f_ignore(ref1, ref2_transformed, icommon);
	STATUS("Pearson r(F) = %5.4f (ignoring negative intensities)\n",
	       pearson);

	pearson = stat_pearson_f_zero(ref1, ref2_transformed, icommon);
	STATUS("Pearson r(F) = %5.4f (zeroing negative intensities)\n",
	       pearson);
Thomas White's avatar
Thomas White committed
496

Thomas White's avatar
Thomas White committed
497
	if ( config_shells ) {
498
		plot_shells(ref1, ref2_transformed, icommon, scale_r1fi,
499
		            cell, sym, i1, cts1, esd1, rmin_fix, rmax_fix);
Thomas White's avatar
Thomas White committed
500
501
	}

502
	if ( outfile != NULL ) {
503
504
505
506

		write_reflections(outfile, icommon, out, NULL, NULL, cell, 1.0);
		STATUS("Sigma(I) values in output file are not meaningful.\n");

507
	}
508
509
510

	return 0;
}