compare_hkl.c 12.6 KB
Newer Older
1
2
3
4
5
/*
 * compare_hkl.c
 *
 * Compare reflection lists
 *
6
7
8
9
 * Copyright © 2012 Deutsches Elektronen-Synchrotron DESY,
 *                  a research centre of the Helmholtz Association.
 *
 * Authors:
10
 *   2010-2012 Thomas White <taw@physics.org>
11
 *
Thomas White's avatar
Thomas White committed
12
13
14
15
16
17
18
19
20
21
22
23
24
25
 * This file is part of CrystFEL.
 *
 * CrystFEL is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * CrystFEL is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with CrystFEL.  If not, see <http://www.gnu.org/licenses/>.
26
27
28
29
30
31
32
33
34
35
36
37
38
39
 *
 */


#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <stdarg.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <getopt.h>
Thomas White's avatar
Thomas White committed
40
#include <assert.h>
41
#include <gsl/gsl_errno.h>
42
43

#include "utils.h"
Thomas White's avatar
Thomas White committed
44
#include "statistics.h"
45
#include "symmetry.h"
Thomas White's avatar
Thomas White committed
46
#include "reflist-utils.h"
47
48


Thomas White's avatar
Thomas White committed
49
/* Number of bins for plot of resolution shells */
50
#define NBINS (10)
Thomas White's avatar
Thomas White committed
51
52


53
54
55
56
57
58
59
60
61
62
63
enum r_shell
{
	R_SHELL_NONE,
	R_SHELL_R1I,
	R_SHELL_R1F,
	R_SHELL_RSPLIT,
};


static enum r_shell get_r_shell(const char *s)
{
64
65
66
	if ( strcasecmp(s, "r1i") == 0 ) return R_SHELL_R1I;
	if ( strcasecmp(s, "r1f") == 0 ) return R_SHELL_R1F;
	if ( strcasecmp(s, "rsplit") == 0 ) return R_SHELL_RSPLIT;
67
68
69
70
71
72
73

	ERROR("Unknown R-factor '%s' - try '--shells=rsplit', or --help for"
	      " more possibilities.\n", s);
	exit(1);
}


74
75
static void show_help(const char *s)
{
76
	printf("Syntax: %s [options] <file1.hkl> <file2.hkl>\n\n", s);
77
78
79
80
	printf(
"Compare intensity lists.\n"
"\n"
"  -h, --help                 Display this help message.\n"
Thomas White's avatar
Thomas White committed
81
"  -o, --ratio=<filename>     Specify output filename for ratios.\n"
82
"  -y, --symmetry=<sym>       The symmetry of both the input files.\n"
Thomas White's avatar
Thomas White committed
83
"  -p, --pdb=<filename>       PDB file to use.\n"
84
85
"      --shells=<FoM>         Plot this figure of merit in resolution shells.\n"
"                              Choose from: 'Rsplit', 'R1f' and 'R1i'.\n"
86
87
"      --rmin=<res>           Fix lower resolution limit for --shells (m^-1).\n"
"      --rmax=<res>           Fix upper resolution limit for --shells (m^-1).\n"
88
"      --sigma-cutoff=<n>     Discard reflections with I/sigma(I) < n.\n"
89
90
91
92
"\n");
}


93
static void plot_shells(RefList *list1, RefList *list2, double scale,
94
95
                        UnitCell *cell, double rmin_fix, double rmax_fix,
                        enum r_shell config_shells)
Thomas White's avatar
Thomas White committed
96
97
{
	double num[NBINS];
Thomas White's avatar
Thomas White committed
98
	int cts[NBINS];
99
100
101
102
103
	unsigned int measurements[NBINS];
	unsigned int measured[NBINS];
	double total_vol, vol_per_shell;
	double rmins[NBINS];
	double rmaxs[NBINS];
104
	double snr[NBINS];
105
	double rmin, rmax;
Thomas White's avatar
Thomas White committed
106
	int i;
Thomas White's avatar
Thomas White committed
107
108
	Reflection *refl1;
	RefListIterator *iter;
109
110
111
	FILE *fh;
	double den;
	int ctot, nout;
Thomas White's avatar
Thomas White committed
112
113

	if ( cell == NULL ) {
Thomas White's avatar
Thomas White committed
114
		ERROR("Need the unit cell to plot resolution shells.\n");
Thomas White's avatar
Thomas White committed
115
116
117
118
119
		return;
	}

	for ( i=0; i<NBINS; i++ ) {
		num[i] = 0.0;
Thomas White's avatar
Thomas White committed
120
		cts[i] = 0;
121
122
		measured[i] = 0;
		measurements[i] = 0;
123
		snr[i] = 0;
Thomas White's avatar
Thomas White committed
124
125
	}

126
127
	/* Find resolution limits */
	resolution_limits(list1, cell, &rmin, &rmax);
128
	STATUS("1/d goes from %f to %f nm^-1\n", rmin/1e9, rmax/1e9);
129

130
131
	/* Widen the range just a little bit */
	rmin -= 0.001e9;
132
133
	rmax += 0.001e9;

134
135
136
	/* Fixed resolution shells if needed */
	if ( rmin_fix > 0.0 ) rmin = rmin_fix;
	if ( rmax_fix > 0.0 ) rmax = rmax_fix;
137

Thomas White's avatar
Thomas White committed
138
	/* Calculate the resolution bins */
139
140
141
142
143
144
145
146
147
148
	total_vol = pow(rmax, 3.0) - pow(rmin, 3.0);
	vol_per_shell = total_vol / NBINS;
	rmins[0] = rmin;
	for ( i=1; i<NBINS; i++ ) {

		double r;

		r = vol_per_shell + pow(rmins[i-1], 3.0);
		r = pow(r, 1.0/3.0);

149
		/* Shells of constant volume */
150
151
152
		rmaxs[i-1] = r;
		rmins[i] = r;

153
154
155
156
		/* Shells of constant thickness */
		//rmins[i] = rmins[i-1] + (rmax-rmin)/NBINS;
		//rmaxs[i-1] = rmins[i-1] + (rmax-rmin)/NBINS;

157
158
159
160
161
162
163
164
		STATUS("Shell %i: %f to %f\n", i-1,
		       rmins[i-1]/1e9, rmaxs[i-1]/1e9);

	}
	rmaxs[NBINS-1] = rmax;
	STATUS("Shell %i: %f to %f\n", NBINS-1,
	       rmins[NBINS-1]/1e9, rmaxs[NBINS-1]/1e9);

165
	den = 0.0;  ctot = 0;  nout = 0;
Thomas White's avatar
Thomas White committed
166
167
	for ( refl1 = first_refl(list1, &iter);
	      refl1 != NULL;
168
169
	      refl1 = next_refl(refl1, iter) )
	{
Thomas White's avatar
Thomas White committed
170
		signed int h, k, l;
171
		double d;
Thomas White's avatar
Thomas White committed
172
		int bin;
173
		double i1, i2, f1, f2;
174
		int j;
175
		Reflection *refl2;
Thomas White's avatar
Thomas White committed
176

Thomas White's avatar
Thomas White committed
177
		get_indices(refl1, &h, &k, &l);
178
		d = 2.0 * resolution(cell, h, k, l);
Thomas White's avatar
Thomas White committed
179

180
181
182
183
184
185
186
		bin = -1;
		for ( j=0; j<NBINS; j++ ) {
			if ( (d>rmins[j]) && (d<=rmaxs[j]) ) {
				bin = j;
				break;
			}
		}
187
188

		/* Outside resolution range? */
189
190
191
192
		if ( bin == -1 ) {
			nout++;
			continue;
		}
Thomas White's avatar
Thomas White committed
193

194
195
196
		refl2 = find_refl(list2, h, k, l);
		if ( refl2 == NULL ) continue;

Thomas White's avatar
Thomas White committed
197
		i1 = get_intensity(refl1);
198
199
200
201
202
203
204
205
		i2 = get_intensity(refl2);
		f1 = i1 > 0.0 ? sqrt(i1) : 0.0;
		f2 = i2 > 0.0 ? sqrt(i2) : 0.0;

		switch ( config_shells ) {

			case R_SHELL_RSPLIT :
			num[bin] += fabs(i1 - scale*i2);
206
			den += i1 + scale*i2;
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
			break;

			case R_SHELL_R1I :
			num[bin] += fabs(i1 - scale*i2);
			den += i1;
			break;

			case R_SHELL_R1F :
			num[bin] += fabs(f1 - scale*f2);
			den += f1;
			break;

			default : break;

		}
Thomas White's avatar
Thomas White committed
222

Thomas White's avatar
Thomas White committed
223
		ctot++;
224
		cts[bin]++;
Thomas White's avatar
Thomas White committed
225
226
	}

227
228
229
230
231
	if ( nout ) {
		STATUS("Warning; %i reflections outside resolution range.\n",
		       nout);
	}

232
233
234
235
236
237
	fh = fopen("shells.dat", "w");
	if ( fh == NULL ) {
		ERROR("Couldn't open 'shells.dat'\n");
		return;
	}

238
239
240
	switch ( config_shells ) {

		case R_SHELL_RSPLIT :
241
		fprintf(fh, "1/d centre   Rsplit / %%     nref       d (A)\n");
242
243
244
		break;

		case R_SHELL_R1I :
245
		fprintf(fh, "1/d centre   R1(I) / %%      nref       d (A)\n");
246
247
248
		break;

		case R_SHELL_R1F :
249
		fprintf(fh, "1/d centre   R1(F) ign -/%%  nref       d (A)\n");
250
251
252
		break;

		default :
253
		fprintf(fh, "1/d centre   0.0             nref       d (A)\n");
254
255
256
		break;

	}
257

Thomas White's avatar
Thomas White committed
258
259
	for ( i=0; i<NBINS; i++ ) {

260
		double r, cen;
261
		cen = rmins[i] + (rmaxs[i] - rmins[i])/2.0;
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279

		switch ( config_shells ) {

			case R_SHELL_RSPLIT :
			r = (2.0*(num[i]/den)*((double)ctot/cts[i]))/sqrt(2.0);
			break;

			case R_SHELL_R1I :
			case R_SHELL_R1F :
			r = (num[i]/den) * ((double)ctot/cts[i]);
			break;

			default :
			r = 0.0;
			break;

		}

280
281
		fprintf(fh, "%10.3f %10.2f %10i %10.2f\n",
		        cen*1.0e-9, r*100.0, cts[i], (1.0/cen)*1e10);
Thomas White's avatar
Thomas White committed
282
283
284
285

	}

	fclose(fh);
Thomas White's avatar
Thomas White committed
286
287

	STATUS("Resolution shell information written to shells.dat.\n");
Thomas White's avatar
Thomas White committed
288
289
290
}


291
292
293
int main(int argc, char *argv[])
{
	int c;
294
	UnitCell *cell;
Thomas White's avatar
Thomas White committed
295
	char *ratiofile = NULL;
296
297
	char *afile = NULL;
	char *bfile = NULL;
Thomas White's avatar
Thomas White committed
298
299
	char *sym_str = NULL;
	SymOpList *sym;
300
	double scale, scale_r2, scale_rdig, R1, R2, R1i, Rdiff, pearson;
301
	double scale_rintint, scale_r1i, scale_r1, scale_r1fi;
302
	int ncom, nrej;
Thomas White's avatar
Thomas White committed
303
304
	RefList *list1;
	RefList *list2;
305
306
	RefList *list1_raw;
	RefList *list2_raw;
Thomas White's avatar
Thomas White committed
307
	RefList *ratio;
308
	enum r_shell config_shells = R_SHELL_NONE;
309
	char *pdb = NULL;
310
311
	float rmin_fix = -1.0;
	float rmax_fix = -1.0;
Thomas White's avatar
Thomas White committed
312
313
	Reflection *refl1;
	RefListIterator *iter;
314
	int config_unity = 0;
315
	double scale_for_shells;
316
	float sigma_cutoff = -INFINITY;
317
318
319
320

	/* Long options */
	const struct option longopts[] = {
		{"help",               0, NULL,               'h'},
Thomas White's avatar
Thomas White committed
321
		{"ratio" ,             1, NULL,               'o'},
322
		{"symmetry",           1, NULL,               'y'},
323
		{"pdb",                1, NULL,               'p'},
324
325
326
327
		{"rmin",               1, NULL,                2},
		{"rmax",               1, NULL,                3},
		{"shells",             1, NULL,                4},
		{"sigma-cutoff",       1, NULL,                5},
328
329
330
331
		{0, 0, NULL, 0}
	};

	/* Short options */
332
333
334
	while ((c = getopt_long(argc, argv, "ho:y:p:u",
	                        longopts, NULL)) != -1)
	{
335
336

		switch (c) {
Thomas White's avatar
Thomas White committed
337
338

			case 'h' :
339
340
341
			show_help(argv[0]);
			return 0;

Thomas White's avatar
Thomas White committed
342
			case 'o' :
Thomas White's avatar
Thomas White committed
343
			ratiofile = strdup(optarg);
344
345
			break;

Thomas White's avatar
Thomas White committed
346
			case 'y' :
Thomas White's avatar
Thomas White committed
347
			sym_str = strdup(optarg);
348
349
			break;

Thomas White's avatar
Thomas White committed
350
			case 'p' :
351
352
353
			pdb = strdup(optarg);
			break;

Thomas White's avatar
Thomas White committed
354
			case 'u' :
355
356
357
			config_unity = 1;
			break;

Thomas White's avatar
Thomas White committed
358
			case 0 :
359
360
			break;

Thomas White's avatar
Thomas White committed
361
			case 2 :
362
363
364
365
366
367
			if ( sscanf(optarg, "%e", &rmin_fix) != 1 ) {
				ERROR("Invalid value for --rmin\n");
				return 1;
			}
			break;

Thomas White's avatar
Thomas White committed
368
			case 3 :
369
370
371
372
373
374
			if ( sscanf(optarg, "%e", &rmax_fix) != 1 ) {
				ERROR("Invalid value for --rmax\n");
				return 1;
			}
			break;

Thomas White's avatar
Thomas White committed
375
			case 4 :
376
377
378
			config_shells = get_r_shell(optarg);
			break;

379
380
381
382
383
384
385
386
			case 5 :
			if ( sscanf(optarg, "%f", &sigma_cutoff) != 1 ) {
				ERROR("Invalid value for --sigma-cutoff\n");
				return 1;
			}
			break;


Thomas White's avatar
Thomas White committed
387
			default :
388
			return 1;
Thomas White's avatar
Thomas White committed
389

390
391
392
393
		}

	}

394
395
396
397
398
	if ( argc != (optind+2) ) {
		ERROR("Please provide exactly two HKL files to compare.\n");
		return 1;
	}

Thomas White's avatar
Thomas White committed
399
400
	if ( sym_str == NULL ) {
		sym_str = strdup("1");
401
	}
Thomas White's avatar
Thomas White committed
402
403
	sym = get_pointgroup(sym_str);
	free(sym_str);
404

405
406
407
	afile = strdup(argv[optind++]);
	bfile = strdup(argv[optind]);

408
409
410
411
412
413
414
	if ( pdb == NULL ) {
		pdb = strdup("molecule.pdb");
	}

	cell = load_cell_from_pdb(pdb);
	free(pdb);

415
416
	list1_raw = read_reflections(afile);
	if ( list1_raw == NULL ) {
Thomas White's avatar
Thomas White committed
417
		ERROR("Couldn't read file '%s'\n", afile);
418
419
		return 1;
	}
Thomas White's avatar
Thomas White committed
420

421
422
	list2_raw = read_reflections(bfile);
	if ( list2_raw == NULL ) {
Thomas White's avatar
Thomas White committed
423
		ERROR("Couldn't read file '%s'\n", bfile);
424
425
426
		return 1;
	}

Thomas White's avatar
Thomas White committed
427
	/* Check that the intensities have the correct symmetry */
428
	if ( check_list_symmetry(list1_raw, sym) ) {
Thomas White's avatar
Thomas White committed
429
		ERROR("The first input reflection list does not appear to"
Thomas White's avatar
Thomas White committed
430
		      " have symmetry %s\n", symmetry_name(sym));
Thomas White's avatar
Thomas White committed
431
432
		return 1;
	}
433
	if ( check_list_symmetry(list2_raw, sym) ) {
Thomas White's avatar
Thomas White committed
434
		ERROR("The second input reflection list does not appear to"
Thomas White's avatar
Thomas White committed
435
		      " have symmetry %s\n", symmetry_name(sym));
Thomas White's avatar
Thomas White committed
436
437
438
		return 1;
	}

439
440
441
442
	list1 = asymmetric_indices(list1_raw, sym);
	list2 = asymmetric_indices(list2_raw, sym);

	/* Find common reflections and calculate ratio */
Thomas White's avatar
Thomas White committed
443
	ratio = reflist_new();
444
	ncom = 0;
445
	nrej = 0;
Thomas White's avatar
Thomas White committed
446
447
	for ( refl1 = first_refl(list1, &iter);
	      refl1 != NULL;
Thomas White's avatar
Thomas White committed
448
449
	      refl1 = next_refl(refl1, iter) )
	{
Thomas White's avatar
Thomas White committed
450
		signed int h, k, l;
451
		double val1, val2;
452
		double esd1, esd2;
Thomas White's avatar
Thomas White committed
453
454
		Reflection *refl2;
		Reflection *tr;
455

Thomas White's avatar
Thomas White committed
456
		get_indices(refl1, &h, &k, &l);
457

458
459
		refl2 = find_refl(list2, h, k, l);
		if ( refl2 == NULL ) continue;
460

Thomas White's avatar
Thomas White committed
461
462
463
		val1 = get_intensity(refl1);
		val2 = get_intensity(refl2);

464
465
466
467
468
469
470
471
472
473
474
475
		esd1 = get_esd_intensity(refl1);
		esd2 = get_esd_intensity(refl2);

		if ( (val1 < sigma_cutoff * esd1)
		  || (val2 < sigma_cutoff * esd2) )
		{
			nrej++;
			continue;
		}

		ncom++;

Thomas White's avatar
Thomas White committed
476
477
		/* Add divided version to 'output' list */
		tr = add_refl(ratio, h, k, l);
478
		set_intensity(tr, val1/val2);
479
		set_redundancy(tr, 1);
480
	}
Thomas White's avatar
Thomas White committed
481

482
	if ( ratiofile != NULL ) {
483
		write_reflist(ratiofile, ratio);
484
485
486
	}
	reflist_free(ratio);

487
488
	gsl_set_error_handler_off();

489
490
491
492
493
494
495
	STATUS("%i,%i reflections: %i in common where both versions have "
	       "I/sigma(I) >= %f.\n",
	       num_reflections(list1), num_reflections(list2), ncom,
	       sigma_cutoff);

	STATUS("Discarded %i reflections because either or both versions "
	       " had I/sigma(I) < %f\n", nrej, sigma_cutoff);
Thomas White's avatar
Thomas White committed
496

497
	R1 = stat_r1_ignore(list1, list2, &scale_r1fi, config_unity);
Thomas White's avatar
Thomas White committed
498
499
	STATUS("R1(F) = %5.4f %% (scale=%5.2e)"
	       " (ignoring negative intensities)\n",
500
	       R1*100.0, scale_r1fi);
Thomas White's avatar
Thomas White committed
501

502
	R1 = stat_r1_zero(list1, list2, &scale_r1, config_unity);
Thomas White's avatar
Thomas White committed
503
504
	STATUS("R1(F) = %5.4f %% (scale=%5.2e)"
	       " (zeroing negative intensities)\n",
505
	       R1*100.0, scale_r1);
Thomas White's avatar
Thomas White committed
506

507
	R2 = stat_r2(list1, list2, &scale_r2, config_unity);
508
	STATUS("R2(I) = %5.4f %% (scale=%5.2e)\n", R2*100.0, scale_r2);
Thomas White's avatar
Thomas White committed
509

510
	R1i = stat_r1_i(list1, list2, &scale_r1i, config_unity);
511
	STATUS("R1(I) = %5.4f %% (scale=%5.2e)\n", R1i*100.0, scale_r1i);
Thomas White's avatar
Thomas White committed
512

513
	Rdiff = stat_rdiff_ignore(list1, list2, &scale_rdig,
514
	                          config_unity);
Thomas White's avatar
Thomas White committed
515
516
	STATUS("Rint(F) = %5.4f %% (scale=%5.2e)"
	       " (ignoring negative intensities)\n",
517
	       Rdiff*100.0, scale_rdig);
Thomas White's avatar
Thomas White committed
518

519
	Rdiff = stat_rdiff_zero(list1, list2, &scale, config_unity);
Thomas White's avatar
Thomas White committed
520
521
	STATUS("Rint(F) = %5.4f %% (scale=%5.2e)"
	       " (zeroing negative intensities)\n",
Thomas White's avatar
Thomas White committed
522
523
	       Rdiff*100.0, scale);

524
	Rdiff = stat_rdiff_intensity(list1, list2, &scale_rintint,
525
	                             config_unity);
Thomas White's avatar
Thomas White committed
526
	STATUS("Rint(I) = %5.4f %% (scale=%5.2e)\n",
527
	       Rdiff*100.0, scale_rintint);
Thomas White's avatar
Thomas White committed
528

529
	pearson = stat_pearson_i(list1, list2);
Thomas White's avatar
Thomas White committed
530
531
	STATUS("Pearson r(I) = %5.4f\n", pearson);

532
	pearson = stat_pearson_f_ignore(list1, list2);
Thomas White's avatar
Thomas White committed
533
534
535
	STATUS("Pearson r(F) = %5.4f (ignoring negative intensities)\n",
	       pearson);

536
	pearson = stat_pearson_f_zero(list1, list2);
Thomas White's avatar
Thomas White committed
537
538
	STATUS("Pearson r(F) = %5.4f (zeroing negative intensities)\n",
	       pearson);
Thomas White's avatar
Thomas White committed
539

540
541
542
543
544
545
546
547
548
549
	switch ( config_shells ) {
		case R_SHELL_R1I : scale_for_shells = scale_r1i;  break;
		case R_SHELL_R1F : scale_for_shells = scale_r1;  break;
		case R_SHELL_RSPLIT : scale_for_shells = scale_rintint;  break;
		default : scale_for_shells = 0.0;
	}

	if ( config_shells != R_SHELL_NONE ) {
		plot_shells(list1, list2, scale_for_shells,
		            cell, rmin_fix, rmax_fix, config_shells);
Thomas White's avatar
Thomas White committed
550
551
	}

552
553
	return 0;
}