compare_hkl.c 12.5 KB
Newer Older
1
2
3
4
5
/*
 * compare_hkl.c
 *
 * Compare reflection lists
 *
6
7
8
9
 * Copyright © 2012 Deutsches Elektronen-Synchrotron DESY,
 *                  a research centre of the Helmholtz Association.
 *
 * Authors:
10
 *   2010-2012 Thomas White <taw@physics.org>
11
 *
Thomas White's avatar
Thomas White committed
12
13
14
15
16
17
18
19
20
21
22
23
24
25
 * This file is part of CrystFEL.
 *
 * CrystFEL is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * CrystFEL is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with CrystFEL.  If not, see <http://www.gnu.org/licenses/>.
26
27
28
29
30
31
32
33
34
35
36
37
38
39
 *
 */


#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <stdarg.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <getopt.h>
Thomas White's avatar
Thomas White committed
40
#include <assert.h>
41
#include <gsl/gsl_errno.h>
42
43

#include "utils.h"
Thomas White's avatar
Thomas White committed
44
#include "statistics.h"
45
#include "symmetry.h"
Thomas White's avatar
Thomas White committed
46
#include "reflist-utils.h"
47
#include "cell-utils.h"
48
49


Thomas White's avatar
Thomas White committed
50
/* Number of bins for plot of resolution shells */
51
#define NBINS (10)
Thomas White's avatar
Thomas White committed
52
53


54
55
56
57
58
59
60
61
62
63
64
enum r_shell
{
	R_SHELL_NONE,
	R_SHELL_R1I,
	R_SHELL_R1F,
	R_SHELL_RSPLIT,
};


static enum r_shell get_r_shell(const char *s)
{
65
66
67
	if ( strcasecmp(s, "r1i") == 0 ) return R_SHELL_R1I;
	if ( strcasecmp(s, "r1f") == 0 ) return R_SHELL_R1F;
	if ( strcasecmp(s, "rsplit") == 0 ) return R_SHELL_RSPLIT;
68
69
70
71
72
73
74

	ERROR("Unknown R-factor '%s' - try '--shells=rsplit', or --help for"
	      " more possibilities.\n", s);
	exit(1);
}


75
76
static void show_help(const char *s)
{
77
	printf("Syntax: %s [options] <file1.hkl> <file2.hkl>\n\n", s);
78
79
80
81
	printf(
"Compare intensity lists.\n"
"\n"
"  -h, --help                 Display this help message.\n"
Thomas White's avatar
Thomas White committed
82
"  -o, --ratio=<filename>     Specify output filename for ratios.\n"
83
"  -y, --symmetry=<sym>       The symmetry of both the input files.\n"
Thomas White's avatar
Thomas White committed
84
"  -p, --pdb=<filename>       PDB file to use.\n"
85
86
"      --shells=<FoM>         Plot this figure of merit in resolution shells.\n"
"                              Choose from: 'Rsplit', 'R1f' and 'R1i'.\n"
87
88
"      --rmin=<res>           Fix lower resolution limit for --shells (m^-1).\n"
"      --rmax=<res>           Fix upper resolution limit for --shells (m^-1).\n"
89
"      --sigma-cutoff=<n>     Discard reflections with I/sigma(I) < n.\n"
90
91
92
93
"\n");
}


94
static void plot_shells(RefList *list1, RefList *list2, double scale,
95
96
                        UnitCell *cell, double rmin_fix, double rmax_fix,
                        enum r_shell config_shells)
Thomas White's avatar
Thomas White committed
97
98
{
	double num[NBINS];
Thomas White's avatar
Thomas White committed
99
	int cts[NBINS];
100
101
102
103
	double total_vol, vol_per_shell;
	double rmins[NBINS];
	double rmaxs[NBINS];
	double rmin, rmax;
Thomas White's avatar
Thomas White committed
104
	int i;
Thomas White's avatar
Thomas White committed
105
106
	Reflection *refl1;
	RefListIterator *iter;
107
	FILE *fh;
108
	double den[NBINS];
109
	int ctot, nout;
Thomas White's avatar
Thomas White committed
110
111

	if ( cell == NULL ) {
Thomas White's avatar
Thomas White committed
112
		ERROR("Need the unit cell to plot resolution shells.\n");
Thomas White's avatar
Thomas White committed
113
114
115
116
117
		return;
	}

	for ( i=0; i<NBINS; i++ ) {
		num[i] = 0.0;
118
		den[i] = 0.0;
Thomas White's avatar
Thomas White committed
119
		cts[i] = 0;
Thomas White's avatar
Thomas White committed
120
121
	}

122
123
	/* Find resolution limits */
	resolution_limits(list1, cell, &rmin, &rmax);
124
	STATUS("1/d goes from %f to %f nm^-1\n", rmin/1e9, rmax/1e9);
125

126
127
	/* Widen the range just a little bit */
	rmin -= 0.001e9;
128
129
	rmax += 0.001e9;

130
131
132
	/* Fixed resolution shells if needed */
	if ( rmin_fix > 0.0 ) rmin = rmin_fix;
	if ( rmax_fix > 0.0 ) rmax = rmax_fix;
133

Thomas White's avatar
Thomas White committed
134
	/* Calculate the resolution bins */
135
136
137
138
139
140
141
142
143
144
	total_vol = pow(rmax, 3.0) - pow(rmin, 3.0);
	vol_per_shell = total_vol / NBINS;
	rmins[0] = rmin;
	for ( i=1; i<NBINS; i++ ) {

		double r;

		r = vol_per_shell + pow(rmins[i-1], 3.0);
		r = pow(r, 1.0/3.0);

145
		/* Shells of constant volume */
146
147
148
		rmaxs[i-1] = r;
		rmins[i] = r;

149
150
151
152
		/* Shells of constant thickness */
		//rmins[i] = rmins[i-1] + (rmax-rmin)/NBINS;
		//rmaxs[i-1] = rmins[i-1] + (rmax-rmin)/NBINS;

153
154
155
156
157
158
159
160
		STATUS("Shell %i: %f to %f\n", i-1,
		       rmins[i-1]/1e9, rmaxs[i-1]/1e9);

	}
	rmaxs[NBINS-1] = rmax;
	STATUS("Shell %i: %f to %f\n", NBINS-1,
	       rmins[NBINS-1]/1e9, rmaxs[NBINS-1]/1e9);

161
	ctot = 0;  nout = 0;
Thomas White's avatar
Thomas White committed
162
163
	for ( refl1 = first_refl(list1, &iter);
	      refl1 != NULL;
164
165
	      refl1 = next_refl(refl1, iter) )
	{
Thomas White's avatar
Thomas White committed
166
		signed int h, k, l;
167
		double d;
Thomas White's avatar
Thomas White committed
168
		int bin;
169
		double i1, i2, f1, f2;
170
		int j;
171
		Reflection *refl2;
Thomas White's avatar
Thomas White committed
172

Thomas White's avatar
Thomas White committed
173
		get_indices(refl1, &h, &k, &l);
174
		d = 2.0 * resolution(cell, h, k, l);
Thomas White's avatar
Thomas White committed
175

176
177
178
179
180
181
182
		bin = -1;
		for ( j=0; j<NBINS; j++ ) {
			if ( (d>rmins[j]) && (d<=rmaxs[j]) ) {
				bin = j;
				break;
			}
		}
183
184

		/* Outside resolution range? */
185
186
187
188
		if ( bin == -1 ) {
			nout++;
			continue;
		}
Thomas White's avatar
Thomas White committed
189

190
191
192
		refl2 = find_refl(list2, h, k, l);
		if ( refl2 == NULL ) continue;

Thomas White's avatar
Thomas White committed
193
		i1 = get_intensity(refl1);
194
195
196
197
198
199
200
		i2 = get_intensity(refl2);
		f1 = i1 > 0.0 ? sqrt(i1) : 0.0;
		f2 = i2 > 0.0 ? sqrt(i2) : 0.0;

		switch ( config_shells ) {

			case R_SHELL_RSPLIT :
201
202
			num[bin] += fabs(i1 - i2);
			den[bin] += i1 + i2;
203
204
205
206
			break;

			case R_SHELL_R1I :
			num[bin] += fabs(i1 - scale*i2);
207
			den[bin] += i1;
208
209
210
211
			break;

			case R_SHELL_R1F :
			num[bin] += fabs(f1 - scale*f2);
212
			den[bin] += f1;
213
214
215
216
217
			break;

			default : break;

		}
Thomas White's avatar
Thomas White committed
218

Thomas White's avatar
Thomas White committed
219
		ctot++;
220
		cts[bin]++;
Thomas White's avatar
Thomas White committed
221
222
	}

223
224
225
226
227
	if ( nout ) {
		STATUS("Warning; %i reflections outside resolution range.\n",
		       nout);
	}

228
229
230
231
232
233
	fh = fopen("shells.dat", "w");
	if ( fh == NULL ) {
		ERROR("Couldn't open 'shells.dat'\n");
		return;
	}

234
235
236
	switch ( config_shells ) {

		case R_SHELL_RSPLIT :
237
		fprintf(fh, "1/d centre   Rsplit / %%     nref       d (A)\n");
238
239
240
		break;

		case R_SHELL_R1I :
241
		fprintf(fh, "1/d centre   R1(I) / %%      nref       d (A)\n");
242
243
244
		break;

		case R_SHELL_R1F :
245
		fprintf(fh, "1/d centre   R1(F) ign -/%%  nref       d (A)\n");
246
247
248
		break;

		default :
249
		fprintf(fh, "1/d centre   0.0             nref       d (A)\n");
250
251
252
		break;

	}
253

Thomas White's avatar
Thomas White committed
254
255
	for ( i=0; i<NBINS; i++ ) {

256
		double r, cen;
257
		cen = rmins[i] + (rmaxs[i] - rmins[i])/2.0;
258
259
260
261

		switch ( config_shells ) {

			case R_SHELL_RSPLIT :
262
			r = 2.0*(num[i]/den[i]) / sqrt(2.0);
263
264
265
266
			break;

			case R_SHELL_R1I :
			case R_SHELL_R1F :
267
			r = num[i]/den[i];
268
269
270
271
272
273
274
275
			break;

			default :
			r = 0.0;
			break;

		}

276
277
		fprintf(fh, "%10.3f %10.2f %10i %10.2f\n",
		        cen*1.0e-9, r*100.0, cts[i], (1.0/cen)*1e10);
Thomas White's avatar
Thomas White committed
278
279
280
281

	}

	fclose(fh);
Thomas White's avatar
Thomas White committed
282
283

	STATUS("Resolution shell information written to shells.dat.\n");
Thomas White's avatar
Thomas White committed
284
285
286
}


287
288
289
int main(int argc, char *argv[])
{
	int c;
290
	UnitCell *cell;
Thomas White's avatar
Thomas White committed
291
	char *ratiofile = NULL;
292
293
	char *afile = NULL;
	char *bfile = NULL;
Thomas White's avatar
Thomas White committed
294
295
	char *sym_str = NULL;
	SymOpList *sym;
296
	double scale, scale_r2, scale_rdig, R1, R2, R1i, Rdiff, pearson;
297
	double scale_rintint, scale_r1i, scale_r1, scale_r1fi;
298
	int ncom, nrej;
Thomas White's avatar
Thomas White committed
299
300
	RefList *list1;
	RefList *list2;
301
302
	RefList *list1_raw;
	RefList *list2_raw;
Thomas White's avatar
Thomas White committed
303
	RefList *ratio;
304
	enum r_shell config_shells = R_SHELL_NONE;
305
	char *pdb = NULL;
306
307
	float rmin_fix = -1.0;
	float rmax_fix = -1.0;
Thomas White's avatar
Thomas White committed
308
309
	Reflection *refl1;
	RefListIterator *iter;
310
	int config_unity = 0;
311
	double scale_for_shells;
312
	float sigma_cutoff = -INFINITY;
313
314
315
316

	/* Long options */
	const struct option longopts[] = {
		{"help",               0, NULL,               'h'},
Thomas White's avatar
Thomas White committed
317
		{"ratio" ,             1, NULL,               'o'},
318
		{"symmetry",           1, NULL,               'y'},
319
		{"pdb",                1, NULL,               'p'},
320
321
322
323
		{"rmin",               1, NULL,                2},
		{"rmax",               1, NULL,                3},
		{"shells",             1, NULL,                4},
		{"sigma-cutoff",       1, NULL,                5},
324
325
326
327
		{0, 0, NULL, 0}
	};

	/* Short options */
328
329
330
	while ((c = getopt_long(argc, argv, "ho:y:p:u",
	                        longopts, NULL)) != -1)
	{
331
332

		switch (c) {
Thomas White's avatar
Thomas White committed
333
334

			case 'h' :
335
336
337
			show_help(argv[0]);
			return 0;

Thomas White's avatar
Thomas White committed
338
			case 'o' :
Thomas White's avatar
Thomas White committed
339
			ratiofile = strdup(optarg);
340
341
			break;

Thomas White's avatar
Thomas White committed
342
			case 'y' :
Thomas White's avatar
Thomas White committed
343
			sym_str = strdup(optarg);
344
345
			break;

Thomas White's avatar
Thomas White committed
346
			case 'p' :
347
348
349
			pdb = strdup(optarg);
			break;

Thomas White's avatar
Thomas White committed
350
			case 'u' :
351
352
353
			config_unity = 1;
			break;

Thomas White's avatar
Thomas White committed
354
			case 0 :
355
356
			break;

Thomas White's avatar
Thomas White committed
357
			case 2 :
358
359
360
361
362
363
			if ( sscanf(optarg, "%e", &rmin_fix) != 1 ) {
				ERROR("Invalid value for --rmin\n");
				return 1;
			}
			break;

Thomas White's avatar
Thomas White committed
364
			case 3 :
365
366
367
368
369
370
			if ( sscanf(optarg, "%e", &rmax_fix) != 1 ) {
				ERROR("Invalid value for --rmax\n");
				return 1;
			}
			break;

Thomas White's avatar
Thomas White committed
371
			case 4 :
372
373
374
			config_shells = get_r_shell(optarg);
			break;

375
376
377
378
379
380
381
			case 5 :
			if ( sscanf(optarg, "%f", &sigma_cutoff) != 1 ) {
				ERROR("Invalid value for --sigma-cutoff\n");
				return 1;
			}
			break;

Thomas White's avatar
Thomas White committed
382
			default :
383
384
			ERROR("Unhandled option '%c'\n", c);
			break;
Thomas White's avatar
Thomas White committed
385

386
387
388
389
		}

	}

390
391
392
393
394
	if ( argc != (optind+2) ) {
		ERROR("Please provide exactly two HKL files to compare.\n");
		return 1;
	}

Thomas White's avatar
Thomas White committed
395
396
	if ( sym_str == NULL ) {
		sym_str = strdup("1");
397
	}
Thomas White's avatar
Thomas White committed
398
399
	sym = get_pointgroup(sym_str);
	free(sym_str);
400

401
402
403
	afile = strdup(argv[optind++]);
	bfile = strdup(argv[optind]);

404
405
406
407
408
409
410
	if ( pdb == NULL ) {
		pdb = strdup("molecule.pdb");
	}

	cell = load_cell_from_pdb(pdb);
	free(pdb);

411
412
	list1_raw = read_reflections(afile);
	if ( list1_raw == NULL ) {
Thomas White's avatar
Thomas White committed
413
		ERROR("Couldn't read file '%s'\n", afile);
414
415
		return 1;
	}
Thomas White's avatar
Thomas White committed
416

417
418
	list2_raw = read_reflections(bfile);
	if ( list2_raw == NULL ) {
Thomas White's avatar
Thomas White committed
419
		ERROR("Couldn't read file '%s'\n", bfile);
420
421
422
		return 1;
	}

Thomas White's avatar
Thomas White committed
423
	/* Check that the intensities have the correct symmetry */
424
	if ( check_list_symmetry(list1_raw, sym) ) {
Thomas White's avatar
Thomas White committed
425
		ERROR("The first input reflection list does not appear to"
Thomas White's avatar
Thomas White committed
426
		      " have symmetry %s\n", symmetry_name(sym));
Thomas White's avatar
Thomas White committed
427
428
		return 1;
	}
429
	if ( check_list_symmetry(list2_raw, sym) ) {
Thomas White's avatar
Thomas White committed
430
		ERROR("The second input reflection list does not appear to"
Thomas White's avatar
Thomas White committed
431
		      " have symmetry %s\n", symmetry_name(sym));
Thomas White's avatar
Thomas White committed
432
433
434
		return 1;
	}

435
436
437
438
	list1 = asymmetric_indices(list1_raw, sym);
	list2 = asymmetric_indices(list2_raw, sym);

	/* Find common reflections and calculate ratio */
Thomas White's avatar
Thomas White committed
439
	ratio = reflist_new();
440
	ncom = 0;
441
	nrej = 0;
Thomas White's avatar
Thomas White committed
442
443
	for ( refl1 = first_refl(list1, &iter);
	      refl1 != NULL;
Thomas White's avatar
Thomas White committed
444
445
	      refl1 = next_refl(refl1, iter) )
	{
Thomas White's avatar
Thomas White committed
446
		signed int h, k, l;
447
		double val1, val2;
448
		double esd1, esd2;
Thomas White's avatar
Thomas White committed
449
450
		Reflection *refl2;
		Reflection *tr;
451

Thomas White's avatar
Thomas White committed
452
		get_indices(refl1, &h, &k, &l);
453

454
455
		refl2 = find_refl(list2, h, k, l);
		if ( refl2 == NULL ) continue;
456

Thomas White's avatar
Thomas White committed
457
458
459
		val1 = get_intensity(refl1);
		val2 = get_intensity(refl2);

460
461
462
463
464
465
466
467
468
469
470
471
		esd1 = get_esd_intensity(refl1);
		esd2 = get_esd_intensity(refl2);

		if ( (val1 < sigma_cutoff * esd1)
		  || (val2 < sigma_cutoff * esd2) )
		{
			nrej++;
			continue;
		}

		ncom++;

Thomas White's avatar
Thomas White committed
472
473
		/* Add divided version to 'output' list */
		tr = add_refl(ratio, h, k, l);
474
		set_intensity(tr, val1/val2);
475
		set_redundancy(tr, 1);
476
	}
Thomas White's avatar
Thomas White committed
477

478
	if ( ratiofile != NULL ) {
479
		write_reflist(ratiofile, ratio);
480
481
482
	}
	reflist_free(ratio);

483
484
	gsl_set_error_handler_off();

485
486
487
488
489
490
491
	STATUS("%i,%i reflections: %i in common where both versions have "
	       "I/sigma(I) >= %f.\n",
	       num_reflections(list1), num_reflections(list2), ncom,
	       sigma_cutoff);

	STATUS("Discarded %i reflections because either or both versions "
	       " had I/sigma(I) < %f\n", nrej, sigma_cutoff);
Thomas White's avatar
Thomas White committed
492

493
	R1 = stat_r1_ignore(list1, list2, &scale_r1fi, config_unity);
Thomas White's avatar
Thomas White committed
494
495
	STATUS("R1(F) = %5.4f %% (scale=%5.2e)"
	       " (ignoring negative intensities)\n",
496
	       R1*100.0, scale_r1fi);
Thomas White's avatar
Thomas White committed
497

498
	R1 = stat_r1_zero(list1, list2, &scale_r1, config_unity);
Thomas White's avatar
Thomas White committed
499
500
	STATUS("R1(F) = %5.4f %% (scale=%5.2e)"
	       " (zeroing negative intensities)\n",
501
	       R1*100.0, scale_r1);
Thomas White's avatar
Thomas White committed
502

503
	R2 = stat_r2(list1, list2, &scale_r2, config_unity);
504
	STATUS("R2(I) = %5.4f %% (scale=%5.2e)\n", R2*100.0, scale_r2);
Thomas White's avatar
Thomas White committed
505

506
	R1i = stat_r1_i(list1, list2, &scale_r1i, config_unity);
507
	STATUS("R1(I) = %5.4f %% (scale=%5.2e)\n", R1i*100.0, scale_r1i);
Thomas White's avatar
Thomas White committed
508

509
	Rdiff = stat_rdiff_ignore(list1, list2, &scale_rdig,
510
	                          config_unity);
Thomas White's avatar
Thomas White committed
511
512
	STATUS("Rint(F) = %5.4f %% (scale=%5.2e)"
	       " (ignoring negative intensities)\n",
513
	       Rdiff*100.0, scale_rdig);
Thomas White's avatar
Thomas White committed
514

515
	Rdiff = stat_rdiff_zero(list1, list2, &scale, config_unity);
Thomas White's avatar
Thomas White committed
516
517
	STATUS("Rint(F) = %5.4f %% (scale=%5.2e)"
	       " (zeroing negative intensities)\n",
Thomas White's avatar
Thomas White committed
518
519
	       Rdiff*100.0, scale);

520
	Rdiff = stat_rdiff_intensity(list1, list2, &scale_rintint,
521
	                             config_unity);
Thomas White's avatar
Thomas White committed
522
	STATUS("Rint(I) = %5.4f %% (scale=%5.2e)\n",
523
	       Rdiff*100.0, scale_rintint);
Thomas White's avatar
Thomas White committed
524

525
	pearson = stat_pearson_i(list1, list2);
Thomas White's avatar
Thomas White committed
526
527
	STATUS("Pearson r(I) = %5.4f\n", pearson);

528
	pearson = stat_pearson_f_ignore(list1, list2);
Thomas White's avatar
Thomas White committed
529
530
531
	STATUS("Pearson r(F) = %5.4f (ignoring negative intensities)\n",
	       pearson);

532
	pearson = stat_pearson_f_zero(list1, list2);
Thomas White's avatar
Thomas White committed
533
534
	STATUS("Pearson r(F) = %5.4f (zeroing negative intensities)\n",
	       pearson);
Thomas White's avatar
Thomas White committed
535

536
537
538
539
540
541
542
543
544
545
	switch ( config_shells ) {
		case R_SHELL_R1I : scale_for_shells = scale_r1i;  break;
		case R_SHELL_R1F : scale_for_shells = scale_r1;  break;
		case R_SHELL_RSPLIT : scale_for_shells = scale_rintint;  break;
		default : scale_for_shells = 0.0;
	}

	if ( config_shells != R_SHELL_NONE ) {
		plot_shells(list1, list2, scale_for_shells,
		            cell, rmin_fix, rmax_fix, config_shells);
Thomas White's avatar
Thomas White committed
546
547
	}

548
549
	return 0;
}