/* Calculate two-point correlation function for parallel and transverse * components. * * Usage: compcorr OUTFILE DATAFILE CMPFILE method * * * File CMPFILE should have the following format: * * number of points (N) * data low bound (3 components, separated by whitespace) * data high bound (3 components, separated by whitespace) * point 1 (3 components, separated by whitespace) * . * . * . * point N (3 components, separated by whitespace) * * * File DATAFILE should be otherwise similar to CMPFILE, with the following * added to the very beginning of the file: * * number of parallel scale bins (N_p) * bin 1 (2 components, low and high bounds, separated by whitespace) * . * . * bin N_p * number of transverse scale bins (N_t) * bin 1 (2 components, low and high bounds, separated by whitespace) * . * . * bin N_t * * * Result in OUTFILE will have one row for each bin pair, with the following * values: * * parallel bin low bound, high bound, transverse bin low bound, high bound, * value of ksi, Poisson error * * * Written in 2006 for the benefit of Tuorla Observatory Cosmological * Simulations research group by Pauli Pihajoki. * * Copyright (c) 2006 Pauli Pihajoki. All rights reserved. */ #include #include #include #include /* Datatypes. */ typedef unsigned long ULONG; typedef double LDOUBLE; /* Struct defs and functionality using them. */ typedef struct _p3 { LDOUBLE x, y, z; } p3; #define IPROD(a, b) ((a).x*(b).x + (a).y*(b).y + (a).z*(b).z) #define NORM(a) (sqrt(IPROD((a), (a)))) typedef struct _field { ULONG num_pts; p3 low_bound, high_bound; p3 *pts; } field; typedef struct _dataset { int num_p_bins; LDOUBLE **p_bins; int num_t_bins; LDOUBLE **t_bins; field data; } dataset; /* Func defs. */ void read_bins(FILE *f, dataset *storage); void read_data(FILE *f, field *storage); void correlate(int correlation_function, dataset *data, field *cmp, FILE *o); /* A prototype for a correlation function. * Parameters: ratio of N_r/N., DD, DR, RR. */ typedef LDOUBLE (*correlation_function)(LDOUBLE, ULONG, ULONG, ULONG); /* Supported correlation methods. */ #define PEEBLES_HAUSER 1 #define DAVIS_PEEBLES 2 #define HAMILTON 3 #define LANDY_SZALAY 4 #define NUM_METHODS 4 /* Corresponding functions. */ inline LDOUBLE ph_corr(LDOUBLE rat, ULONG dd, ULONG dr, ULONG rr) { return rat*rat*dd/rr - 1; } inline LDOUBLE dp_corr(LDOUBLE rat, ULONG dd, ULONG dr, ULONG rr) { return rat*dd/dr - 1; } inline LDOUBLE ham_corr(LDOUBLE rat, ULONG dd, ULONG dr, ULONG rr) { return (LDOUBLE)dd*rr/(dr*dr) - 1; } inline LDOUBLE ls_corr(LDOUBLE rat, ULONG dd, ULONG dr, ULONG rr) { return 1 + rat*rat*dd/rr - 2*rat*dr/rr; } int main(int argc, char **argv) { FILE *IN, *OUT, *CMP; dataset data; field cmp; int i, corr_func; if (argc != 4 && argc != 5) { printf( "correlator: arguments: outputfile datafile " "comparison sample file [correlation method]\n" "Supported correlation methods:\n" "1 => Peebles-Hauser\n" "2 => Davis-Peebles\n" "3 => Hamilton\n" "4 => Landy-Szalay (default)\n"); exit(EXIT_FAILURE); } /* Verify the correlation selection. */ if (argc == 5) { corr_func = atoi(argv[4]); if (corr_func <= 0 || corr_func > NUM_METHODS) { fprintf(stderr, "Invalid correlation method %d\n", corr_func); exit(EXIT_FAILURE); } } else /* Prefer Landy-Szalay correlation function by default. */ corr_func = 4; /* Open the file handles, exit on errors.*/ OUT = fopen(argv[1], "w"); if (OUT == NULL) { perror("fopen"); exit(EXIT_FAILURE); } IN = fopen(argv[2], "r"); if (IN == NULL) { perror("fopen"); exit(EXIT_FAILURE); } CMP = fopen(argv[3], "r"); if (CMP == NULL) { perror("fopen"); exit(EXIT_FAILURE); } /* Read in the data. */ read_bins(IN, &data); read_data(IN, &data.data); read_data(CMP, &cmp); /* Close input files. */ fclose(IN); fclose(CMP); /* Calculate and store the correlation. */ correlate(corr_func, &data, &cmp, OUT); /* Final cleanup. */ fclose(OUT); free(cmp.pts); free(data.data.pts); for (i=0; i < data.num_p_bins; i++) { free(data.p_bins[i]); } free(data.p_bins); for (i=0; i < data.num_t_bins; i++) { free(data.t_bins[i]); } free(data.t_bins); return 0; } inline void check_malloc(void *p, int line) { if (p == NULL) { fprintf(stderr, "malloc returned null at line %d", line); exit(EXIT_FAILURE); } } void read_bins(FILE *f, dataset *d) { int i; /* Process parallel bins first, then transverse bins. */ /* First the number of bins. */ fscanf(f, "%d", &d->num_p_bins); if (d->num_p_bins <= 0) { fprintf(stderr, "Invalid number of p_bins: %d\n", d->num_p_bins); exit(EXIT_FAILURE); } /* Allocate space. */ printf("Allocating space for %d p_bins...\n", d->num_p_bins); d->p_bins = (LDOUBLE**)malloc(d->num_p_bins * sizeof(LDOUBLE*)); check_malloc(d->p_bins, __LINE__); for (i=0; i < d->num_p_bins; i++) { d->p_bins[i] = (LDOUBLE*)malloc(2 * sizeof(LDOUBLE)); check_malloc(d->p_bins[i], __LINE__); } /* Read the p_bins. */ for (i=0; i < d->num_p_bins; i++) { printf("Reading in bin %d... ", i); fscanf(f, "%lg %lg", &d->p_bins[i][0], &d->p_bins[i][1]); printf("%lg %lg\n", d->p_bins[i][0], d->p_bins[i][1]); } /* First the number of bins. */ fscanf(f, "%d", &d->num_t_bins); if (d->num_t_bins <= 0) { fprintf(stderr, "Invalid number of t_bins: %d\n", d->num_t_bins); exit(EXIT_FAILURE); } /* Allocate space. */ printf("Allocating space for %d t_bins...\n", d->num_t_bins); d->t_bins = (LDOUBLE**)malloc(d->num_t_bins * sizeof(LDOUBLE*)); check_malloc(d->t_bins, __LINE__); for (i=0; i < d->num_t_bins; i++) { d->t_bins[i] = (LDOUBLE*)malloc(2 * sizeof(LDOUBLE)); check_malloc(d->t_bins[i], __LINE__); } /* Read the t_bins. */ for (i=0; i < d->num_t_bins; i++) { printf("Reading in bin %d... ", i); fscanf(f, "%lg %lg", &d->t_bins[i][0], &d->t_bins[i][1]); printf("%lg %lg\n", d->t_bins[i][0], d->t_bins[i][1]); } } void read_data(FILE *f, field *d) { ULONG i; /* First, get the number of points. */ fscanf(f, "%lu", &d->num_pts); if (d->num_pts <= 0) { fprintf(stderr, "Invalid number of data points: %lu\n", d->num_pts); exit(EXIT_FAILURE); } /* Allocate space. */ printf("Allocating space for %lu points...\n", d->num_pts); d->pts = (p3*)malloc((int)d->num_pts * sizeof(p3)); check_malloc(d->pts, __LINE__); /* Low and high bounds. */ printf("Reading high and low bounds...\n"); fscanf(f, "%lg %lg %lg", &d->low_bound.x, &d->low_bound.y, &d->low_bound.z); fscanf(f, "%lg %lg %lg", &d->high_bound.x, &d->high_bound.y, &d->high_bound.z); printf("low: %lg %lg %lg high: %lg %lg %lg\n", d->low_bound.x, d->low_bound.y, d->low_bound.z, d->high_bound.x, d->high_bound.y, d->high_bound.z); /* Then get the data. */ for (i=0; i < d->num_pts; i++) { fscanf(f, "%lg %lg %lg", &d->pts[i].x, &d->pts[i].y, &d->pts[i].z); } } void correlate(int corr_func, dataset *d, field *c, FILE *out) { ULONG **dd; ULONG **dr; ULONG **rr; ULONG i, j; int k, l; p3 sep, los; LDOUBLE para, trans; correlation_function cfunc; /* Get the right function. */ switch (corr_func) { case PEEBLES_HAUSER: cfunc = &ph_corr; break; case DAVIS_PEEBLES: cfunc = &dp_corr; break; case HAMILTON: cfunc = &ham_corr; break; case LANDY_SZALAY: default: cfunc = &ls_corr; break; } /* Allocate. */ dd = (ULONG**)malloc(d->num_p_bins * sizeof(ULONG*)); check_malloc(dd, __LINE__); dr = (ULONG**)malloc(d->num_p_bins * sizeof(ULONG*)); check_malloc(dr, __LINE__); rr = (ULONG**)malloc(d->num_p_bins * sizeof(ULONG*)); check_malloc(rr, __LINE__); for (k=0; k < d->num_p_bins; k++) { dd[k] = (ULONG*)malloc(d->num_t_bins * sizeof(ULONG)); check_malloc(dd[k], __LINE__); dr[k] = (ULONG*)malloc(d->num_t_bins * sizeof(ULONG)); check_malloc(dr[k], __LINE__); rr[k] = (ULONG*)malloc(d->num_t_bins * sizeof(ULONG)); check_malloc(rr[k], __LINE__); } /* Zero the storages. */ for (k=0; k < d->num_p_bins; k++) { memset(dd[k], 0, d->num_t_bins * sizeof(ULONG)); memset(dr[k], 0, d->num_t_bins * sizeof(ULONG)); memset(rr[k], 0, d->num_t_bins * sizeof(ULONG)); } /* Calculate DD and DR pairs, and add for each bin. */ /* XXX: We require that one pair can only end up in one bin, so we can * stop going through the bins as soon as we find a match. This means * that overlapping bins to achieve some effect will not result in * expected behaviour. */ printf("DD and DR pairs\n"); for (i=0; i < d->data.num_pts; i++) { /* Print some progress info. */ printf("%lu ", i); if (i != 0 && (i % 10 == 0)) printf("\n"); for (j=i+1; j < d->data.num_pts; j++) { sep.x = d->data.pts[i].x - d->data.pts[j].x; sep.y = d->data.pts[i].y - d->data.pts[j].y; sep.z = d->data.pts[i].z - d->data.pts[j].z; los.x = 0.5*(d->data.pts[i].x + d->data.pts[j].x); los.y = 0.5*(d->data.pts[i].y + d->data.pts[j].y); los.z = 0.5*(d->data.pts[i].z + d->data.pts[j].z); para = IPROD(sep, los)/NORM(los); trans = IPROD(sep, sep) - para*para; for (k=0; k < d->num_p_bins; k++) { for (l=0; l < d->num_t_bins; l++) { if ( d->p_bins[k][0] <= para && para <= d->p_bins[k][1] && d->t_bins[l][0] <= trans && trans <= d->t_bins[l][1]) { dd[k][l] += 2; break; } } } } for (j=0; j < c->num_pts; j++) { sep.x = d->data.pts[i].x - c->pts[j].x; sep.y = d->data.pts[i].y - c->pts[j].y; sep.z = d->data.pts[i].z - c->pts[j].z; los.x = 0.5*(d->data.pts[i].x + c->pts[j].x); los.y = 0.5*(d->data.pts[i].y + c->pts[j].y); los.z = 0.5*(d->data.pts[i].z + c->pts[j].z); para = IPROD(sep, los)/NORM(los); trans = IPROD(sep, sep) - para*para; for (k=0; k < d->num_p_bins; k++) { for (l=0; l < d->num_t_bins; l++) { if ( d->p_bins[k][0] <= para && para <= d->p_bins[k][1] && d->t_bins[l][0] <= trans && trans <= d->t_bins[l][1]) { dr[k][l]++; break; } } } } } printf("\n"); /* Calculate RR pairs. */ printf("RR pairs\n"); for (i=0; i < c->num_pts; i++) { /* Print some progress info. */ printf("%lu ", i); if (i != 0 && (i % 10 == 0)) printf("\n"); for (j=i+1; j < c->num_pts; j++) { sep.x = c->pts[i].x - c->pts[j].x; sep.y = c->pts[i].y - c->pts[j].y; sep.z = c->pts[i].z - c->pts[j].z; los.x = 0.5*(c->pts[i].x + c->pts[j].x); los.y = 0.5*(c->pts[i].y + c->pts[j].y); los.z = 0.5*(c->pts[i].z + c->pts[j].z); para = IPROD(sep, los)/NORM(los); trans = IPROD(sep, sep) - para*para; for (k=0; k < d->num_p_bins; k++) { for (l=0; l < d->num_t_bins; l++) { if ( d->p_bins[k][0] <= para && para <= d->p_bins[k][1] && d->t_bins[l][0] <= trans && trans <= d->t_bins[l][1]) { rr[k][l] += 2; break; } } } } } printf("\n"); /* Calculate the correlation and Poisson error * for each bin. Then print bin low bound, high bound, ksi and error * on each row. */ { LDOUBLE ksi, error; LDOUBLE rat = c->num_pts/(LDOUBLE)d->data.num_pts; for (k=0; k < d->num_p_bins; k++) { for (l=0; l < d->num_t_bins; l++) { printf("Bin %d-%d [%lg, %lg] [%lg, %lg]\n", k, l, d->p_bins[k][0], d->p_bins[k][1], d->t_bins[l][0], d->t_bins[l][1]); printf("dd %lu ", dd[k][l]); printf("dr %lu ", dr[k][l]); printf("rr %lu\n", rr[k][l]); if (rr[k][l] == 0) { /* XXX: Something like this is what _should_ * be done, however, it would require * enabling GNU extensions or somesuch. */ /* ksi = error = NAN; */ fprintf(out, "%lg %lg %lg %lg %s %s\n", d->p_bins[k][0], d->p_bins[k][1], d->t_bins[l][0], d->t_bins[l][1], "NaN", "NaN"); } else { /* Use the given estimator. */ ksi = cfunc(rat, dd[k][l], dr[k][l], rr[k][l]); /* For the Poisson errors, we have: * (1+ksi)/sqrt(x), where x can be DD, * (N/N_r)*DR or (N/N_r)^2*RR. Of these, * Martinez & Saar suggest using one of the * latter two. Of these, again the latter * is more convenient, as the RR counts should * be consistently large. */ /* error = (1 + ksi)/sqrt(dd[k]); error = (1 + ksi)/sqrt(dr[k]/rat); */ error = (1 + ksi)/sqrt(rr[k][l]/(rat*rat)); fprintf(out, "%lg %lg %lg %lg %lg %lg\n", d->p_bins[k][0], d->p_bins[k][1], d->t_bins[l][0], d->t_bins[l][1], ksi, error); } } } } /* Cleanup. */ for (k=0; k < d->num_t_bins; k++) { free(dd[k]); free(dr[k]); free(rr[k]); } free(dd); free(dr); free(rr); }