source: grass/trunk/raster/r.univar/stats.c

Last change on this file was 74213, checked in by mmetz, 5 years ago

r.univar: report stats with nan if a given zone has only NULL values

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id
  • Property svn:mime-type set to text/x-csrc
File size: 15.7 KB
Line 
1/*
2 * Calculates univariate statistics from the non-null cells
3 *
4 * Copyright (C) 2004-2007 by the GRASS Development Team
5 * Author(s): Hamish Bowman, University of Otago, New Zealand
6 * Martin Landa and Soeren Gebbert
7 *
8 * This program is free software under the GNU General Public
9 * License (>=v2). Read the file COPYING that comes with GRASS
10 * for details.
11 *
12 */
13
14#include "globals.h"
15
16/* *************************************************************** */
17/* **** univar_stat constructor ********************************** */
18/* *************************************************************** */
19univar_stat *create_univar_stat_struct(int map_type, int n_perc)
20{
21 univar_stat *stats;
22 int i;
23 int n_zones = zone_info.n_zones;
24
25 if (n_zones == 0)
26 n_zones = 1;
27
28 stats = (univar_stat *) G_calloc(n_zones, sizeof(univar_stat));
29
30 for (i = 0; i < n_zones; i++) {
31 stats[i].sum = 0.0;
32 stats[i].sumsq = 0.0;
33 stats[i].min = 0.0 / 0.0; /* set to nan as default */
34 stats[i].max = 0.0 / 0.0; /* set to nan as default */
35 stats[i].n_perc = n_perc;
36 if (n_perc > 0)
37 stats[i].perc = (double *)G_malloc(n_perc * sizeof(double));
38 else
39 stats[i].perc = NULL;
40 stats[i].sum_abs = 0.0;
41 stats[i].n = 0;
42 stats[i].size = 0;
43 stats[i].dcell_array = NULL;
44 stats[i].fcell_array = NULL;
45 stats[i].cell_array = NULL;
46 stats[i].map_type = map_type;
47
48 stats[i].n_alloc = 0;
49
50 stats[i].first = TRUE;
51
52 /* allocate memory for extended computation */
53 /* changed to on-demand block allocation */
54
55/* if (param.extended->answer) {
56 if (map_type == DCELL_TYPE) {
57 stats[i].dcell_array = NULL;
58 }
59 else if (map_type == FCELL_TYPE) {
60 stats[i].fcell_array =NULL;
61 }
62 else if (map_type == CELL_TYPE) {
63 stats[i].cell_array = NULL;
64 }
65 }
66*/
67
68 }
69
70 return stats;
71}
72
73
74/* *************************************************************** */
75/* **** univar_stat destructor *********************************** */
76/* *************************************************************** */
77void free_univar_stat_struct(univar_stat * stats)
78{
79 int i;
80 int n_zones = zone_info.n_zones;
81
82 if (n_zones == 0)
83 n_zones = 1;
84
85 for (i = 0; i < n_zones; i++){
86 if (stats[i].perc)
87 G_free(stats[i].perc);
88 if (stats[i].dcell_array)
89 G_free(stats[i].dcell_array);
90 if (stats[i].fcell_array)
91 G_free(stats[i].fcell_array);
92 if (stats[i].cell_array)
93 G_free(stats[i].cell_array);
94 }
95
96 G_free(stats);
97
98 return;
99}
100
101
102/* *************************************************************** */
103/* **** compute and print univar statistics to stdout ************ */
104/* *************************************************************** */
105int print_stats(univar_stat * stats)
106{
107 int z, n_zones = zone_info.n_zones;
108
109 if (n_zones == 0)
110 n_zones = 1;
111
112 for (z = 0; z < n_zones; z++) {
113 char sum_str[100];
114 double mean, variance, stdev, var_coef;
115
116 /* for extended stats */
117 double quartile_25 = 0.0, quartile_75 = 0.0, *quartile_perc;
118 double median = 0.0;
119 unsigned int i;
120 int qpos_25, qpos_75, *qpos_perc;
121
122 /* all these calculations get promoted to doubles, so any DIV0 becomes nan */
123 mean = stats[z].sum / stats[z].n;
124 variance = (stats[z].sumsq - stats[z].sum * stats[z].sum / stats[z].n) / stats[z].n;
125 if (variance < GRASS_EPSILON)
126 variance = 0.0;
127 stdev = sqrt(variance);
128 var_coef = (stdev / mean) * 100.; /* perhaps stdev/fabs(mean) ? */
129
130 if (stats[z].n == 0)
131 stats[z].sum = stats[z].sum_abs = 0.0 / 0.0;
132 sprintf(sum_str, "%.15g", stats[z].sum);
133 G_trim_decimal(sum_str);
134
135
136 if (!param.shell_style->answer) {
137 if (zone_info.n_zones) {
138 int z_cat = z + zone_info.min;
139 fprintf(stdout, "\nzone %d %s\n\n", z_cat, Rast_get_c_cat(&z_cat, &(zone_info.cats)));
140 }
141 fprintf(stdout, "total null and non-null cells: %lu\n", stats[z].size);
142 fprintf(stdout, "total null cells: %lu\n\n", stats[z].size - stats[z].n);
143 fprintf(stdout, "Of the non-null cells:\n----------------------\n");
144 }
145
146 if (param.shell_style->answer) {
147 if (zone_info.n_zones) {
148 int z_cat = z + zone_info.min;
149 fprintf(stdout, "zone=%d;%s\n", z_cat, Rast_get_c_cat(&z_cat, &(zone_info.cats)));
150 }
151 fprintf(stdout, "n=%lu\n", stats[z].n);
152 fprintf(stdout, "null_cells=%lu\n", stats[z].size - stats[z].n);
153 fprintf(stdout, "cells=%lu\n", stats->size);
154 fprintf(stdout, "min=%.15g\n", stats[z].min);
155 fprintf(stdout, "max=%.15g\n", stats[z].max);
156 fprintf(stdout, "range=%.15g\n", stats[z].max - stats[z].min);
157 fprintf(stdout, "mean=%.15g\n", mean);
158 fprintf(stdout, "mean_of_abs=%.15g\n", stats[z].sum_abs / stats[z].n);
159 fprintf(stdout, "stddev=%.15g\n", stdev);
160 fprintf(stdout, "variance=%.15g\n", variance);
161 fprintf(stdout, "coeff_var=%.15g\n", var_coef);
162 fprintf(stdout, "sum=%s\n", sum_str);
163 }
164 else {
165 fprintf(stdout, "n: %lu\n", stats[z].n);
166 fprintf(stdout, "minimum: %g\n", stats[z].min);
167 fprintf(stdout, "maximum: %g\n", stats[z].max);
168 fprintf(stdout, "range: %g\n", stats[z].max - stats[z].min);
169 fprintf(stdout, "mean: %g\n", mean);
170 fprintf(stdout, "mean of absolute values: %g\n",
171 stats[z].sum_abs / stats[z].n);
172 fprintf(stdout, "standard deviation: %g\n", stdev);
173 fprintf(stdout, "variance: %g\n", variance);
174 fprintf(stdout, "variation coefficient: %g %%\n", var_coef);
175 fprintf(stdout, "sum: %s\n", sum_str);
176 }
177
178
179 /* TODO: mode, skewness, kurtosis */
180 if (param.extended->answer) {
181 qpos_perc = (int *)G_calloc(stats[z].n_perc, sizeof(int));
182 quartile_perc = (double *)G_calloc(stats[z].n_perc, sizeof(double));
183
184 if (stats[z].n == 0) {
185 quartile_25 = median = quartile_75 = 0.0 / 0.0;
186 for (i = 0; i < stats[z].n_perc; i++)
187 quartile_perc[i] = 0.0 / 0.0;
188 }
189 else {
190 for (i = 0; i < stats[z].n_perc; i++) {
191 qpos_perc[i] = (int)(stats[z].n * 1e-2 * stats[z].perc[i] - 0.5);
192 }
193 qpos_25 = (int)(stats[z].n * 0.25 - 0.5);
194 qpos_75 = (int)(stats[z].n * 0.75 - 0.5);
195
196 switch (stats[z].map_type) {
197 case CELL_TYPE:
198 heapsort_int(stats[z].cell_array, stats[z].n);
199
200 quartile_25 = (double)stats[z].cell_array[qpos_25];
201 if (stats[z].n % 2) /* odd */
202 median = (double)stats[z].cell_array[(int)(stats[z].n / 2)];
203 else /* even */
204 median =
205 (double)(stats[z].cell_array[stats[z].n / 2 - 1] +
206 stats[z].cell_array[stats[z].n / 2]) / 2.0;
207 quartile_75 = (double)stats[z].cell_array[qpos_75];
208 for (i = 0; i < stats[z].n_perc; i++) {
209 quartile_perc[i] = (double)stats[z].cell_array[qpos_perc[i]];
210 }
211 break;
212
213 case FCELL_TYPE:
214 heapsort_float(stats[z].fcell_array, stats[z].n);
215
216 quartile_25 = (double)stats[z].fcell_array[qpos_25];
217 if (stats[z].n % 2) /* odd */
218 median = (double)stats[z].fcell_array[(int)(stats[z].n / 2)];
219 else /* even */
220 median =
221 (double)(stats[z].fcell_array[stats[z].n / 2 - 1] +
222 stats[z].fcell_array[stats[z].n / 2]) / 2.0;
223 quartile_75 = (double)stats[z].fcell_array[qpos_75];
224 for (i = 0; i < stats[z].n_perc; i++) {
225 quartile_perc[i] = (double)stats[z].fcell_array[qpos_perc[i]];
226 }
227 break;
228
229 case DCELL_TYPE:
230 heapsort_double(stats[z].dcell_array, stats[z].n);
231
232 quartile_25 = stats[z].dcell_array[qpos_25];
233 if (stats[z].n % 2) /* odd */
234 median = stats[z].dcell_array[(int)(stats[z].n / 2)];
235 else /* even */
236 median =
237 (stats[z].dcell_array[stats[z].n / 2 - 1] +
238 stats[z].dcell_array[stats[z].n / 2]) / 2.0;
239 quartile_75 = stats[z].dcell_array[qpos_75];
240 for (i = 0; i < stats[z].n_perc; i++) {
241 quartile_perc[i] = stats[z].dcell_array[qpos_perc[i]];
242 }
243 break;
244
245 default:
246 break;
247 }
248 }
249
250 if (param.shell_style->answer) {
251 fprintf(stdout, "first_quartile=%g\n", quartile_25);
252 fprintf(stdout, "median=%g\n", median);
253 fprintf(stdout, "third_quartile=%g\n", quartile_75);
254 for (i = 0; i < stats[z].n_perc; i++) {
255 char buf[24];
256 sprintf(buf, "%.15g", stats[z].perc[i]);
257 G_strchg(buf, '.', '_');
258 fprintf(stdout, "percentile_%s=%g\n", buf,
259 quartile_perc[i]);
260 }
261 }
262 else {
263 fprintf(stdout, "1st quartile: %g\n", quartile_25);
264 if (stats[z].n % 2)
265 fprintf(stdout, "median (odd number of cells): %g\n", median);
266 else
267 fprintf(stdout, "median (even number of cells): %g\n",
268 median);
269 fprintf(stdout, "3rd quartile: %g\n", quartile_75);
270
271
272 for (i = 0; i < stats[z].n_perc; i++) {
273 if (stats[z].perc[i] == (int)stats[z].perc[i]) {
274 /* percentile is an exact integer */
275 if ((int)stats[z].perc[i] % 10 == 1 && (int)stats[z].perc[i] != 11)
276 fprintf(stdout, "%dst percentile: %g\n", (int)stats[z].perc[i],
277 quartile_perc[i]);
278 else if ((int)stats[z].perc[i] % 10 == 2 && (int)stats[z].perc[i] != 12)
279 fprintf(stdout, "%dnd percentile: %g\n", (int)stats[z].perc[i],
280 quartile_perc[i]);
281 else if ((int)stats[z].perc[i] % 10 == 3 && (int)stats[z].perc[i] != 13)
282 fprintf(stdout, "%drd percentile: %g\n", (int)stats[z].perc[i],
283 quartile_perc[i]);
284 else
285 fprintf(stdout, "%dth percentile: %g\n", (int)stats[z].perc[i],
286 quartile_perc[i]);
287 }
288 else {
289 /* percentile is not an exact integer */
290 fprintf(stdout, "%.15g percentile: %g\n", stats[z].perc[i],
291 quartile_perc[i]);
292 }
293 }
294 }
295 G_free((void *)quartile_perc);
296 G_free((void *)qpos_perc);
297 }
298
299 /* G_message() prints to stderr not stdout: disabled. this \n is printed above with zone */
300 /* if (!(param.shell_style->answer))
301 G_message("\n"); */
302 }
303
304 return 1;
305}
306
307int print_stats_table(univar_stat * stats)
308{
309 unsigned int i;
310 int z, n_zones = zone_info.n_zones;
311
312 if (n_zones == 0)
313 n_zones = 1;
314
315 /* print column headers */
316
317 if (zone_info.n_zones) {
318 fprintf(stdout, "zone%s", zone_info.sep);
319 fprintf(stdout, "label%s", zone_info.sep);
320 }
321 fprintf(stdout, "non_null_cells%s", zone_info.sep);
322 fprintf(stdout, "null_cells%s", zone_info.sep);
323 fprintf(stdout, "min%s", zone_info.sep);
324 fprintf(stdout, "max%s", zone_info.sep);
325 fprintf(stdout, "range%s", zone_info.sep);
326 fprintf(stdout, "mean%s", zone_info.sep);
327 fprintf(stdout, "mean_of_abs%s", zone_info.sep);
328 fprintf(stdout, "stddev%s", zone_info.sep);
329 fprintf(stdout, "variance%s", zone_info.sep);
330 fprintf(stdout, "coeff_var%s", zone_info.sep);
331 fprintf(stdout, "sum%s", zone_info.sep);
332 fprintf(stdout, "sum_abs");
333
334 if (param.extended->answer) {
335 fprintf(stdout, "%sfirst_quart", zone_info.sep);
336 fprintf(stdout, "%smedian", zone_info.sep);
337 fprintf(stdout, "%sthird_quart", zone_info.sep);
338 for (i = 0; i < stats[0].n_perc; i++) {
339
340 if (stats[0].perc[i] == (int)stats[0].perc[i]) {
341 /* percentile is an exact integer */
342 fprintf(stdout, "%sperc_%d", zone_info.sep, (int)stats[0].perc[i]);
343 }
344 else {
345 /* percentile is not an exact integer */
346 char buf[24];
347 sprintf(buf, "%.15g", stats[0].perc[i]);
348 G_strchg(buf, '.', '_');
349 fprintf(stdout, "%sperc_%s", zone_info.sep, buf);
350 }
351 }
352 }
353 fprintf(stdout, "\n");
354
355 /* print stats */
356
357 for (z = 0; z < n_zones; z++) {
358 char sum_str[100];
359 double mean, variance, stdev, var_coef;
360
361 /* for extendet stats */
362 double quartile_25 = 0.0, quartile_75 = 0.0, *quartile_perc;
363 double median = 0.0;
364 int qpos_25, qpos_75, *qpos_perc;
365
366 /* stats collected for this zone? */
367 if (stats[z].size == 0)
368 continue;
369
370 i = 0;
371
372 /* all these calculations get promoted to doubles, so any DIV0 becomes nan */
373 mean = stats[z].sum / stats[z].n;
374 variance = (stats[z].sumsq - stats[z].sum * stats[z].sum / stats[z].n) / stats[z].n;
375 if (variance < GRASS_EPSILON)
376 variance = 0.0;
377 stdev = sqrt(variance);
378 var_coef = (stdev / mean) * 100.; /* perhaps stdev/fabs(mean) ? */
379
380 if (stats[z].n == 0)
381 stats[z].sum = stats[z].sum_abs = 0.0 / 0.0;
382
383 if (zone_info.n_zones) {
384 int z_cat = z + zone_info.min;
385 /* zone number */
386 fprintf(stdout, "%d%s", z + zone_info.min, zone_info.sep);
387 /* zone label */
388 fprintf(stdout,"%s%s", Rast_get_c_cat(&z_cat, &(zone_info.cats)), zone_info.sep);
389 }
390
391 /* non-null cells cells */
392 fprintf(stdout, "%lu%s", stats[z].n, zone_info.sep);
393 /* null cells */
394 fprintf(stdout, "%lu%s", stats[z].size - stats[z].n, zone_info.sep);
395 /* min */
396 fprintf(stdout, "%.15g%s", stats[z].min, zone_info.sep);
397 /* max */
398 fprintf(stdout, "%.15g%s", stats[z].max, zone_info.sep);
399 /* range */
400 fprintf(stdout, "%.15g%s", stats[z].max - stats[z].min, zone_info.sep);
401 /* mean */
402 fprintf(stdout, "%.15g%s", mean, zone_info.sep);
403 /* mean of abs */
404 fprintf(stdout, "%.15g%s", stats[z].sum_abs / stats[z].n, zone_info.sep);
405 /* stddev */
406 fprintf(stdout, "%.15g%s", stdev, zone_info.sep);
407 /* variance */
408 fprintf(stdout, "%.15g%s", variance, zone_info.sep);
409 /* coefficient of variance */
410 fprintf(stdout, "%.15g%s", var_coef, zone_info.sep);
411 /* sum */
412 sprintf(sum_str, "%.15g", stats[z].sum);
413 G_trim_decimal(sum_str);
414 fprintf(stdout, "%s%s", sum_str, zone_info.sep);
415 /* absolute sum */
416 sprintf(sum_str, "%.15g", stats[z].sum_abs);
417 G_trim_decimal(sum_str);
418 fprintf(stdout, "%s", sum_str);
419
420 /* TODO: mode, skewness, kurtosis */
421 if (param.extended->answer) {
422 qpos_perc = (int *)G_calloc(stats[z].n_perc, sizeof(int));
423 quartile_perc = (double *)G_calloc(stats[z].n_perc, sizeof(double));
424
425
426 if (stats[z].n == 0) {
427 quartile_25 = median = quartile_75 = 0.0 / 0.0;
428 for (i = 0; i < stats[z].n_perc; i++)
429 quartile_perc[i] = 0.0 / 0.0;
430 }
431 else {
432 for (i = 0; i < stats[z].n_perc; i++) {
433 qpos_perc[i] = (int)(stats[z].n * 1e-2 * stats[z].perc[i] - 0.5);
434 }
435 qpos_25 = (int)(stats[z].n * 0.25 - 0.5);
436 qpos_75 = (int)(stats[z].n * 0.75 - 0.5);
437
438 switch (stats[z].map_type) {
439 case CELL_TYPE:
440 heapsort_int(stats[z].cell_array, stats[z].n);
441
442 quartile_25 = (double)stats[z].cell_array[qpos_25];
443 if (stats[z].n % 2) /* odd */
444 median = (double)stats[z].cell_array[(int)(stats[z].n / 2)];
445 else /* even */
446 median =
447 (double)(stats[z].cell_array[stats[z].n / 2 - 1] +
448 stats[z].cell_array[stats[z].n / 2]) / 2.0;
449 quartile_75 = (double)stats[z].cell_array[qpos_75];
450 for (i = 0; i < stats[z].n_perc; i++) {
451 quartile_perc[i] = (double)stats[z].cell_array[qpos_perc[i]];
452 }
453 break;
454
455 case FCELL_TYPE:
456 heapsort_float(stats[z].fcell_array, stats[z].n);
457
458 quartile_25 = (double)stats[z].fcell_array[qpos_25];
459 if (stats[z].n % 2) /* odd */
460 median = (double)stats[z].fcell_array[(int)(stats[z].n / 2)];
461 else /* even */
462 median =
463 (double)(stats[z].fcell_array[stats[z].n / 2 - 1] +
464 stats[z].fcell_array[stats[z].n / 2]) / 2.0;
465 quartile_75 = (double)stats[z].fcell_array[qpos_75];
466 for (i = 0; i < stats[z].n_perc; i++) {
467 quartile_perc[i] = (double)stats[z].fcell_array[qpos_perc[i]];
468 }
469 break;
470
471 case DCELL_TYPE:
472 heapsort_double(stats[z].dcell_array, stats[z].n);
473
474 quartile_25 = stats[z].dcell_array[qpos_25];
475 if (stats[z].n % 2) /* odd */
476 median = stats[z].dcell_array[(int)(stats[z].n / 2)];
477 else /* even */
478 median =
479 (stats[z].dcell_array[stats[z].n / 2 - 1] +
480 stats[z].dcell_array[stats[z].n / 2]) / 2.0;
481 quartile_75 = stats[z].dcell_array[qpos_75];
482 for (i = 0; i < stats[z].n_perc; i++) {
483 quartile_perc[i] = stats[z].dcell_array[qpos_perc[i]];
484 }
485 break;
486
487 default:
488 break;
489 }
490 }
491
492 /* first quartile */
493 fprintf(stdout, "%s%g", zone_info.sep, quartile_25);
494 /* median */
495 fprintf(stdout, "%s%g", zone_info.sep, median);
496 /* third quartile */
497 fprintf(stdout, "%s%g", zone_info.sep, quartile_75);
498 /* percentiles */
499 for (i = 0; i < stats[z].n_perc; i++) {
500 fprintf(stdout, "%s%g", zone_info.sep ,
501 quartile_perc[i]);
502 }
503
504 G_free((void *)quartile_perc);
505 G_free((void *)qpos_perc);
506 }
507
508 fprintf(stdout, "\n");
509
510 /* zone z finished */
511
512 }
513
514 return 1;
515}
Note: See TracBrowser for help on using the repository browser.