source: grass/trunk/raster/r.stats.quantile/main.c

Last change on this file was 70124, checked in by neteler, 8 years ago

r.quantile + r.stats.quantile: keywords added

  • Property svn:eol-style set to native
  • Property svn:mime-type set to text/x-csrc
File size: 15.2 KB
Line 
1
2/****************************************************************************
3 *
4 * MODULE: r.stats.quantile
5 * AUTHOR(S): Glynn Clements <glynn gclements.plus.com> (original contributor)
6 * Markus Metz: dynamic bins to reduce memory consumptions
7 * PURPOSE: Compute category or object oriented quantiles using two passes
8 *
9 * This program is free software under the GNU General Public
10 * License (>=v2). Read the file COPYING that comes with GRASS
11 * for details.
12 *
13 *****************************************************************************/
14#include <stdlib.h>
15#include <string.h>
16#include <math.h>
17#include <grass/gis.h>
18#include <grass/raster.h>
19#include <grass/glocale.h>
20#include <grass/spawn.h>
21
22struct bin
23{
24 unsigned long origin;
25 int base, count;
26};
27
28struct basecat
29{
30 unsigned int *slots;
31 unsigned long total;
32 int num_values;
33 DCELL min, max, slot_size;
34 int num_slots;
35 unsigned char *slot_bins;
36 int num_bins;
37 struct bin *bins;
38 DCELL *values;
39 DCELL *quants;
40};
41
42static int num_quants;
43static DCELL *quants;
44static DCELL min, max;
45static int num_slots;
46
47static int rows, cols;
48
49static CELL cmin, cmax;
50static int num_cats;
51static struct basecat *basecats;
52
53static inline int get_slot(struct basecat *bc, DCELL c)
54{
55 int i;
56
57 if (bc->num_slots == 0)
58 return -1;
59
60 i = (int)floor((c - bc->min) / bc->slot_size);
61
62 if (i < 0)
63 i = 0;
64 if (i > bc->num_slots - 1)
65 i = bc->num_slots - 1;
66 return i;
67}
68
69static inline double get_quantile(struct basecat *bc, int n)
70{
71 if (n >= num_quants)
72 return (double)bc->total + bc->total;
73
74 return (double)bc->total * quants[n];
75}
76
77static void get_slot_counts(int basefile, int coverfile)
78{
79 CELL *basebuf = Rast_allocate_c_buf();
80 DCELL *coverbuf = Rast_allocate_d_buf();
81 struct basecat *bc;
82 int row, col;
83 int i;
84 int allnull;
85
86 G_message(_("Computing histograms"));
87
88 for (i = 0; i < num_cats; i++) {
89 bc = &basecats[i];
90 bc->min = max;
91 bc->max = min;
92 }
93
94 allnull = 1;
95 for (row = 0; row < rows; row++) {
96 G_percent(row, rows, 2);
97
98 Rast_get_c_row(basefile, basebuf, row);
99 Rast_get_d_row(coverfile, coverbuf, row);
100
101 for (col = 0; col < cols; col++) {
102 if (Rast_is_c_null_value(&basebuf[col]))
103 continue;
104
105 if (Rast_is_d_null_value(&coverbuf[col]))
106 continue;
107
108 allnull = 0;
109
110 bc = &basecats[basebuf[col] - cmin];
111
112 bc->total++;
113
114 if (bc->min > coverbuf[col])
115 bc->min = coverbuf[col];
116 if (bc->max < coverbuf[col])
117 bc->max = coverbuf[col];
118 }
119 }
120 G_percent(rows, rows, 2);
121
122 if (allnull)
123 G_fatal_error(_("No cells found where both base and cover are not NULL"));
124
125 for (i = 0; i < num_cats; i++) {
126 bc = &basecats[i];
127
128 bc->num_slots = 0;
129 bc->slot_size = 0;
130
131 if (bc->max <= bc->min)
132 continue;
133
134 bc->num_slots = 1;
135 if (bc->total * 10 > (unsigned long) num_slots)
136 bc->num_slots = num_slots;
137
138 bc->slots = G_calloc(bc->num_slots, sizeof(unsigned int));
139 bc->slot_size = (bc->max - bc->min) / bc->num_slots;
140 }
141
142 for (row = 0; row < rows; row++) {
143 G_percent(row, rows, 2);
144
145 Rast_get_c_row(basefile, basebuf, row);
146 Rast_get_d_row(coverfile, coverbuf, row);
147
148 for (col = 0; col < cols; col++) {
149 if (Rast_is_c_null_value(&basebuf[col]))
150 continue;
151
152 if (Rast_is_d_null_value(&coverbuf[col]))
153 continue;
154
155 bc = &basecats[basebuf[col] - cmin];
156
157 if (bc->num_slots == 0)
158 continue;
159
160 i = get_slot(bc, coverbuf[col]);
161 bc->slots[i]++;
162 }
163 }
164 G_percent(rows, rows, 2);
165
166 G_free(basebuf);
167 G_free(coverbuf);
168}
169
170static void initialize_bins(void)
171{
172 int cat;
173
174 G_message(_("Computing bins"));
175
176 for (cat = 0; cat < num_cats; cat++) {
177 struct basecat *bc = &basecats[cat];
178 int slot;
179 double next;
180 int num_values = 0;
181 int bin = 0;
182 unsigned long accum = 0;
183 int quant = 0;
184
185 if (bc->num_slots == 0)
186 continue;
187
188 bc->bins = G_calloc(num_quants, sizeof(struct bin));
189 bc->slot_bins = G_calloc(bc->num_slots, sizeof(unsigned char));
190
191 next = get_quantile(bc, quant);
192
193 for (slot = 0; slot < bc->num_slots; slot++) {
194 unsigned int count = bc->slots[slot];
195 unsigned long accum2 = accum + count;
196
197 if (accum2 > next ||
198 (slot == bc->num_slots - 1 && accum2 == next)) {
199 struct bin *b = &bc->bins[bin];
200
201 bc->slot_bins[slot] = ++bin;
202
203 b->origin = accum;
204 b->base = num_values;
205 b->count = 0;
206
207 while (accum2 > next)
208 next = get_quantile(bc, ++quant);
209
210 num_values += count;
211 }
212
213 accum = accum2;
214 }
215
216 bc->num_values = num_values;
217 bc->num_bins = bin;
218
219 G_free(bc->slots);
220
221 bc->values = G_calloc(num_values, sizeof(DCELL));
222 }
223}
224
225static void fill_bins(int basefile, int coverfile)
226{
227 CELL *basebuf = Rast_allocate_c_buf();
228 DCELL *coverbuf = Rast_allocate_d_buf();
229 int row, col;
230
231 G_message(_("Binning data"));
232
233 for (row = 0; row < rows; row++) {
234 Rast_get_c_row(basefile, basebuf, row);
235 Rast_get_d_row(coverfile, coverbuf, row);
236
237 for (col = 0; col < cols; col++) {
238 struct basecat *bc;
239 int i, bin;
240 struct bin *b;
241
242 if (Rast_is_c_null_value(&basebuf[col]))
243 continue;
244
245 if (Rast_is_d_null_value(&coverbuf[col]))
246 continue;
247
248 bc = &basecats[basebuf[col] - cmin];
249
250 if (bc->num_slots == 0)
251 continue;
252
253 i = get_slot(bc, coverbuf[col]);
254 if (!bc->slot_bins[i])
255 continue;
256
257 bin = bc->slot_bins[i] - 1;
258 b = &bc->bins[bin];
259
260 bc->values[b->base + b->count++] = coverbuf[col];
261 }
262
263 G_percent(row, rows, 2);
264 }
265 G_percent(rows, rows, 2);
266
267 G_free(basebuf);
268 G_free(coverbuf);
269}
270
271static int compare_dcell(const void *aa, const void *bb)
272{
273 DCELL a = *(const DCELL *)aa;
274 DCELL b = *(const DCELL *)bb;
275
276 if (a < b)
277 return -1;
278 if (a > b)
279 return 1;
280 return 0;
281}
282
283static void sort_bins(void)
284{
285 int cat;
286
287 G_message(_("Sorting bins"));
288
289 for (cat = 0; cat < num_cats; cat++) {
290 struct basecat *bc = &basecats[cat];
291 int bin;
292
293 if (bc->num_slots == 0)
294 continue;
295
296 G_free(bc->slot_bins);
297
298 for (bin = 0; bin < bc->num_bins; bin++) {
299 struct bin *b = &bc->bins[bin];
300
301 qsort(&bc->values[b->base], b->count, sizeof(DCELL), compare_dcell);
302 }
303
304 G_percent(cat, num_cats, 2);
305 }
306
307 G_percent(cat, num_cats, 2);
308}
309
310static void print_quantiles(char *fs, char *name, int table_frmt)
311{
312 int cat, quant;
313 struct basecat *bc;
314
315 G_message(_("Printing quantiles"));
316
317 if (name != NULL && strcmp(name, "-") != 0) {
318 if (NULL == freopen(name, "w", stdout)) {
319 G_fatal_error(_("Unable to open file <%s> for writing"), name);
320 }
321 }
322
323 if (!table_frmt) {
324 for (cat = 0; cat < num_cats; cat++) {
325 bc = &basecats[cat];
326
327 if (bc->total == 0)
328 continue;
329
330 for (quant = 0; quant < num_quants; quant++)
331 fprintf(stdout, "%d%s%d%s%f%s%f\n", cmin + cat, fs, quant, fs,
332 100 * quants[quant], fs, bc->quants[quant]);
333 }
334 }
335 else {
336 fprintf(stdout, "cat");
337 for (quant = 0; quant < num_quants; quant++)
338 fprintf(stdout, "%s%f", fs, 100 * quants[quant]);
339 fprintf(stdout, "\n");
340
341 for (cat = 0; cat < num_cats; cat++) {
342 bc = &basecats[cat];
343
344 if (bc->total == 0)
345 continue;
346
347 fprintf(stdout, "%d", cmin + cat);
348 for (quant = 0; quant < num_quants; quant++)
349 fprintf(stdout, "%s%f", fs, bc->quants[quant]);
350 fprintf(stdout, "\n");
351 }
352 }
353
354}
355
356static void compute_quantiles(void)
357{
358 int cat;
359
360 G_message(_("Computing quantiles"));
361
362 for (cat = 0; cat < num_cats; cat++) {
363 struct basecat *bc = &basecats[cat];
364 int quant;
365
366 if (bc->max < bc->min)
367 continue;
368
369 bc->quants = G_malloc(num_quants * sizeof(DCELL));
370
371 if (bc->max == bc->min) {
372 for (quant = 0; quant < num_quants; quant++)
373 bc->quants[quant] = bc->min;
374 }
375 else {
376 struct bin *b = &bc->bins[0];
377
378 for (quant = 0; quant < num_quants; quant++) {
379 double next = get_quantile(bc, quant);
380 double k, v;
381 int i0, i1;
382
383 while (b->origin + b->count < next)
384 b++;
385
386 k = next - b->origin;
387 i0 = (int)floor(k);
388 i1 = (int)ceil(k);
389
390 if (i0 > b->count - 1)
391 i0 = b->count - 1;
392 if (i1 > b->count - 1)
393 i1 = b->count - 1;
394
395 v = (i0 == i1)
396 ? bc->values[b->base + i0]
397 : bc->values[b->base + i0] * (i1 - k) + bc->values[b->base + i1] * (k - i0);
398
399 bc->quants[quant] = v;
400 }
401 }
402 }
403}
404
405static void do_reclass(const char *basemap, char **outputs)
406{
407 const char *tempfile = G_tempfile();
408 char *input_arg = G_malloc(strlen(basemap) + 7);
409 char *rules_arg = G_malloc(strlen(tempfile) + 7);
410 int quant;
411
412 G_message(_("Generating reclass maps"));
413
414 sprintf(input_arg, "input=%s", basemap);
415 sprintf(rules_arg, "rules=%s", tempfile);
416
417 for (quant = 0; quant < num_quants; quant++) {
418 const char *output = outputs[quant];
419 char *output_arg = G_malloc(strlen(output) + 8);
420 FILE *fp;
421 int cat;
422
423 sprintf(output_arg, "output=%s", output);
424
425 fp = fopen(tempfile, "w");
426 if (!fp)
427 G_fatal_error(_("Unable to open temporary file"));
428
429 for (cat = 0; cat < num_cats; cat++) {
430 if (basecats[cat].total > 0)
431 fprintf(fp, "%d = %d %f\n", cmin + cat, cmin + cat, basecats[cat].quants[quant]);
432 }
433
434 fclose(fp);
435
436 G_spawn("r.reclass", "r.reclass", input_arg, output_arg, rules_arg, NULL);
437 }
438
439 remove(tempfile);
440}
441
442static void do_output(int base_fd, char **outputs, const char *covermap)
443{
444 int *out_fd = G_malloc(num_quants * sizeof(int));
445 CELL *base_buf = Rast_allocate_c_buf();
446 DCELL *out_buf = Rast_allocate_d_buf();
447 const char *mapset = G_mapset();
448 struct Colors colors;
449 struct History history;
450 int have_colors;
451 int quant;
452 int row, col;
453
454 G_message(_("Writing output maps"));
455
456 for (quant = 0; quant < num_quants; quant++) {
457 const char *output = outputs[quant];
458
459 out_fd[quant] = Rast_open_fp_new(output);
460 }
461
462 have_colors = Rast_read_colors(covermap, "", &colors) > 0;
463
464 for (row = 0; row < rows; row++) {
465 Rast_get_c_row(base_fd, base_buf, row);
466
467 for (quant = 0; quant < num_quants; quant++) {
468 for (col = 0; col < cols; col++)
469 if (Rast_is_c_null_value(&base_buf[col]))
470 Rast_set_d_null_value(&out_buf[col], 1);
471 else if (basecats[base_buf[col] - cmin].total == 0)
472 Rast_set_d_null_value(&out_buf[col], 1);
473 else
474 out_buf[col] = basecats[base_buf[col] - cmin].quants[quant];
475
476 Rast_put_d_row(out_fd[quant], out_buf);
477 }
478
479 G_percent(row, rows, 2);
480 }
481
482 G_percent(row, rows, 2);
483
484 for (quant = 0; quant < num_quants; quant++) {
485 Rast_close(out_fd[quant]);
486 Rast_short_history(outputs[quant], "raster", &history);
487 Rast_command_history(&history);
488 Rast_write_history(outputs[quant], &history);
489 if (have_colors)
490 Rast_write_colors(outputs[quant], mapset, &colors);
491 }
492}
493
494int main(int argc, char *argv[])
495{
496 struct GModule *module;
497 struct
498 {
499 struct Option *quant, *perc, *slots, *basemap, *covermap,
500 *output, *file, *fs;
501 } opt;
502 struct {
503 struct Flag *r, *p, *t;
504 } flag;
505 const char *basemap, *covermap;
506 char **outputs, *fs;
507 int reclass, print;
508 int cover_fd, base_fd;
509 struct Range range;
510 struct FPRange fprange;
511 int i;
512
513 G_gisinit(argv[0]);
514
515 module = G_define_module();
516 G_add_keyword(_("raster"));
517 G_add_keyword(_("statistics"));
518 G_add_keyword(_("percentile"));
519 G_add_keyword(_("quantile"));
520 module->description = _("Compute category quantiles using two passes.");
521
522 opt.basemap = G_define_standard_option(G_OPT_R_BASE);
523
524 opt.covermap = G_define_standard_option(G_OPT_R_COVER);
525
526 opt.quant = G_define_option();
527 opt.quant->key = "quantiles";
528 opt.quant->type = TYPE_INTEGER;
529 opt.quant->required = NO;
530 opt.quant->description = _("Number of quantiles");
531
532 opt.perc = G_define_option();
533 opt.perc->key = "percentiles";
534 opt.perc->type = TYPE_DOUBLE;
535 opt.perc->multiple = YES;
536 opt.perc->description = _("List of percentiles");
537 opt.perc->answer = "50";
538
539 opt.slots = G_define_option();
540 opt.slots->key = "bins";
541 opt.slots->type = TYPE_INTEGER;
542 opt.slots->required = NO;
543 opt.slots->description = _("Number of bins to use");
544 opt.slots->answer = "1000";
545
546 opt.output = G_define_standard_option(G_OPT_R_OUTPUT);
547 opt.output->description = _("Resultant raster map(s)");
548 opt.output->required = NO;
549 opt.output->multiple = YES;
550
551 opt.file = G_define_standard_option(G_OPT_F_OUTPUT);
552 opt.file->key = "file";
553 opt.file->required = NO;
554 opt.file->description =
555 _("Name for output file (if omitted or \"-\" output to stdout)");
556
557 opt.fs = G_define_standard_option(G_OPT_F_SEP);
558 opt.fs->answer = ":";
559 opt.fs->guisection = _("Formatting");
560
561 flag.r = G_define_flag();
562 flag.r->key = 'r';
563 flag.r->description =
564 _("Create reclass map with statistics as category labels");
565
566 flag.p = G_define_flag();
567 flag.p->key = 'p';
568 flag.p->description =
569 _("Do not create output maps; just print statistics");
570
571 flag.t = G_define_flag();
572 flag.t->key = 't';
573 flag.t->description =
574 _("Print statistics in table format");
575
576 if (G_parser(argc, argv))
577 exit(EXIT_FAILURE);
578
579 basemap = opt.basemap->answer;
580 covermap = opt.covermap->answer;
581 outputs = opt.output->answers;
582 reclass = flag.r->answer;
583 print = flag.p->answer || flag.t->answer;
584
585 if (!print && !opt.output->answers)
586 G_fatal_error(_("Either -%c or %s= must be given"),
587 flag.p->key, opt.output->key);
588
589 if (print && opt.output->answers)
590 G_fatal_error(_("-%c and %s= are mutually exclusive"),
591 flag.p->key, opt.output->key);
592
593 num_slots = atoi(opt.slots->answer);
594
595 if (opt.quant->answer) {
596 num_quants = atoi(opt.quant->answer) - 1;
597 quants = G_calloc(num_quants, sizeof(DCELL));
598 for (i = 0; i < num_quants; i++)
599 quants[i] = 1.0 * (i + 1) / (num_quants + 1);
600 }
601 else {
602 for (i = 0; opt.perc->answers[i]; i++)
603 ;
604 num_quants = i;
605 quants = G_calloc(num_quants, sizeof(DCELL));
606 for (i = 0; i < num_quants; i++)
607 quants[i] = atof(opt.perc->answers[i]) / 100;
608 qsort(quants, num_quants, sizeof(DCELL), compare_dcell);
609 }
610
611 if (opt.output->answer) {
612 for (i = 0; opt.output->answers[i]; i++)
613 ;
614 if (i != num_quants)
615 G_fatal_error(_("Number of quantiles (%d) does not match number of output maps (%d)"),
616 num_quants, i);
617 }
618
619 base_fd = Rast_open_old(basemap, "");
620
621 cover_fd = Rast_open_old(covermap, "");
622
623 if (Rast_map_is_fp(basemap, "") != 0)
624 G_fatal_error(_("The base map must be an integer (CELL) map"));
625
626 if (Rast_read_range(basemap, "", &range) < 0)
627 G_fatal_error(_("Unable to read range of base map <%s>"), basemap);
628
629 Rast_get_range_min_max(&range, &cmin, &cmax);
630 num_cats = cmax - cmin + 1;
631 if (num_cats > 100000)
632 G_warning(_("Base map <%s> has many categories (%d), computation might be slow and might need a lot of memory"),
633 basemap, num_cats);
634
635 Rast_read_fp_range(covermap, "", &fprange);
636 Rast_get_fp_range_min_max(&fprange, &min, &max);
637
638 basecats = G_calloc(num_cats, sizeof(struct basecat));
639 rows = Rast_window_rows();
640 cols = Rast_window_cols();
641
642 get_slot_counts(base_fd, cover_fd);
643 initialize_bins();
644 fill_bins(base_fd, cover_fd);
645 sort_bins();
646 compute_quantiles();
647
648 if (print) {
649 /* get field separator */
650 fs = G_option_to_separator(opt.fs);
651
652 print_quantiles(fs, opt.file->answer, flag.t->answer);
653 }
654 else if (reclass)
655 do_reclass(basemap, outputs);
656 else
657 do_output(base_fd, outputs, covermap);
658
659 Rast_close(cover_fd);
660 Rast_close(base_fd);
661
662 return (EXIT_SUCCESS);
663}
Note: See TracBrowser for help on using the repository browser.