| 1 |
|
|---|
| 2 | /*!
|
|---|
| 3 | \file lib/gis/token.c
|
|---|
| 4 |
|
|---|
| 5 | \brief GIS Library - Tokenize strings
|
|---|
| 6 |
|
|---|
| 7 | (C) 2001-2008, 2011-2013 by the GRASS Development Team
|
|---|
| 8 |
|
|---|
| 9 | This program is free software under the GNU General Public License
|
|---|
| 10 | (>=v2). Read the file COPYING that comes with GRASS for details.
|
|---|
| 11 |
|
|---|
| 12 | \author USA CERL and others
|
|---|
| 13 | */
|
|---|
| 14 |
|
|---|
| 15 | #include <stdlib.h>
|
|---|
| 16 | #include <string.h>
|
|---|
| 17 | #include <grass/gis.h>
|
|---|
| 18 | #include <grass/glocale.h>
|
|---|
| 19 |
|
|---|
| 20 | static char **tokenize(const char *, const char *, const char *);
|
|---|
| 21 |
|
|---|
/*!
   \brief Split a string into tokens

   Works on a private copy of <em>buf</em> (the caller's string is left
   untouched). In that copy every delimiter is overwritten with the
   string terminator '\0' (NUL) and a pointer to the start of each
   resulting field is stored in the returned array.

   <em>delim</em> may list more than one character; each character in
   it acts as a delimiter on its own. <em>buf</em> must not contain a
   new line (\n).

   The result must be released with G_free_tokens() once it is no
   longer needed.

   Example:
   \code
   char **tokens;
   int ntok, i;

   tokens = G_tokenize(buf, " |:,");
   ntok = G_number_of_tokens(tokens);
   for (i = 0; i < ntok; i++) {
       G_debug(1, "%d=[%s]", i, tokens[i]);
   }
   G_free_tokens(tokens);
   \endcode

   \param buf input string
   \param delim string of delimiter characters

   \return NULL-terminated array of pointers to the tokens
 */
char **G_tokenize(const char *buf, const char *delim)
{
    /* Passing no quote character disables quoting: every delimiter splits. */
    char **tokens = tokenize(buf, delim, NULL);

    return tokens;
}
|
|---|
| 52 |
|
|---|
/*!
   \brief Split a string into tokens, honouring a quote character

   Behaves like G_tokenize(), with one addition: <em>valchar</em> names
   a character that marks the borders of a token. Delimiters found
   between two such border characters do not split the token. The
   border characters themselves are not part of the resulting token.

   Example:
   \code
   char *str = "a,'b,c',d";

   char **tokens1, **tokens2;
   int ntok1, ntok2;

   tokens1 = G_tokenize(str, ",");
   ntok1 = G_number_of_tokens(tokens1);

   tokens2 = G_tokenize2(str, ",", "'");
   ntok2 = G_number_of_tokens(tokens2);
   \endcode

   In this example <em>ntok1</em> will be 4, <em>ntok2</em> only 3,
   i.e. { "a", "b,c", "d" }.

   The result must be released with G_free_tokens() once it is no
   longer needed.

   \param buf input string
   \param delim string of delimiter characters
   \param valchar character defining border of token

   \return NULL-terminated array of pointers to the tokens
 */
char **G_tokenize2(const char *buf, const char *delim, const char *valchar)
{
    /* All the work happens in the shared worker; valchar is its quote. */
    char **tokens = tokenize(buf, delim, valchar);

    return tokens;
}
|
|---|
| 88 |
|
|---|
/*!
   \brief Split a copy of a string into tokens (shared worker)

   Implementation behind G_tokenize() and G_tokenize2(). The input is
   duplicated with G_store() and the copy is compacted in place by a
   small state machine:

   - characters belonging to a token are kept,
   - a delimiter that ends a field is replaced by '\0' and starts a new
     token,
   - quote characters are dropped; delimiters between an opening and a
     closing quote are kept as ordinary characters,
   - a quote character directly following a closing quote re-opens the
     quoted section and contributes one literal quote character.

   All tokens point into that single copy, whose start is tokens[0].
   The returned array is terminated by a NULL pointer.

   If the quoting is malformed (input ends inside a quoted section, or a
   closing quote is followed by something other than a delimiter, a
   quote or the end of input) a "parse error" warning is issued and the
   tokens collected up to that point are returned, the last one cut off
   at the error position.

   \param buf input string (not modified)
   \param delim string of delimiter characters; each one splits fields
   \param inchar quote character (only the first character is used);
   NULL or an empty string disables quoting

   \return NULL-terminated array of pointers to the tokens
 */
static char **tokenize(const char *buf, const char *delim, const char *inchar)
{
    enum { S_START, S_IN_QUOTE, S_AFTER_QUOTE };
    enum { A_NO_OP, A_ADD_CHAR, A_NEW_FIELD, A_END_RECORD, A_ERROR };
    int i = 0;
    int state = S_START;
    char **tokens;
    const char *p;
    char *q;

    /* Characters are compared as unsigned char values (0..255), so the
     * "no quote" marker -1 can never match an input byte, not even 0xFF
     * where plain char is signed. An empty inchar is treated like NULL:
     * using '\0' as the quote would swallow the terminator and let the
     * scan run past the end of the buffer. */
    const int quo = (inchar && *inchar) ? (unsigned char)*inchar : -1;

    /* do not modify buf, make a copy; p reads it, q writes the compacted
     * result into the same storage (q never gets ahead of p) */
    p = q = G_store(buf);

    tokens = (char **)G_malloc(2 * sizeof(char *));

    /* always one token */
    tokens[i++] = q;

    for (;; p++) {
        const int c = (unsigned char)*p;
        int action = A_NO_OP;

        switch (state) {
        case S_START:
            if (c == quo)
                state = S_IN_QUOTE;
            else if (c == '\0')
                action = A_END_RECORD;
            else if (strchr(delim, *p))
                action = A_NEW_FIELD;
            else
                action = A_ADD_CHAR;
            break;
        case S_IN_QUOTE:
            if (c == quo)
                state = S_AFTER_QUOTE;
            else if (c == '\0')
                action = A_ERROR; /* input ended inside a quoted section */
            else
                action = A_ADD_CHAR;
            break;
        case S_AFTER_QUOTE:
            if (c == quo) {
                /* doubled quote: emit one literal quote, stay quoted */
                state = S_IN_QUOTE;
                action = A_ADD_CHAR;
            }
            else if (c == '\0') {
                action = A_END_RECORD;
            }
            else if (strchr(delim, *p)) {
                state = S_START;
                action = A_NEW_FIELD;
            }
            else {
                action = A_ERROR; /* junk right after a closing quote */
            }
            break;
        default:
            break;
        }

        switch (action) {
        case A_ADD_CHAR:
            *q++ = *p;
            break;
        case A_NEW_FIELD:
            *q++ = '\0';
            tokens[i++] = q;
            /* keep room for the next token plus the NULL terminator */
            tokens = G_realloc(tokens, (i + 2) * sizeof(char *));
            break;
        case A_END_RECORD:
            *q++ = '\0';
            tokens[i++] = NULL;
            return tokens;
        case A_ERROR:
            G_warning(_("parse error"));
            *q++ = '\0';
            tokens[i++] = NULL;
            return tokens;
        case A_NO_OP:
        default:
            break;
        }
    }
}
|
|---|
| 176 |
|
|---|
/*!
   \brief Return number of tokens

   Counts the entries of a token array up to, but not including, the
   terminating NULL pointer.

   \param tokens NULL-terminated array of tokens; a NULL pointer is
   treated as an empty list

   \return number of tokens (0 if <em>tokens</em> is NULL)
 */
int G_number_of_tokens(char **tokens)
{
    int n = 0;

    /* nothing to count; avoids dereferencing a NULL array */
    if (tokens == NULL)
        return 0;

    while (tokens[n] != NULL)
        n++;

    return n;
}
|
|---|
| 195 |
|
|---|
/*!
   \brief Free memory allocated to tokens

   Releases a token array obtained from G_tokenize() or G_tokenize2().
   All tokens point into one shared string buffer that starts at
   tokens[0], so that buffer and the pointer array itself are the only
   two allocations to release.

   <b>Note:</b> <i>G_free_tokens()</i> must be called when finished with
   tokens to release memory.

   \param tokens token array to release; a NULL pointer is accepted and
   ignored
 */
void G_free_tokens(char **tokens)
{
    /* mirror free(): releasing nothing is not an error */
    if (tokens == NULL)
        return;

    if (tokens[0] != NULL)
        G_free(tokens[0]);
    G_free(tokens);
}
|
|---|