source: trunk/mapserver/mapstring.c

Last change on this file was 13224, checked in by tbonfort, 12 years ago

implement RFC81 label-leader offsetting
heavy refactoring in msDrawLabelCache and msDrawShape to cut down on processing time and memory consumption

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 58.9 KB
Line 
1/******************************************************************************
2 * $Id: mapstring.c 13224 2012-03-09 14:13:01Z tbonfort $
3 *
4 * Project: MapServer
5 * Purpose: Various string handling functions.
6 * Author: Steve Lime and the MapServer team.
7 *
8 * Notes: A couple of string handling functions (strrstr, strlcat) were taken from
9 * other sources. Copyright notices accompany those functions below.
10 *
11 ******************************************************************************
12 * Copyright (c) 1996-2005 Regents of the University of Minnesota.
13 * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com>
14 *
15 * Permission is hereby granted, free of charge, to any person obtaining a
16 * copy of this software and associated documentation files (the "Software"),
17 * to deal in the Software without restriction, including without limitation
18 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
19 * and/or sell copies of the Software, and to permit persons to whom the
20 * Software is furnished to do so, subject to the following conditions:
21 *
22 * The above copyright notice and this permission notice shall be included in
23 * all copies of this Software or works derived from this Software.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
26 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
28 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
30 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
31 * DEALINGS IN THE SOFTWARE.
32 ****************************************************************************/
33
34#include "mapserver.h"
35
36MS_CVSID("$Id: mapstring.c 13224 2012-03-09 14:13:01Z tbonfort $")
37
38#include <ctype.h>
39#include <string.h>
40#include <errno.h>
41
42/*
43 * Find the first occurrence of find in s, ignore case.
44 */
45
46#ifdef USE_FRIBIDI
47#if (defined(_WIN32) && !defined(__CYGWIN__)) || defined(USE_FRIBIDI2)
48#include "fribidi.h"
49#else
50#include <fribidi/fribidi.h>
51#endif
52#define MAX_STR_LEN 65000
53#endif
54
55#ifdef USE_ICONV
56#include <iconv.h>
57#include <wchar.h>
58#endif
59
60#include "mapentities.h"
61
62#ifdef NEED_STRRSTR
63/*
64** Copyright (c) 2000-2004 University of Illinois Board of Trustees
65** Copyright (c) 2000-2005 Mark D. Roth
66** All rights reserved.
67**
68** Developed by: Campus Information Technologies and Educational Services,
69** University of Illinois at Urbana-Champaign
70**
71** Permission is hereby granted, free of charge, to any person obtaining
72** a copy of this software and associated documentation files (the
73** ``Software''), to deal with the Software without restriction, including
74** without limitation the rights to use, copy, modify, merge, publish,
75** distribute, sublicense, and/or sell copies of the Software, and to
76** permit persons to whom the Software is furnished to do so, subject to
77** the following conditions:
78**
79** * Redistributions of source code must retain the above copyright
80** notice, this list of conditions and the following disclaimers.
81**
82** * Redistributions in binary form must reproduce the above copyright
83** notice, this list of conditions and the following disclaimers in the
84** documentation and/or other materials provided with the distribution.
85**
86** * Neither the names of Campus Information Technologies and Educational
87** Services, University of Illinois at Urbana-Champaign, nor the names
88** of its contributors may be used to endorse or promote products derived
89** from this Software without specific prior written permission.
90**
91** THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
92** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
93** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
94** IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR
95** ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
96** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
97** OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE.
98*/
/*
 * Return a pointer to the last occurrence of find inside string, or NULL
 * when find does not occur (which includes find being longer than string).
 * An empty find matches at the terminating NUL of string.
 */
char *strrstr(char *string, char *find)
{
  size_t stringlen, findlen, offset;

  findlen = strlen(find);
  stringlen = strlen(string);
  if (findlen > stringlen)
    return NULL;

  /* Try candidate offsets from the right-most one down to 0.  Counting an
     offset instead of decrementing a pointer never forms a pointer that
     lies before the start of string. */
  offset = stringlen - findlen + 1;
  while (offset-- > 0) {
    if (strncmp(string + offset, find, findlen) == 0)
      return string + offset;
  }

  return NULL;
}
115#endif
116
117#ifdef NEED_STRLCAT
118/*
119 * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com>
120 *
121 * Permission to use, copy, modify, and distribute this software for any
122 * purpose with or without fee is hereby granted, provided that the above
123 * copyright notice and this permission notice appear in all copies.
124 *
125 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
126 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
127 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
128 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
129 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
130 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
131 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
132 */
133
134/*
135 * Appends src to string dst of size siz (unlike strncat, siz is the
136 * full size of dst, not space left). At most siz-1 characters
137 * will be copied. Always NUL terminates (unless siz <= strlen(dst)).
138 * Returns strlen(src) + MIN(siz, strlen(initial dst)).
139 * If retval >= siz, truncation occurred.
140 */
/*
 * Bounded concatenation: append src to the string held in dst, where siz
 * is the total size of the dst buffer.  The result is NUL terminated
 * unless no terminator is found within the first siz bytes of dst.
 * The return value is the length the combined string would have had
 * with unlimited room; a value >= siz signals truncation.
 */
size_t strlcat(char *dst, const char *src, size_t siz)
{
  size_t dst_len = 0;
  size_t src_len = 0;
  size_t room;
  size_t out;

  /* Length of the existing string, never looking beyond siz bytes. */
  while (dst_len < siz && dst[dst_len] != '\0')
    dst_len++;

  room = siz - dst_len;
  if (room == 0)
    return dst_len + strlen(src);

  /* Copy while more than the terminator's byte is left, but keep
     counting src so the full length can be reported. */
  out = dst_len;
  for (; src[src_len] != '\0'; src_len++) {
    if (room > 1) {
      dst[out++] = src[src_len];
      room--;
    }
  }
  dst[out] = '\0';

  return dst_len + src_len; /* terminator not counted */
}
167#endif
168
169#ifdef NEED_STRLCPY
170/*
171 * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com>
172 * All rights reserved.
173 *
174 * Redistribution and use in source and binary forms, with or without
175 * modification, are permitted provided that the following conditions
176 * are met:
177 * 1. Redistributions of source code must retain the above copyright
178 * notice, this list of conditions and the following disclaimer.
179 * 2. Redistributions in binary form must reproduce the above copyright
180 * notice, this list of conditions and the following disclaimer in the
181 * documentation and/or other materials provided with the distribution.
182 * 3. The name of the author may not be used to endorse or promote products
183 * derived from this software without specific prior written permission.
184 *
185 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
186 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
187 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
188 * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
189 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
190 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
191 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
192 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
193 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
194 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
195 */
196
197/*
198 * Copy src to string dst of size siz. At most siz-1 characters
199 * will be copied. Always NUL terminates (unless siz == 0).
200 * Returns strlen(src); if retval >= siz, truncation occurred.
201 */
/*
 * Bounded copy: place at most siz-1 characters of src into dst and NUL
 * terminate (nothing at all is written when siz is 0).
 * Returns strlen(src); a return value >= siz means the copy was truncated.
 */
size_t strlcpy(char *dst, const char *src, size_t siz)
{
  size_t pos = 0;

  if (siz != 0) {
    /* Copy what fits, always leaving one byte for the terminator. */
    while (pos < siz - 1 && src[pos] != '\0') {
      dst[pos] = src[pos];
      pos++;
    }
    dst[pos] = '\0';
  }

  /* Walk the remainder of src so the full source length is returned. */
  while (src[pos] != '\0')
    pos++;

  return pos; /* terminator not counted */
}
227#endif
228
229#ifdef NEED_STRCASESTR
230/*-
231 * Copyright (c) 1990, 1993
232 * The Regents of the University of California. All rights reserved.
233 *
234 * This code is derived from software contributed to Berkeley by
235 * Chris Torek.
236 *
237 * Redistribution and use in source and binary forms, with or without
238 * modification, are permitted provided that the following conditions
239 * are met:
240 * 1. Redistributions of source code must retain the above copyright
241 * notice, this list of conditions and the following disclaimer.
242 * 2. Redistributions in binary form must reproduce the above copyright
243 * notice, this list of conditions and the following disclaimer in the
244 * documentation and/or other materials provided with the distribution.
245 * 3. Neither the name of the University nor the names of its contributors
246 * may be used to endorse or promote products derived from this software
247 * without specific prior written permission.
248 *
249 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
250 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
251 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
252 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
253 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
254 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
255 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
256 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
257 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
258 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
259 * SUCH DAMAGE.
260 */
/*
 * Locate the first occurrence of find in s, ignoring case.
 * Returns a pointer into s, s itself when find is empty, or NULL when
 * there is no match.
 */
char *strcasestr(const char *s, const char *find)
{
  const char *rest;
  size_t rest_len;
  char first;

  if (*find == '\0')
    return (char *)s;

  /* Match the first character by hand, then let strncasecmp() check
     whatever follows it. */
  first = tolower((unsigned char)*find);
  rest = find + 1;
  rest_len = strlen(rest);

  for (; *s != '\0'; s++) {
    if ((char)tolower((unsigned char)*s) != first)
      continue;
    if (strncasecmp(s + 1, rest, rest_len) == 0)
      return (char *)s;
  }

  return NULL;
}
279#endif
280
281#ifdef NEED_STRDUP
/*
 * Return a malloc()ed copy of s.  NULL is returned for a NULL input or
 * when the allocation fails; the caller frees the result.
 */
char *strdup(char *s)
{
  size_t bytes;
  char *copy;

  if (s == NULL)
    return NULL;

  bytes = strlen(s) + 1; /* include the terminator */
  copy = (char *)malloc(bytes);
  if (copy != NULL)
    memcpy(copy, s, bytes);

  return copy;
}
295#endif
296
297#ifdef NEED_STRNCASECMP
/*
 * Case-insensitive comparison of at most len characters of s1 and s2.
 *
 * Returns 0 when the compared prefixes match, the difference between the
 * first mismatching upper-cased characters otherwise, or +1 / -1 when s2 /
 * s1 ends before the other string within the first len characters.
 */
int strncasecmp(const char *s1, const char *s2, int len)
{
  const char *cp1 = s1;
  const char *cp2 = s2;
  int cmp;

  if (len == 0)
    return 0;

  while (*cp1 && *cp2 && len) {
    /* <ctype.h> functions require a value representable as unsigned char;
       passing a negative plain char is undefined behaviour. */
    cmp = toupper((unsigned char)*cp1) - toupper((unsigned char)*cp2);
    if (cmp != 0)
      return cmp;
    cp1++;
    cp2++;
    len--;
  }

  if (len == 0)
    return 0;

  /* At least one string ended early.  Strings that end together are
     equal; this also covers two empty strings, which used to be
     reported as different. */
  if (*cp1)
    return 1;
  if (*cp2)
    return -1;
  return 0;
}
335#endif
336
337#ifdef NEED_STRCASECMP
338int strcasecmp(const char *s1, const char *s2)
339{
340 register const char *cp1, *cp2;
341 int cmp = 0;
342
343 cp1 = s1;
344 cp2 = s2;
345 if ((!cp1) || (!cp2 )) {
346 return (0);
347 }
348 while(*cp1 && *cp2)
349 {
350 if((cmp = (toupper(*cp1) - toupper(*cp2))) != 0)
351 return(cmp);
352 cp1++;
353 cp2++;
354 }
355 if(*cp1 || *cp2)
356 {
357 if (*cp1)
358 return(1);
359 else
360 return (-1);
361 }
362
363 return(0);
364}
365#endif
366
/*
 * Format a long as decimal text ("%ld") in a buffer obtained from
 * msSmallMalloc(); the caller owns the returned buffer.
 */
char *msLongToString(long value)
{
  const size_t capacity = 256;
  char *text;

  text = (char *)msSmallMalloc(capacity);
  snprintf(text, capacity, "%ld", value);

  return text;
}
374
375char *msDoubleToString(double value, int force_f) {
376 size_t bufferSize = 256;
377 char *buffer = (char*)msSmallMalloc(bufferSize);
378
379 if (force_f == MS_TRUE)
380 snprintf(buffer, bufferSize, "%f", value);
381 else
382 snprintf(buffer, bufferSize, "%g", value);
383 return(buffer);
384}
385
/*
 * Format an int as decimal text ("%i") in a buffer obtained from
 * msSmallMalloc(); the caller owns the returned buffer.
 */
char *msIntToString(int value)
{
  const size_t capacity = 256;
  char *text;

  text = (char *)msSmallMalloc(capacity);
  snprintf(text, capacity, "%i", value);

  return text;
}
393
/*
 * Convert string to upper case in place.  A NULL pointer is ignored.
 */
void msStringToUpper(char *string)
{
  char *p;

  if (string == NULL)
    return;

  /* Single pass: stop at the terminator instead of re-running strlen()
     on every iteration.  The cast keeps toupper() defined for bytes
     above 0x7F on platforms where char is signed. */
  for (p = string; *p != '\0'; p++)
    *p = (char)toupper((unsigned char)*p);
}
404
/*
 * Convert string to lower case in place.  A NULL pointer is ignored.
 */
void msStringToLower(char *string)
{
  char *p;

  if (string == NULL)
    return;

  /* Single pass: stop at the terminator instead of re-running strlen()
     on every iteration.  The cast keeps tolower() defined for bytes
     above 0x7F on platforms where char is signed. */
  for (p = string; *p != '\0'; p++)
    *p = (char)tolower((unsigned char)*p);
}
415
/*
 * Remove the last character of string in place and return string.
 * Empty strings and NULL pointers are returned untouched.
 */
char *msStringChop(char *string)
{
  size_t n;

  if (string == NULL)
    return NULL;

  n = strlen(string);
  if (n > 0)
    string[n-1] = '\0';

  return string;
}
425
426/*
427** Trim leading and trailing white space.
428*/
/*
 * Strip leading and trailing space characters (' ' only) from str, in
 * place.  A NULL pointer is ignored.
 */
void msStringTrim(char *str)
{
  size_t lead, len;

  if (str == NULL)
    return;

  /* Slide the text left over any leading spaces (terminator included). */
  lead = strspn(str, " ");
  len = strlen(str) - lead;
  if (lead > 0)
    memmove(str, str + lead, len + 1);

  /* Nothing but spaces (or nothing at all): done. */
  if (len == 0)
    return;

  /* The first character is now a non-space, so this scan stops at or
     before index 1. */
  while (str[len-1] == ' ')
    len--;
  str[len] = '\0';
}
454
455/*
456** Remove leading white spaces and shift everything to the left.
457*/
/*
 * Strip leading whitespace (as reported by isspace()) from string in
 * place by shifting the remaining text to the front.  Returns string;
 * NULL and empty inputs are returned untouched.
 */
char *msStringTrimLeft(char *string)
{
  char *first;

  if (string == NULL)
    return string;

  /* Find the first non-space character.  The cast keeps isspace() defined
     for bytes above 0x7F on platforms where char is signed. */
  first = string;
  while (*first != '\0' && isspace((unsigned char)*first))
    first++;

  /* Source and destination overlap, hence memmove(); the +1 carries the
     terminator along. */
  if (first != string)
    memmove(string, first, strlen(first) + 1);

  return string;
}
490
491/* ------------------------------------------------------------------------------- */
492/* Trims trailing blanks from a string */
493/* ------------------------------------------------------------------------------- */
/*
 * Remove trailing space characters (' ' only) from string, in place.
 * A string made only of spaces becomes empty.  A NULL pointer is ignored.
 */
void msStringTrimBlanks(char *string)
{
  size_t len, end;

  if (string == NULL)
    return;

  len = strlen(string);

  /* Step back over the trailing spaces.  Running all the way to index 0
     is a valid outcome: the previous code returned without truncating
     when it found no non-space character. */
  end = len;
  while (end > 0 && string[end-1] == ' ')
    end--;

  /* Only write when something was actually trimmed. */
  if (end != len)
    string[end] = '\0';
}
506
507/* ------------------------------------------------------------------------------- */
508/* Trims end-of-line marker from a string */
509/* Useful in conjunction with fgets() calls */
510/* ------------------------------------------------------------------------------- */
/*
 * Cut string at its first '\n', if it has one; the newline and anything
 * after it are dropped.  A string without a newline is left unchanged.
 */
void msStringTrimEOL(char *string)
{
  char *newline = strchr(string, '\n');

  if (newline != NULL)
    *newline = '\0';
}
522
523/* ------------------------------------------------------------------------------- */
524/* Replace all occurrences of old with new in str. */
525/* It is assumed that str was dynamically created using malloc. */
526/* ------------------------------------------------------------------------------- */
/*
 * Replace every occurrence of old in str with new and return the
 * (possibly reallocated) string.
 *
 * str must be heap memory that msSmallRealloc() may resize: it is resized
 * whenever new is longer than old, so callers must continue with the
 * returned pointer.  A NULL new is treated as "".  str is returned
 * unchanged when str or old is NULL, when old is empty, or when old does
 * not occur.  Text inserted by a replacement is not searched again.
 */
char *msReplaceSubstring(char *str, const char *old, const char *new)
{
  size_t str_len, old_len, new_len, tmp_offset, tail_len;
  char *tmp_ptr;

  /* An empty pattern matches at every position: strstr() would keep
     succeeding and the loop below would never finish. */
  if(str == NULL || old == NULL || *old == '\0')
    return(str);

  if(new == NULL)
    new = "";

  /*
  ** If old is not found then leave str alone
  */
  if( (tmp_ptr = strstr(str, old)) == NULL)
    return(str);

  /*
  ** Grab some info about incoming strings
  */
  str_len = strlen(str);
  old_len = strlen(old);
  new_len = strlen(new);

  /*
  ** Now loop until old is no longer found in the rest of str
  */
  while( tmp_ptr != NULL ) {
    tmp_offset = tmp_ptr - str;

    /* Number of characters that follow this match. */
    tail_len = str_len - tmp_offset - old_len;

    /*
    ** Only grow the buffer when the replacement is longer than the match.
    ** The realloc may move the block, so re-derive the match pointer.
    */
    if (old_len < new_len) {
      str = (char *)msSmallRealloc(str, (str_len - old_len + new_len + 1));
      tmp_ptr = str + tmp_offset;
    }

    /*
    ** Shift the tail (with its terminator) to its final position; the
    ** regions overlap, hence memmove().
    */
    if (old_len != new_len) {
      memmove(tmp_ptr+new_len, tmp_ptr+old_len, tail_len+1);
    }

    /*
    ** Now copy new over old
    */
    memcpy(tmp_ptr, new, new_len);
    str_len = str_len - old_len + new_len;

    /*
    ** And look for more matches in the rest of the string
    */
    tmp_ptr = strstr(tmp_ptr + new_len, old);
  }

  return(str);
}
584
585/*
586 * same goal as msReplaceSubstring, but for the known case
587 * when we won't have to do reallocs etc
588 * used to replace the wrap character by a newline for labels
589 */
/*
 * Replace every occurrence of the character old in str with new, in
 * place.  The terminating NUL is never examined or modified, so passing
 * '\0' as old changes nothing.  A NULL str is ignored.
 */
void msReplaceChar(char *str, char old, char new)
{
  if (str == NULL)
    return;

  /* Test the current character before advancing.  The previous loop
     advanced first, which skipped str[0] and tested the terminator. */
  for (; *str != '\0'; str++) {
    if (*str == old)
      *str = new;
  }
}
595
596/*
597** how many times does ch occur in str
598*/
/*
 * Count the occurrences of ch in str.  The terminating NUL is not part
 * of the search, so asking for '\0' yields 0.
 */
int msCountChars(char *str, char ch)
{
  const char *p;
  int hits = 0;

  for (p = str; *p != '\0'; p++) {
    if (*p == ch)
      hits++;
  }

  return hits;
}
609
610/* ------------------------------------------------------------------------------- */
611/* Strip filename from a full path */
612/* ------------------------------------------------------------------------------- */
/*
 * Return a pointer to the part of fn that follows its last '/'.
 * fn itself is returned when it contains no '/'.  The result points
 * into fn; nothing is allocated or modified.
 */
char *msStripPath(char *fn)
{
  char *slash = strrchr(fn, '/');

  return (slash != NULL) ? slash + 1 : fn;
}
623
624/*
625** Returns the *path* portion of the filename fn. Memory is allocated using malloc.
626*/
/*
 * Return the directory part of fn: everything up to and including the
 * last '/' or '\\'.  When fn contains no separator the result is "./"
 * (".\\" on native Windows builds).
 *
 * The result is a fresh copy made with msStrdup(); the caller releases it.
 */
char *msGetPath(char *fn)
{
  char *str;
  int i, length;
  int found_separator = 0;

  length = strlen(fn);
  if((str = msStrdup(fn)) == NULL)
    return(NULL);

  for(i=length-1; i>=0; i--) { /* step backwards through the string */
    if((str[i] == '/') || (str[i] == '\\')) {
      str[i+1] = '\0';
      found_separator = 1;
      break;
    }
  }

  /* Decide on the flag rather than by comparing str with fn: a name that
     already ends in a separator (e.g. "/data/maps/") is unchanged by the
     truncation above, and the comparison took it for "no path". */
  if(!found_separator) {
    msFree(str);
#if defined(_WIN32) && !defined(__CYGWIN__)
    str = msStrdup(".\\");
#else
    str= msStrdup("./");
#endif
  }

  return(str);
}
655
656/*
657** Returns a *path* built from abs_path and path.
658** The pszReturnPath must be declared by the caller function as an array
659** of MS_MAXPATHLEN char
660*/
661char *msBuildPath(char *pszReturnPath, const char *abs_path, const char *path)
662{
663 int abslen = 0;
664 int pathlen = 0;
665
666
667 if(path == NULL)
668 {
669 msSetError(MS_IOERR, NULL, "msBuildPath");
670 return NULL;
671 }
672
673 pathlen = strlen(path);
674 if (abs_path)
675 abslen = strlen(abs_path);
676
677 if((pathlen + abslen + 2) > MS_MAXPATHLEN)
678 {
679 msSetError(MS_IOERR, "(%s%s): path is too long", "msBuildPath()",
680 abs_path, path);
681 return NULL;
682 }
683
684 /* Check if path is absolute */
685 if((abs_path == NULL) || (abslen == 0) ||
686 (path[0] == '\\') || (path[0] == '/') ||
687 (pathlen > 1 && (path[1] == ':')))
688 {
689 strlcpy(pszReturnPath, path, MS_MAXPATHLEN);
690 return(pszReturnPath);
691 }
692
693 /* else return abs_path/path */
694 if((abs_path[abslen-1] == '/') || (abs_path[abslen-1] == '\\'))
695 {
696 snprintf(pszReturnPath, MS_MAXPATHLEN, "%s%s", abs_path, path);
697 }
698 else
699 {
700 snprintf(pszReturnPath, MS_MAXPATHLEN, "%s/%s", abs_path, path);
701 }
702
703 return(pszReturnPath);
704}
705
706/*
707** Returns a *path* built from abs_path, path1 and path2.
708** abs_path/path1/path2
709** The pszReturnPath must be declared by the caller function as an array
710** of MS_MAXPATHLEN char
711*/
712char *msBuildPath3(char *pszReturnPath, const char *abs_path, const char *path1,const char *path2)
713{
714 char szPath[MS_MAXPATHLEN];
715
716 return msBuildPath(pszReturnPath, abs_path,
717 msBuildPath(szPath, path1, path2));
718}
719
720/*
721** Similar to msBuildPath(), but the input path is only qualified by the
722** absolute path if this will result in it pointing to a readable file.
723**
724** Returns NULL if the resulting path doesn't point to a readable file.
725*/
726
727char *msTryBuildPath(char *szReturnPath, const char *abs_path, const char *path)
728
729{
730 FILE *fp;
731
732 if( msBuildPath( szReturnPath, abs_path, path ) == NULL )
733 return NULL;
734
735 fp = fopen( szReturnPath, "r" );
736 if( fp == NULL )
737 {
738 strlcpy( szReturnPath, path, MS_MAXPATHLEN);
739 return NULL;
740 }
741 else
742 fclose( fp );
743
744 return szReturnPath;
745}
746
747/*
748** Similar to msBuildPath3(), but the input path is only qualified by the
749** absolute path if this will result in it pointing to a readable file.
750**
751** Returns NULL if the resulting path doesn't point to a readable file.
752*/
753
754char *msTryBuildPath3(char *szReturnPath, const char *abs_path, const char *path1, const char *path2)
755
756{
757 FILE *fp;
758
759 if( msBuildPath3( szReturnPath, abs_path, path1, path2 ) == NULL )
760 return NULL;
761
762 fp = fopen( szReturnPath, "r" );
763 if( fp == NULL )
764 {
765 strlcpy( szReturnPath, path2, MS_MAXPATHLEN);
766 return NULL;
767 }
768 else
769 fclose( fp );
770
771 return szReturnPath;
772}
773
774/*
775** Splits a string into multiple strings based on ch. Consecutive ch's are ignored.
776*/
/*
 * Split string on the character ch.  A run of consecutive ch characters
 * counts as a single separator.  A separator at the very start or very
 * end of the string yields an empty first or last token, and a string
 * without any separator yields one token equal to the whole string.
 *
 * Returns an array of *num_tokens strings.  The array and every token are
 * obtained from msSmallMalloc(); the caller releases them.
 */
char **msStringSplit(const char *string, char ch, int *num_tokens)
{
  int i, k, n;
  int length, start, end;
  char **token;
  char last_ch='\0';

  n = 1; /* always at least 1 token, the string itself */
  length = strlen(string);
  for(i=0; i<length; i++) {
    if(string[i] == ch && last_ch != ch)
      n++;
    last_ch = string[i];
  }

  token = (char **) msSmallMalloc(sizeof(char *)*n);
  if(!token) return(NULL);

  k = 0;
  start = 0;
  for(;;) {
    /* The current token runs up to the next separator or the end. */
    end = start;
    while(end < length && string[end] != ch)
      end++;

    /* Allocate exactly what the token needs instead of a buffer the size
       of the whole input for every token. */
    token[k] = (char *)msSmallMalloc(sizeof(char)*(end-start+1));
    if(!token[k]) return(NULL);
    memcpy(token[k], string+start, end-start);
    token[k][end-start] = '\0';
    k++;

    if(end >= length)
      break;

    /* Skip the whole separator run; the next token starts after it. */
    start = end;
    while(start < length && string[start] == ch)
      start++;
  }

  *num_tokens = n;

  return(token);
}
828
829/*
830 This function is a copy of CSLTokenizeString2() function of the CPL component.
831 See the port/cpl_string.cpp file in gdal source for the complete documentation.
832 Available Flags:
833 * - MS_ALLOWEMPTYTOKENS: allow the return of empty tokens when two
834 * delimiters in a row occur with no other text between them. If not set,
835 * empty tokens will be discarded;
836 * - MS_STRIPLEADSPACES: strip leading space characters from the token (as
837 * reported by isspace());
838 * - MS_STRIPENDSPACES: strip ending space characters from the token (as
839 * reported by isspace());
840 * - MS_HONOURSTRINGS: double quotes can be used to hold values that should
841 * not be broken into multiple tokens;
842 * - MS_PRESERVEQUOTES: string quotes are carried into the tokens when this
843 * is set, otherwise they are removed;
844 * - MS_PRESERVEESCAPES: if set backslash escapes (for backslash itself,
845 * and for literal double quotes) will be preserved in the tokens, otherwise
846 * the backslashes will be removed in processing.
847 */
848char ** msStringSplitComplex( const char * pszString,
849 const char * pszDelimiters,
850 int *num_tokens,
851 int nFlags )
852
853{
854 char **papszRetList = NULL;
855 int nRetMax = 0, nRetLen = 0;
856 char *pszToken;
857 int nTokenMax, nTokenLen;
858 int bHonourStrings = (nFlags & MS_HONOURSTRINGS);
859 int bAllowEmptyTokens = (nFlags & MS_ALLOWEMPTYTOKENS);
860 int bStripLeadSpaces = (nFlags & MS_STRIPLEADSPACES);
861 int bStripEndSpaces = (nFlags & MS_STRIPENDSPACES);
862
863 pszToken = (char *) msSmallMalloc(sizeof(char*)*10);;
864 nTokenMax = 10;
865
866 while( pszString != NULL && *pszString != '\0' )
867 {
868 int bInString = MS_FALSE;
869 int bStartString = MS_TRUE;
870
871 nTokenLen = 0;
872
873 /* Try to find the next delimeter, marking end of token */
874 for( ; *pszString != '\0'; pszString++ )
875 {
876
877 /* End if this is a delimeter skip it and break. */
878 if( !bInString && strchr(pszDelimiters, *pszString) != NULL )
879 {
880 pszString++;
881 break;
882 }
883
884 /* If this is a quote, and we are honouring constant
885 strings, then process the constant strings, with out delim
886 but don't copy over the quotes */
887 if( bHonourStrings && *pszString == '"' )
888 {
889 if( nFlags & MS_PRESERVEQUOTES )
890 {
891 pszToken[nTokenLen] = *pszString;
892 nTokenLen++;
893 }
894
895 if( bInString )
896 {
897 bInString = MS_FALSE;
898 continue;
899 }
900 else
901 {
902 bInString = MS_TRUE;
903 continue;
904 }
905 }
906
907 /*
908 * Within string constants we allow for escaped quotes, but in
909 * processing them we will unescape the quotes and \\ sequence
910 * reduces to \
911 */
912 if( bInString && pszString[0] == '\\' )
913 {
914 if ( pszString[1] == '"' || pszString[1] == '\\' )
915 {
916 if( nFlags & MS_PRESERVEESCAPES )
917 {
918 pszToken[nTokenLen] = *pszString;
919 nTokenLen++;
920 }
921
922 pszString++;
923 }
924 }
925
926 /*
927 * Strip spaces at the token start if requested.
928 */
929 if ( !bInString && bStripLeadSpaces
930 && bStartString && isspace((unsigned char)*pszString) )
931 continue;
932
933 bStartString = MS_FALSE;
934
935 /*
936 * Extend token buffer if we are running close to its end.
937 */
938 if( nTokenLen >= nTokenMax-3 )
939 {
940 nTokenMax = nTokenMax * 2 + 10;
941 pszToken = (char *) msSmallRealloc(pszToken, sizeof(char*)*nTokenMax);
942 }
943
944 pszToken[nTokenLen] = *pszString;
945 nTokenLen++;
946 }
947
948 /*
949 * Strip spaces at the token end if requested.
950 */
951 if ( !bInString && bStripEndSpaces )
952 {
953 while ( nTokenLen && isspace((unsigned char)pszToken[nTokenLen - 1]) )
954 nTokenLen--;
955 }
956
957 pszToken[nTokenLen] = '\0';
958
959 /*
960 * Add the token.
961 */
962 if( pszToken[0] != '\0' || bAllowEmptyTokens )
963 {
964 if( nRetLen >= nRetMax - 1 )
965 {
966 nRetMax = nRetMax * 2 + 10;
967 papszRetList = (char **) msSmallRealloc(papszRetList, sizeof(char*)*nRetMax);
968 }
969
970 papszRetList[nRetLen++] = msStrdup( pszToken );
971 papszRetList[nRetLen] = NULL;
972 }
973 }
974
975 /*
976 * If the last token was empty, then we need to capture
977 * it now, as the loop would skip it.
978 */
979 if( *pszString == '\0' && bAllowEmptyTokens && nRetLen > 0
980 && strchr(pszDelimiters,*(pszString-1)) != NULL )
981 {
982 if( nRetLen >= nRetMax - 1 )
983 {
984 nRetMax = nRetMax * 2 + 10;
985 papszRetList = (char **) msSmallRealloc(papszRetList, sizeof(char*)*nRetMax);
986 }
987
988 papszRetList[nRetLen++] = msStrdup("");
989 papszRetList[nRetLen] = NULL;
990 }
991
992 if( papszRetList == NULL )
993 papszRetList = (char **) msSmallMalloc(sizeof(char *)*1);
994
995 *num_tokens = nRetLen;
996 free(pszToken);
997
998 return papszRetList;
999}
1000
1001/* This method is similar to msStringSplit but support quoted strings.
1002 It also support multi-characters delimiter and allows to preserve quotes */
1003char **msStringTokenize( const char *pszLine, const char *pszDelim,
1004 int *num_tokens, int preserve_quote )
1005{
1006 char **papszResult = NULL;
1007 int n = 1, iChar, nLength = strlen(pszLine), iTokenChar = 0, bInQuotes = MS_FALSE;
1008 char *pszToken = (char *) msSmallMalloc(sizeof(char*)*(nLength+1));
1009 int nDelimLen = strlen(pszDelim);
1010
1011 /* Compute the number of tokens */
1012 for( iChar = 0; pszLine[iChar] != '\0'; iChar++ )
1013 {
1014 if( bInQuotes && pszLine[iChar] == '"' && pszLine[iChar+1] == '"' )
1015 {
1016 iChar++;
1017 }
1018 else if( pszLine[iChar] == '"' )
1019 {
1020 bInQuotes = !bInQuotes;
1021 }
1022 else if ( !bInQuotes && strncmp(pszLine+iChar,pszDelim,nDelimLen) == 0 )
1023 {
1024 iChar += nDelimLen - 1;
1025 n++;
1026 }
1027 }
1028
1029 papszResult = (char **) msSmallMalloc(sizeof(char *)*n);
1030 n = iTokenChar = bInQuotes = 0;
1031 for( iChar = 0; pszLine[iChar] != '\0'; iChar++ )
1032 {
1033 if( bInQuotes && pszLine[iChar] == '"' && pszLine[iChar+1] == '"' )
1034 {
1035 if (preserve_quote == MS_TRUE)
1036 pszToken[iTokenChar++] = '"';
1037 pszToken[iTokenChar++] = '"';
1038 iChar++;
1039 }
1040 else if( pszLine[iChar] == '"' )
1041 {
1042 if (preserve_quote == MS_TRUE)
1043 pszToken[iTokenChar++] = '"';
1044 bInQuotes = !bInQuotes;
1045 }
1046 else if( !bInQuotes && strncmp(pszLine+iChar,pszDelim,nDelimLen) == 0 )
1047 {
1048 pszToken[iTokenChar++] = '\0';
1049 papszResult[n] = pszToken;
1050 pszToken = (char *) msSmallMalloc(sizeof(char*)*(nLength+1));
1051 iChar += nDelimLen - 1;
1052 iTokenChar = 0;
1053 n++;
1054 }
1055 else
1056 {
1057 pszToken[iTokenChar++] = pszLine[iChar];
1058 }
1059 }
1060
1061 pszToken[iTokenChar++] = '\0';
1062 papszResult[n] = pszToken;
1063
1064 *num_tokens = n+1;
1065
1066 return papszResult;
1067}
1068
1069/**********************************************************************
1070 * msEncodeChar()
1071 *
1072 * Return 1 if the character argument should be encoded for safety
1073 * in URL use and 0 otherwise. Specific character map taken from
1074 * http://www.ietf.org/rfc/rfc2396.txt
1075 *
1076 **********************************************************************/
1077
/*
 * Return 0 when c may appear unescaped in a URL, 1 when it has to be
 * percent-encoded.  The unescaped set is a-z, A-Z, 0-9 and the marks
 * ! ' ( ) * - . _ ~
 */
int msEncodeChar(const char c)
{
  if (c >= 0x30 && c <= 0x39) /* 0-9 */
    return 0;
  if (c >= 0x41 && c <= 0x5A) /* A-Z */
    return 0;
  if (c >= 0x61 && c <= 0x7A) /* a-z */
    return 0;

  switch (c) {
    case 0x21: /* ! */
    case 0x27: /* ' */
    case 0x28: /* ( */
    case 0x29: /* ) */
    case 0x2A: /* * */
    case 0x2D: /* - */
    case 0x2E: /* . */
    case 0x5F: /* _ */
    case 0x7E: /* ~ */
      return 0;
    default:
      return 1;
  }
}
1097
/*
 * URL-encode data.  Thin wrapper around msEncodeUrlExcept() and returns
 * whatever that call returns.
 */
char *msEncodeUrl(const char *data)
{
  /* '\0' as the exception character switches the exception off. */
  char *encoded = msEncodeUrlExcept(data, '\0');

  return encoded;
}
1106
1107/**********************************************************************
1108 * msEncodeUrlExcept()
1109 *
1110 * URL encoding, applies RFC 2396 encoding to all characters
1111 * except the one exception character. An exception character
1112 * of '\0' implies no exception handling.
1113 *
1114 **********************************************************************/
1115
/*
 * URL-encode data into a buffer obtained from msSmallMalloc(); the caller
 * owns the result.
 *
 * A space becomes '+', the character given as except (unless it is '\0')
 * is copied as is, every other character for which msEncodeChar() returns
 * non-zero becomes "%XX" with upper-case hex digits, and everything else
 * is copied unchanged.
 */
char *msEncodeUrlExcept(const char *data, const char except)
{
  static const char hex_digits[] = "0123456789ABCDEF";
  const char *src;
  char *encoded, *dst;
  int extra = 0;

  /* Every escaped character grows from one byte to three. */
  for (src = data; *src != '\0'; src++) {
    if (msEncodeChar(*src))
      extra += 2;
  }

  encoded = (char *)msSmallMalloc(strlen(data) + extra + 1);
  dst = encoded;

  for (src = data; *src != '\0'; src++) {
    if (*src == ' ') {
      *dst++ = '+';
    } else if (except != '\0' && *src == except) {
      *dst++ = except;
    } else if (msEncodeChar(*src)) {
      unsigned char byte = *src;

      *dst++ = '%';
      *dst++ = hex_digits[byte / 16];
      *dst++ = hex_digits[byte % 16];
    } else {
      *dst++ = *src;
    }
  }
  *dst = '\0';

  return encoded;
}
1154
/* msEncodeHTMLEntities()
**
** Return a newly allocated copy of string after replacing some problematic
** chars with their HTML entity equivalents.
**
** The replacements performed are:
**  '&' -> "&amp;", '"' -> "&quot;", '<' -> "&lt;", '>' -> "&gt;"
**  and '\'' -> "&#39;"
**
** Returns NULL if string is NULL or if memory cannot be allocated.
** The caller frees the result.
**/
char *msEncodeHTMLEntities(const char *string)
{
  size_t buflen, i = 0;
  char *newstring;
  const char *c;

  if(string == NULL)
    return NULL;

  /* Start with 100 extra chars for replacements... */
  /* should be good enough for most cases */
  buflen = strlen(string) + 100;
  newstring = (char*)malloc(buflen+1);
  MS_CHECK_ALLOC(newstring, buflen+1, NULL);

  for(c=string; *c != '\0'; c++) {
    const char *replacement;

    /* Keep room for the longest replacement (6 chars); grow if needed. */
    if (i+6 > buflen) {
      char *grown;

      /* If we had to realloc then this string must contain several */
      /* entities... so let's go with twice the previous buffer size */
      buflen *= 2;
      grown = (char*)realloc(newstring, buflen+1);
      if (grown == NULL)
        free(newstring); /* realloc left the old block allocated */
      MS_CHECK_ALLOC(grown, buflen+1, NULL);
      newstring = grown;
    }

    switch(*c) {
      case '&':
        replacement = "&amp;";
        break;
      case '<':
        replacement = "&lt;";
        break;
      case '>':
        replacement = "&gt;";
        break;
      case '"':
        replacement = "&quot;";
        break;
      case '\'':
        replacement = "&#39;"; /* numeric form instead of &apos; (bug 1040) */
        break;
      default:
        replacement = NULL;
        break;
    }

    if (replacement != NULL) {
      const size_t replacement_len = strlen(replacement);
      memcpy(newstring+i, replacement, replacement_len);
      i += replacement_len;
    } else {
      newstring[i++] = *c;
    }
  }

  newstring[i] = '\0';

  return newstring;
}
1221
1222
/* msDecodeHTMLEntities()
**
** Modify the string in place to replace encoded characters by their true
** value. The string only ever shrinks, so no memory is allocated.
**
** The replacements performed (entity names are matched case-insensitively):
**  "&amp;" -> '&', "&quot;" -> '"', "&lt;" -> '<', "&gt;" -> '>'
**  and "&apos;" -> '\''
**
** Any other "&...;" sequence is left untouched. A decoded character is never
** rescanned, so "&amp;lt;" becomes "&lt;" and not "<".
**
** NOTE: although declared const, the buffer behind string is written to
** whenever it contains one of the entities above.
**/
void msDecodeHTMLEntities(const char *string)
{
  static const struct {
    const char *text;
    size_t length;
    char value;
  } entities[] = {
    { "&amp;",  5, '&'  },
    { "&lt;",   4, '<'  },
    { "&gt;",   4, '>'  },
    { "&quot;", 6, '"'  },
    { "&apos;", 6, '\'' }
  };
  const size_t num_entities = sizeof(entities) / sizeof(entities[0]);
  char *src, *dst;
  size_t k;

  if(string == NULL)
    return;

  /* Nothing before the first '&' can change. */
  src = strchr((char *)string, '&');
  if(src == NULL)
    return;
  dst = src;

  while(*src != '\0') {
    if(*src == '&') {
      for(k=0; k<num_entities; k++) {
        if(strncasecmp(src, entities[k].text, entities[k].length) == 0)
          break;
      }
      if(k < num_entities) {
        *dst++ = entities[k].value;
        src += entities[k].length;
        continue;
      }
    }

    /* Plain character: only write once the two cursors have diverged. */
    if(dst != src)
      *dst = *src;
    dst++;
    src++;
  }

  if(dst != src)
    *dst = '\0';
}
1300
1301/*
1302** msIsXMLValid
1303**
1304** Check if the string is an XML valid string. It should contains only
1305** A-Z, a-z, 0-9, '_', '-', '.', and ':'
1306** Return MS_TRUE or MS_FALSE
1307*/
1308int msIsXMLTagValid(const char *string)
1309{
1310 int i, nLen;
1311
1312 nLen = strlen(string);
1313
1314 for(i=0; i<nLen; i++)
1315 {
1316 if( !( string[i] >= 'A' && string[i] <= 'Z' ) &&
1317 !( string[i] >= 'a' && string[i] <= 'z' ) &&
1318 !( string[i] >= '0' && string[i] <= '9' ) &&
1319 string[i] != '-' && string[i] != '.' &&
1320 string[i] != ':' && string[i] != '_' )
1321 return MS_FALSE;
1322 }
1323
1324 return MS_TRUE;
1325}
1326
1327
1328/*
1329 * Concatenate pszSrc to pszDest and reallocate memory if necessary.
1330*/
1331char *msStringConcatenate(char *pszDest, const char *pszSrc)
1332{
1333 int nLen;
1334
1335 if (pszSrc == NULL)
1336 return pszDest;
1337
1338 /* if destination is null, allocate memory */
1339 if (pszDest == NULL) {
1340 pszDest = msStrdup(pszSrc);
1341 }
1342 else { /* if dest is not null, reallocate memory */
1343 char *pszTemp;
1344
1345 nLen = strlen(pszDest) + strlen(pszSrc);
1346
1347 pszTemp = (char*)realloc(pszDest, nLen + 1);
1348 if (pszTemp) {
1349 pszDest = pszTemp;
1350 strcat(pszDest, pszSrc);
1351 pszDest[nLen] = '\0';
1352 }
1353 else {
1354 msSetError(MS_MEMERR, "Error while reallocating memory.", "msStringConcatenate()");
1355 return NULL;
1356 }
1357 }
1358
1359 return pszDest;
1360}
1361
/*
 * Join the arrayLength strings of array into one newly allocated string,
 * placing delimeter between consecutive elements (none after the last).
 *
 * Returns NULL if array or delimeter is NULL, if arrayLength <= 0, or if
 * memory cannot be allocated. The caller frees the result.
 */
char *msJoinStrings(char **array, int arrayLength, const char *delimeter)
{
  char *string, *cursor;
  size_t stringLength = 0;
  size_t delimeterLength, elementLength;
  int i;

  if(!array || arrayLength <= 0 || !delimeter) return NULL;

  delimeterLength = strlen(delimeter);

  /* exact size: every element plus one delimiter between neighbours */
  for(i=0; i<arrayLength; i++)
    stringLength += strlen(array[i]);
  stringLength += delimeterLength * (size_t)(arrayLength - 1);

  string = (char *)malloc(stringLength + 1);
  MS_CHECK_ALLOC(string, stringLength + 1, NULL);

  /* append through a moving cursor so earlier output is never rescanned */
  cursor = string;
  for(i=0; i<arrayLength; i++) {
    if(i > 0) {
      memcpy(cursor, delimeter, delimeterLength);
      cursor += delimeterLength;
    }
    elementLength = strlen(array[i]);
    memcpy(cursor, array[i], elementLength);
    cursor += elementLength;
  }
  *cursor = '\0';

  return string;
}
1388
#define HASH_SIZE 16
/*
 * Return a hashed string for a given input string: the bytes of pszStr are
 * summed (modulo 256) into HASH_SIZE buckets by position, and the buckets
 * are written out as HASH_SIZE*2 lower-case hex digits.
 * A NULL input hashes like an empty string (all zeros).
 * The caller should free the return value.
*/
char *msHashString(const char *pszStr)
{
  static const char digits[] = "0123456789abcdef";
  unsigned char sums[HASH_SIZE] = {0};
  char *pszHash;
  int k;

  pszHash = (char*)msSmallMalloc(HASH_SIZE*2+1);

  if(pszStr != NULL) {
    for(k=0; pszStr[k] != '\0'; k++)
      sums[k%HASH_SIZE] += (unsigned char)(pszStr[k]);
  }

  /* two hex digits per bucket, high nibble first */
  for(k=0; k<HASH_SIZE; k++) {
    pszHash[k*2]   = digits[sums[k] >> 4];
    pszHash[k*2+1] = digits[sums[k] & 0x0f];
  }
  pszHash[HASH_SIZE*2] = '\0';

  return pszHash;
}
1416
/*
** Insert thousands separators (',') into the integer part of a numeric
** string, e.g. "1234567.891" -> "1,234,567.891".
**
** - str must have been allocated with malloc(): it is reallocated when
**   commas are added, and the (possibly moved) pointer is returned.
** - A single leading '-' or '+' is kept in front and never followed by a
**   comma ("-123" stays "-123", "-1234" becomes "-1,234").
** - The string is returned unchanged if it holds more than one '.', or if
**   its integer part has fewer than four characters.
** - Returns NULL if str is NULL.
*/
char *msCommifyString(char *str)
{
  const char comma=',', decimal_point='.';
  size_t old_length, int_start, int_end, num_commas;
  size_t src, dst;
  int group;
  char *dot;

  if(!str) return NULL;

  if(msCountChars(str, decimal_point) > 1) return str;

  old_length = strlen(str);

  /* The integer part spans [int_start, int_end). */
  dot = strchr(str, decimal_point);
  int_end = dot ? (size_t)(dot - str) : old_length;
  int_start = (str[0] == '-' || str[0] == '+') ? 1 : 0;

  if(int_end <= int_start) return str; /* no integer part at all */

  num_commas = (int_end - int_start - 1)/3;
  if(num_commas < 1) return str; /* nothing to add */

  str = (char *) msSmallRealloc(str, old_length + num_commas + 1);

  /* Shift the fractional part (and the terminator) into its final place. */
  memmove(str + int_end + num_commas, str + int_end, old_length - int_end + 1);

  /*
  ** Walk the integer part backwards, copying digits to their new position
  ** and dropping a comma after every third one. Once dst catches up with
  ** src all commas are placed and the rest of the string is fine as is.
  */
  src = int_end;
  dst = int_end + num_commas;
  group = 0;
  while(src > int_start && dst > src) {
    if(group == 3) {
      str[--dst] = comma;
      group = 0;
    } else {
      str[--dst] = str[--src];
      group++;
    }
  }

  return str;
}
1465
1466
/* ------------------------------------------------------------------------------- */
/*       Replace all occurrences of old with new in str.                           */
/*       It is assumed that str was dynamically created using malloc.              */
/*       Same function as msReplaceSubstring but this is case insensitive.         */
/*                                                                                 */
/*       Returns str, which may have been moved by a reallocation. str is          */
/*       returned untouched when str or old is NULL, when old is empty, or         */
/*       when old does not occur. A NULL new is treated as "".                     */
/*       Replaced text is never rescanned for further matches.                     */
/* ------------------------------------------------------------------------------- */
char *msCaseReplaceSubstring(char *str, const char *old, const char *new)
{
  size_t str_len, old_len, new_len, match_offset, tail_len;
  char *match;

  /*
  ** An empty pattern matches everywhere and would never terminate,
  ** so treat it (and missing arguments) as "nothing to replace".
  */
  if(str == NULL || old == NULL || *old == '\0')
    return(str);

  if(new == NULL)
    new = "";

  /*
  ** If old is not found then leave str alone
  */
  if( (match = (char *) strcasestr(str, old)) == NULL)
    return(str);

  /*
  ** Grab some info about incoming strings
  */
  str_len = strlen(str);
  old_len = strlen(old);
  new_len = strlen(new);

  while( match != NULL ) {
    match_offset = match - str;

    /*
    ** Grow the buffer for one replacement; no need when the string shrinks
    ** or keeps its length. realloc may move str, so re-derive match.
    */
    if (old_len < new_len) {
      str = (char *)msSmallRealloc(str, str_len - old_len + new_len + 1);
      match = str + match_offset;
    }

    /*
    ** Move the trailing part of str (terminator included) unless old and
    ** new have the same length
    */
    if (old_len != new_len) {
      tail_len = str_len - match_offset - old_len;
      memmove(match+new_len, match+old_len, tail_len+1);
      str_len = str_len - old_len + new_len;
    }

    /*
    ** Now copy new over old
    */
    memcpy(match, new, new_len);

    /*
    ** And look for more matches in the rest of the string
    */
    match = (char *) strcasestr(match + new_len, old);
  }

  return(str);
}
1529
/*
** Converts a 2 character hexadecimal string to an integer.
**
** Only hex[0] and hex[1] are read. Characters at or above 'A' are folded
** to upper case (bit 0x20 cleared) and taken as A..F; everything else is
** taken as a decimal digit. No validation is done.
*/
int msHexToInt(char *hex)
{
  int number = 0;
  int pos;

  for (pos = 0; pos < 2; pos++) {
    const char digit = hex[pos];
    int nibble;

    if (digit >= 'A')
      nibble = ((digit & 0xdf) - 'A') + 10;
    else
      nibble = digit - '0';

    number = number * 16 + nibble;
  }

  return number;
}
1542
1543
/*
** Use FRIBIDI to encode the string: the input (in the given encoding) is
** converted to FriBidi characters, reordered from logical to visual order,
** and converted to UTF-8.
**
** Returns NULL (with an error set) if the encoding is not supported, if the
** input does not fit the fixed-size conversion buffer, or if the reordering
** fails.
** The return value must be freed by the caller.
*/
#ifdef USE_FRIBIDI
char *msGetFriBidiEncodedString(const char *string, const char *encoding)
{
  FriBidiChar logical[MAX_STR_LEN];
  FriBidiChar *visual;
  FriBidiCharType base = FRIBIDI_TYPE_ON;
  char *outstring;
  size_t len, outsize;

#ifdef FRIBIDI_NO_CHARSETS
  iconv_t to_ucs4, from_ucs4;
#else
  FriBidiStrIndex new_len;
  int to_char_set_num;
  int from_char_set_num;
  int i, j;
#endif

  len = strlen(string);

#ifdef FRIBIDI_NO_CHARSETS
  to_ucs4 = iconv_open ("WCHAR_T", encoding);
  from_ucs4 = iconv_open ("UTF-8", "WCHAR_T");

  if (to_ucs4 == (iconv_t) (-1) || from_ucs4 == (iconv_t) (-1)) {
    /* release whichever descriptor did open */
    if (to_ucs4 != (iconv_t) (-1))
      iconv_close(to_ucs4);
    if (from_ucs4 != (iconv_t) (-1))
      iconv_close(from_ucs4);
    msSetError(MS_IDENTERR, "Encoding not supported (%s).",
               "msGetFriBidiEncodedString()", encoding);
    return NULL;
  }
#else
  to_char_set_num = fribidi_parse_charset ((char*)encoding);
  from_char_set_num = fribidi_parse_charset ("UTF-8");

  if (!to_char_set_num || !from_char_set_num) {
    msSetError(MS_IDENTERR, "Encoding not supported (%s).",
               "msGetFriBidiEncodedString()", encoding);
    return NULL;
  }

  /*
  ** fribidi_charset_to_unicode() is handed no output size, so the input
  ** must be known to fit in logical[] before it is converted.
  */
  if (len >= MAX_STR_LEN) {
    msSetError(MS_MISCERR, "String too long for bidi processing (%d bytes).",
               "msGetFriBidiEncodedString()", (int) len);
    return NULL;
  }
#endif

  /* Convert the input to FriBidi characters; len becomes a character count. */
#ifdef FRIBIDI_NO_CHARSETS
  {
    char *in = (char *) string, *out = (char *) logical;
    size_t in_left = len;
    size_t out_left = sizeof logical; /* iconv stops when logical[] is full */
    iconv (to_ucs4, &in, &in_left, &out, &out_left);
    iconv_close(to_ucs4);
    len = (FriBidiChar *) out - logical;
  }
#else
  len = fribidi_charset_to_unicode (to_char_set_num, (char*)string, len, logical);
#endif

  visual = (FriBidiChar *) msSmallMalloc (sizeof (FriBidiChar) * (len + 1));

  /* Create a bidi string. */
  if (!fribidi_log2vis (logical, len, &base,
                        /* output */
                        visual, NULL, NULL, NULL)) {
    msSetError(MS_IDENTERR, "Failed to create bidi string.",
               "msGetFriBidiEncodedString()");
    free(visual);
#ifdef FRIBIDI_NO_CHARSETS
    iconv_close(from_ucs4);
#endif
    return NULL;
  }

  /* Each character can take up to 6 bytes in UTF-8, plus the terminator. */
  outsize = len * 6 + 1;
  outstring = (char *) msSmallMalloc (outsize);

  /* Convert it to utf-8 for display. */
#ifdef FRIBIDI_NO_CHARSETS
  {
    char *in = (char *) visual, *out = outstring;
    size_t in_left = len * sizeof visual[0];
    size_t out_left = outsize - 1; /* keep one byte for the terminator */
    iconv (from_ucs4, &in, &in_left, &out, &out_left);
    iconv_close(from_ucs4);
    *out = '\0';
  }
#else
  new_len =
    fribidi_unicode_to_charset (from_char_set_num,
                                visual, len, outstring);

  /* scan str and compress out FRIBIDI_CHAR_FILL UTF8 characters */

  for (i=0, j=0; i<new_len; i++, j++) {
    if (outstring[i] == '\xef' && outstring[i+1] == '\xbb' && outstring[i+2] == '\xbf') {
      i += 3;
    }
    if (i != j) {
      outstring[j] = outstring[i];
    }
  }
  outstring[j] = '\0';

#endif

  free(visual);
  return outstring;
}
#endif
1659
1660/*
1661** Simple charset converter. Converts string from specified encoding to UTF-8.
1662** The return value must be freed by the caller.
1663*/
1664char *msGetEncodedString(const char *string, const char *encoding)
1665{
1666#ifdef USE_ICONV
1667 iconv_t cd = NULL;
1668 const char *inp;
1669 char *outp, *out = NULL;
1670 size_t len, bufsize, bufleft, iconv_status;
1671
1672#ifdef USE_FRIBIDI
1673 if(fribidi_parse_charset ((char*)encoding))
1674 return msGetFriBidiEncodedString(string, encoding);
1675#endif
1676 len = strlen(string);
1677
1678 if (len == 0 || (encoding && strcasecmp(encoding, "UTF-8")==0))
1679 return msStrdup(string); /* Nothing to do: string already in UTF-8 */
1680
1681 cd = iconv_open("UTF-8", encoding);
1682 if(cd == (iconv_t)-1) {
1683 msSetError(MS_IDENTERR, "Encoding not supported by libiconv (%s).",
1684 "msGetEncodedString()", encoding);
1685 return NULL;
1686 }
1687
1688 bufsize = len * 6 + 1; /* Each UTF-8 char can be up to 6 bytes */
1689 inp = string;
1690 out = (char*) malloc(bufsize);
1691 if(out == NULL){
1692 msSetError(MS_MEMERR, NULL, "msGetEncodedString()");
1693 iconv_close(cd);
1694 return NULL;
1695 }
1696 strlcpy(out, string, bufsize);
1697 outp = out;
1698
1699 bufleft = bufsize;
1700 iconv_status = -1;
1701
1702 while (len > 0){
1703 iconv_status = iconv(cd, (char**)&inp, &len, &outp, &bufleft);
1704 if(iconv_status == -1){
1705 msFree(out);
1706 iconv_close(cd);
1707 return msStrdup(string);
1708 }
1709 }
1710 out[bufsize - bufleft] = '\0';
1711
1712 iconv_close(cd);
1713
1714 return out;
1715#else
1716 if (*string == '\0' || (encoding && strcasecmp(encoding, "UTF-8")==0))
1717 return msStrdup(string); /* Nothing to do: string already in UTF-8 */
1718
1719 msSetError(MS_MISCERR, "Not implemeted since Iconv is not enabled.", "msGetEncodedString()");
1720 return NULL;
1721#endif
1722}
1723
1724
1725char* msConvertWideStringToUTF8 (const wchar_t* string, const char* encoding) {
1726#ifdef USE_ICONV
1727
1728 char* output = NULL;
1729 char* errormessage = NULL;
1730 iconv_t cd = NULL;
1731 size_t nStr;
1732 size_t nInSize;
1733 size_t nOutSize;
1734 size_t iconv_status = -1;
1735 size_t nBufferSize;
1736
1737 char* pszUTF8 = NULL;
1738 const wchar_t* pwszWide = NULL;
1739
1740 if (string != NULL)
1741 {
1742 nStr = wcslen (string);
1743 nBufferSize = ((nStr * 6) + 1);
1744 output = (char*) msSmallMalloc (nBufferSize);
1745
1746 if (nStr == 0) {
1747 /* return an empty 8 byte string */
1748 output[0] = '\0';
1749 return output;
1750 }
1751
1752 cd = iconv_open("UTF-8", encoding);
1753
1754 nOutSize = nBufferSize;
1755 if ((iconv_t)-1 != cd)
1756 {
1757 nInSize = sizeof (wchar_t)*nStr;
1758 pszUTF8 = output;
1759 pwszWide = string;
1760 iconv_status = iconv(cd, (char **)&pwszWide, &nInSize, &pszUTF8, &nOutSize);
1761 if ((size_t)-1 == iconv_status) {
1762 switch (errno) {
1763 case E2BIG:
1764 errormessage = "There is not sufficient room in buffer";
1765 break;
1766 case EILSEQ:
1767 errormessage = "An invalid multibyte sequence has been encountered in the input";
1768 break;
1769 case EINVAL:
1770 errormessage = "An incomplete multibyte sequence has been encountered in the input";
1771 break;
1772 default:
1773 errormessage = "Unknown";
1774 break;
1775 }
1776 msSetError(MS_MISCERR, "Unable to convert string in encoding '%s' to UTF8 %s",
1777 "msConvertWideStringToUTF8()",
1778 encoding,errormessage);
1779 iconv_close(cd);
1780 msFree(output);
1781 return NULL;
1782 }
1783 iconv_close(cd);
1784 } else {
1785 msSetError(MS_MISCERR, "Encoding not supported by libiconv (%s).",
1786 "msConvertWideStringToUTF8()",
1787 encoding);
1788 msFree(output);
1789 return NULL;
1790 }
1791
1792 } else {
1793 /* we were given a NULL wide string, nothing we can do here */
1794 return NULL;
1795 }
1796
1797 /* NULL-terminate the output string */
1798 output[nBufferSize - nOutSize] = '\0';
1799 return output;
1800#else
1801 msSetError(MS_MISCERR, "Not implemented since Iconv is not enabled.", "msConvertWideStringToUTF8()");
1802 return NULL;
1803#endif
1804}
1805
/*
** Returns the next glyph in string and advances *in_ptr to the next
** character.
**
** If out_string is not NULL then the character (bytes) is copied to this
** buffer and null-terminated. out_string must be a pre-allocated buffer of
** at least 11 bytes.
**
** The function returns the number of bytes in this glyph, or -1 when
** *in_ptr points at the end of the string (in which case *in_ptr is left
** untouched).
**
** This function treats 3 types of glyph encodings:
*   - as an html entity, for example &#123; , &#x1af; , or &eacute;
*   - as an utf8 encoded character
*   - if utf8 decoding fails, as a raw character
*
** This function mimics the character decoding function used in gdft.c of
* libGD. It is necessary to have the same behaviour, as input strings must be
* split into the same glyphs as what gd does.
**
** In UTF-8, the number of leading 1 bits in the first byte specifies the
** number of bytes in the entire sequence.
** Source: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
**
** U-00000000 U-0000007F: 0xxxxxxx
** U-00000080 U-000007FF: 110xxxxx 10xxxxxx
** U-00000800 U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
** U-00010000 U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
** U-00200000 U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
** U-04000000 U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
*/
int msGetNextGlyph(const char **in_ptr, char *out_string)
{
  const char *start = *in_ptr;
  const unsigned char lead = (unsigned char)*start;
  int length, unicode;
  int k;

  if (lead == 0)
    return -1; /* Empty string */

  /* An html entity is taken whole, whatever its bytes look like. */
  length = msGetUnicodeEntity(start, &unicode);

  if (length <= 0) {
    /* Sequence length announced by the lead byte. */
    if (lead < 0xC0)
      length = 1; /* plain ASCII, or a naked trail byte standing for itself */
    else if (lead < 0xE0)
      length = 2; /* 110xxxxx */
    else if (lead < 0xF0)
      length = 3; /* 1110xxxx */
    else if (lead < 0xF8)
      length = 4; /* 11110xxx */
    else if (lead < 0xFC)
      length = 5; /* 111110xx */
    else if (lead < 0xFE)
      length = 6; /* 1111110x */
    else
      length = 1; /* 0xFE and 0xFF never start a sequence */

    /*
     * Every following byte must look like 10xxxxxx, otherwise the lead
     * byte is returned on its own as a raw character. The scan stops at
     * the first mismatch, so it never reads past the terminator.
     */
    for (k = 1; k < length; k++) {
      if ((start[k] & 0xC0) != 0x80) {
        length = 1;
        break;
      }
    }
  }

  if (out_string) {
    for (k = 0; k < length; k++)
      out_string[k] = start[k];
    out_string[length] = '\0';
  }

  *in_ptr = start + length;
  return length;
}
1947
1948/*
1949** Returns the number of glyphs in string
1950*/
1951int msGetNumGlyphs(const char *in_ptr)
1952{
1953 int numchars=0;
1954
1955 while( msGetNextGlyph(&in_ptr, NULL) != -1 )
1956 numchars++;
1957
1958 return numchars;
1959}
1960
1961static int cmp_entities(const void *e1, const void *e2) {
1962 struct mapentities_s *en1 = (struct mapentities_s *) e1;
1963 struct mapentities_s *en2 = (struct mapentities_s *) e2;
1964 return strcmp(en1->name, en2->name);
1965}
1966/*
1967 * this function tests if the string pointed by inptr represents
1968 * an HTML entity, in decimal form ( e.g. &#197;), in hexadecimal
1969 * form ( e.g. &#x6C34; ), or from html 4.0 spec ( e.g. &eacute; )
1970 * - returns returns 0 if the string doesn't represent such an entity.
1971 * - if the string does start with such entity,it returns the number of
1972 * bytes occupied by said entity, and stores the unicode value in *unicode
1973 */
1974int msGetUnicodeEntity(const char *inptr, int *unicode) {
1975 unsigned char *in = (unsigned char*)inptr;
1976 int l,val=0;
1977 if(*in=='&') {
1978 in++;
1979 if(*in=='#') {
1980 in++;
1981 if(*in=='x'||*in=='X') {
1982 in++;
1983 for(l=3;l<8;l++) {
1984 char byte;
1985 if(*in>='0'&&*in<='9')
1986 byte = *in - '0';
1987 else if(*in>='a'&&*in<='f')
1988 byte = *in - 'a' + 10;
1989 else if(*in>='A'&&*in<='F')
1990 byte = *in - 'A' + 10;
1991 else
1992 break;
1993 in++;
1994 val = (val * 16) + byte;
1995 }
1996 if(*in==';' && l>3 ) {
1997 *unicode=val;
1998 return ++l;
1999 }
2000 }
2001 else
2002 {
2003 for(l=2;l<8;l++) {
2004 if(*in>='0'&&*in<='9') {
2005 val = val*10+*in-'0';
2006 in++;
2007 }
2008 else
2009 break;
2010 }
2011 if(*in==';' && l>2 ) {
2012 *unicode=val;
2013 return ++l;
2014 }
2015 }
2016 }
2017 else
2018 {
2019 char entity_name_buf[MAP_ENTITY_NAME_LENGTH_MAX+1];
2020 char *p;
2021 struct mapentities_s key, *res;
2022 key.name = p = entity_name_buf;
2023 for (l = 1; l <= MAP_ENTITY_NAME_LENGTH_MAX+1; l++)
2024 {
2025 if (*in == '\0') /*end of string before possible entity: return*/
2026 break;
2027 if (*in == ';') /*possible end of entity: do a lookup*/
2028 {
2029 *p++ = '\0';
2030 res = bsearch(&key, mapentities, MAP_NR_OF_ENTITIES,
2031 sizeof(mapentities[0]), *cmp_entities);
2032 if (res)
2033 {
2034 *unicode = res->value;
2035 return ++l;
2036 }
2037 break; /*the string was of the form of an entity but didn't correspond to an existing one: return*/
2038 }
2039 *p++ = *in;
2040 in++;
2041 }
2042 }
2043 }
2044 return 0;
2045}
2046
2047/**
2048 * msStringIsInteger()
2049 *
2050 * determines whether a given string is an integer
2051 *
2052 * @param string the string to be tested
2053 *
2054 * @return MS_SUCCESS or MS_FAILURE
2055 */
2056
2057int msStringIsInteger(const char *string) {
2058 int length, i;
2059
2060 length = strlen(string);
2061
2062 if (length == 0)
2063 return MS_FAILURE;
2064
2065 for(i=0;i<length;i++) {
2066 if (!isdigit(string[i]))
2067 return MS_FAILURE;
2068 }
2069
2070 return MS_SUCCESS;
2071}
2072
/************************************************************************/
/*                             msStrdup()                               */
/************************************************************************/

/* Safe version of strdup(). This function is taken from gdal/cpl.
 *
 * Returns a newly allocated copy of pszString; a NULL input is treated as
 * an empty string, so the result is never NULL. If memory runs out, a
 * message is printed to stderr and the process exits with status 1.
 * The caller frees the result.
 */

char *msStrdup( const char * pszString )
{
  char *pszReturn;
  size_t nBytes;

  if( pszString == NULL )
    pszString = "";

  nBytes = strlen( pszString ) + 1; /* terminator included */
  pszReturn = (char *) malloc( nBytes );

  if( pszReturn == NULL ) {
    fprintf(stderr, "msStrdup(): Out of memory allocating %ld bytes.\n",
            (long) nBytes );
    exit(1);
  }

  memcpy( pszReturn, pszString, nBytes );

  return( pszReturn );
}
2097
2098
/************************************************************************/
/*                             msStringEscape()                         */
/************************************************************************/

/* Return a copy of pszString in which every single or double quote is
   preceded by a backslash. A NULL or empty input yields an empty string.
   NOTE: the caller has to free the returned string */

char* msStringEscape( const char * pszString )
{
  const char *src;
  char *escaped, *dst;

  if (pszString == NULL || pszString[0] == '\0')
    return msStrdup("");

  /* worst case: every character is a quote and doubles in size */
  escaped = (char*)msSmallMalloc((strlen(pszString)*2)+1);

  dst = escaped;
  for (src = pszString; *src != '\0'; src++) {
    if (*src == '\"' || *src == '\'')
      *dst++ = '\\';
    *dst++ = *src;
  }
  *dst = '\0';

  return escaped;
}
2126
2127/************************************************************************/
2128/* msStringInArray() */
2129/************************************************************************/
2130
2131/* Check if a string is in a array */
2132int msStringInArray( const char * pszString, char **array, int numelements)
2133{
2134 int i;
2135 for (i=0;i<numelements;++i) {
2136 if (strcasecmp(pszString, array[i])==0)
2137 return MS_TRUE;
2138 }
2139 return MS_FALSE;
2140}
Note: See TracBrowser for help on using the repository browser.