Ticket #467: shp2pgsql_iconv_error_policy.patch3

File shp2pgsql_iconv_error_policy.patch3, 5.5 KB (added by agnat, 2 years ago)

more clean-up

Line 
1Index: loader/shp2pgsql-core.c
2===================================================================
3--- loader/shp2pgsql-core.c     (revision 5557)
4+++ loader/shp2pgsql-core.c     (working copy)
5@@ -40,7 +40,7 @@
6  * Internal functions
7  */
8 
9-char *utf8(const char *fromcode, char *inputbuf);
10+char *utf8(const SHPLOADERCONFIG *config, char *inputbuf);
11 void vasbappend(stringbuffer_t *sb, char *fmt, ... );
12 char *escape_copy_string(char *str);
13 char *escape_insert_string(char *str);
14@@ -77,7 +77,7 @@
15 
16 /* Return allocated string containing UTF8 string converted from encoding fromcode */
17 char *
18-utf8(const char *fromcode, char *inputbuf)
19+utf8(const SHPLOADERCONFIG *config, char *inputbuf)
20 {
21        iconv_t cd;
22        char *inbufptr = inputbuf;
23@@ -88,7 +88,7 @@
24 
25        inbytesleft = strlen(inputbuf);
26 
27-       cd = iconv_open("UTF-8", fromcode);
28+       cd = iconv_open(config->target_encoding, config->encoding);
29        if ( cd == ((iconv_t)(-1)) )
30                return NULL;
31 
32@@ -421,7 +421,7 @@
33        {
34                lwcollection = lwcollection_construct(MULTILINETYPE, state->config->sr_id, NULL, obj->nParts, lwmultilinestrings);
35                serialized_lwgeom = lwgeom_serialize(lwcollection_as_lwgeom(lwcollection));
36-               
37+
38        }
39        else
40        {
41@@ -871,6 +871,7 @@
42        config->createindex = 0;
43        config->readshape = 1;
44        config->encoding = strdup(ENCODING_DEFAULT);
45+       config->target_encoding = DEFAULT_ICONV_POLICY;
46        config->null_policy = POLICY_NULL_INSERT;
47        config->sr_id = -1;
48        config->hwgeom = 0;
49@@ -1163,10 +1164,10 @@
50                if (state->config->encoding)
51                {
52                        /* If we are converting from another encoding to UTF8, convert the field name to UTF8 */
53-                       utf8str = utf8(state->config->encoding, name);
54+                       utf8str = utf8(state->config, name);
55                        if (!utf8str)
56                        {
57-                               snprintf(state->message, SHPLOADERMSGLEN, "Unable to convert field name \"%s\" from %s encoding to UTF-8: iconv reports \"%s\"", name, state->config->encoding, strerror(errno));
58+                               snprintf(state->message, SHPLOADERMSGLEN, "Unable to convert field name \"%s\" from %s encoding to %s: iconv reports \"%s\"", name, state->config->encoding, state->config->target_encoding, strerror(errno));
59                                return SHPLOADERERR;
60                        }
61 
62@@ -1590,10 +1591,10 @@
63                        if (state->config->encoding)
64                        {
65                                /* If we are converting from another encoding to UTF8, convert the field value to UTF8 */
66-                               utf8str = utf8(state->config->encoding, val);
67+                               utf8str = utf8(state->config, val);
68                                if (!utf8str)
69                                {
70-                                       snprintf(state->message, SHPLOADERMSGLEN, "Unable to convert field value \"%s\" from %s encoding to UTF-8: iconv reports \"%s\"", val, state->config->encoding, strerror(errno));
71+                                       snprintf(state->message, SHPLOADERMSGLEN, "Unable to convert field value \"%s\" from %s encoding to %s: iconv reports \"%s\"", val, state->config->encoding, state->config->target_encoding, strerror(errno));
72                                        return SHPLOADERERR;
73                                }
74 
75Index: loader/shp2pgsql-core.h
76===================================================================
77--- loader/shp2pgsql-core.h     (revision 5557)
78+++ loader/shp2pgsql-core.h     (working copy)
79@@ -40,6 +40,15 @@
80 
81 
82 /*
83+ * Error policies for iconv
84+ */
85+
86+#define ICONV_POLICY_ERROR      "UTF-8"
87+#define ICONV_POLICY_IGNORE     "UTF-8//IGNORE"
88+
89+#define DEFAULT_ICONV_POLICY    ICONV_POLICY_ERROR
90+
91+/*
92  * Error message handling
93  */
94 
95@@ -71,7 +80,7 @@
96 #define ENCODING_DEFAULT "WINDOWS-1252"
97 
98 /*
99- * Structure to hold the loader configuration options
100+ * Structure to hold the loader configuration options
101  */
102 
103 typedef struct shp_loader_config
104@@ -86,7 +95,7 @@
105        char *schema;
106 
107        /* geometry column name to use */
108-       char *geom;
109+       char *geom;
110 
111        /* the shape file (without the .shp extension) */
112        char *shp_file;
113@@ -96,7 +105,7 @@
114 
115        /* 0 = MULTIPOLYGON/MULTILINESTRING, 1 = force to POLYGON/LINESTRING */
116        int simple_geometries;
117-       
118+
119        /* 0 = geometry, 1 = geography */
120        int geography;
121 
122@@ -115,6 +124,9 @@
123        /* iconv encoding name */
124        char *encoding;
125 
126+       /* iconv target encoding: always UTF-8, optionally with a suffix such as //IGNORE that selects the iconv error policy */
127+       const char *target_encoding;
128+
129        /* how to handle nulls */
130        int null_policy;
131 
132@@ -128,7 +140,7 @@
133 
134 
135 /*
136- * Structure to holder the current loader state
137+ * Structure to hold the current loader state
138  */
139 
140 typedef struct shp_loader_state
141@@ -138,7 +150,7 @@
142 
143        /* Shapefile handle */
144        SHPHandle hSHPHandle;
145-       
146+
147        /* Shapefile type */
148        int shpfiletype;
149 
150@@ -182,6 +194,8 @@
151        /* Last (error) message */
152        char message[SHPLOADERMSGLEN];
153 
154+       /* iconv conversion descriptor */
155+       iconv_t conv_desc;
156 } SHPLOADERSTATE;
157 
158 
159Index: loader/shp2pgsql-cli.c
160===================================================================
161--- loader/shp2pgsql-cli.c      (revision 5557)
162+++ loader/shp2pgsql-cli.c      (working copy)
163@@ -41,6 +41,7 @@
164        printf("  -S  Generate simple geometries instead of MULTI geometries.\n");
165        printf("  -W <encoding> Specify the character encoding of Shape's\n");
166        printf("     attribute column. (default : \""ENCODING_DEFAULT"\")\n");
167+       printf("  -C  Ignore character encoding errors in dbf file\n");
168        printf("  -N <policy> NULL geometries handling policy (insert*,skip,abort)\n");
169        printf("  -n  Only import DBF file.\n");
170        printf("  -?  Display this help screen.\n");
171@@ -68,7 +69,7 @@
172        config = malloc(sizeof(SHPLOADERCONFIG));
173        set_config_defaults(config);
174 
175-       while ((c = pgis_getopt(argc, argv, "kcdapGDs:Sg:iW:wIN:n")) != EOF)
176+       while ((c = pgis_getopt(argc, argv, "kcdapGDs:Sg:iW:wIN:nC")) != EOF)
177        {
178                switch (c)
179                {
180@@ -132,6 +133,9 @@
181                        config->encoding = pgis_optarg;
182                        break;
183 
184+               case 'C':
185+                       config->target_encoding = ICONV_POLICY_IGNORE;
186+                       break;
187                case 'N':
188                        switch (pgis_optarg[0])
189                        {