root/spike/wktraster/doc/RFC1-SerializedFormat

Revision 5861, 8.2 KB (checked in by mloskot, 21 months ago)

More PT_16BF cleanup missing from the original commits (#226)

  • Property svn:keywords set to Author Date Id Revision
Line 
1RFC1: serialized format (storage) for RASTER type
2------------------------------------------------------
3$Author$
4$Date$
5$Revision$
6------------------------------------------------------
7
8The goals of the serialized version for RASTER type are:
9
10 - Small memory footprint on deserialization
11   This means that the amount of allocated memory
12   required for deserialization is minimal
13
14 - Fast access
15   Access to band data must be aligned, avoiding
16   memory copies on a full scan.
17
18 - Ease of format switch
19   On-disk format must be allowed to change
20   w/out need for dump-reload of the whole
21   database.
22
23The first two goals boil down to forcing alignment of band
24data in the serialized format itself, which in turn will
25require variable padding based on pixeltype of each band.
26
27For simplicity we will ensure that each band of the
28raster starts at an 8-byte boundary and thus pad
29previous structures in the stream accordingly.
30
31The structure will then look like this:
32
33 [HEADER]  [BAND0]    [BAND1]    [BAND2]
34           ^aligned   ^aligned   ^aligned
35
36The third goal can be accomplished by adding a version
37number to the serialized format so that in case of changes
38the deserializer can pick the correct parsing procedure
39based on that.
40
41The HEADER
42----------
43
44PostgreSQL forces a 4-byte size field at the start of
45the detoasted datum, and ensures that the start of the structure
46is aligned to 8 bytes.  We'll add a version number right after it,
47and then make sure the total size is a multiple of 8 bytes.
48
49 The following structure is composed of 8 slots of 8 bytes each,
50 totaling 64 bytes:
51
52 struct rt_raster_serialized_t {
53
54    /*---[ 8 byte boundary ]---{ */
55    uint32_t size;    /* required by postgresql: 4 bytes */
56    uint16_t version; /* format version (this is version 0): 2 bytes */
57    uint16_t numBands; /* Number of bands: 2 bytes */
58
59    /* }---[ 8 byte boundary ]---{ */
60    double scaleX; /* pixel width: 8 bytes */
61
62    /* }---[ 8 byte boundary ]---{ */
63    double scaleY; /* pixel height: 8 bytes */
64
65    /* }---[ 8 byte boundary ]---{ */
66    double ipX; /* insertion point X: 8 bytes */
67
68    /* }---[ 8 byte boundary ]---{ */
69    double ipY; /* insertion point Y: 8 bytes */
70
71    /* }---[ 8 byte boundary ]---{ */
72    double skewX;  /* rotation about the X axis: 8 bytes */
73
74    /* }---[ 8 byte boundary ]---{ */
75    double skewY;  /* rotation about the Y axis: 8 bytes */
76
77    /* }---[ 8 byte boundary ]--- */
78    int32_t srid; /* Spatial reference id: 4 bytes */
79    uint16_t width;  /* pixel columns: 2 bytes */
80    uint16_t height; /* pixel rows: 2 bytes */
81 };
82
83The BANDS
84---------
85
86Given the serialized raster header structure above, it
87is guaranteed that a serialized band always starts at an 8-byte
88boundary, so it's simpler to compute the padding required at
89the end of each band to ensure the next band is guaranteed
90the same 8-byte alignment.
91
92We'll need to take 2 padding spots into account:
93the first ensures the actual band data is aligned according
94to the needs of the pixel type (and storage flag); the second
95ensures the next band (if any) will also be aligned to 8 bytes:
96
97 [PIXELTYPE+STORAGE_FLAG] [DATA_PADDING] [DATA] [TRAILING_PADDING]
98
99The total size of a band's serialized form in bytes
100must be a multiple of 8.
101
102The maximum required data padding size is 7 bytes
103(64-bit pixel type). The maximum required trailing padding size
104is also 7 bytes.
105
106 Pixel type and storage flag
107 ---------------------------
108
109 Pixel type specifies type of pixel values in a band.
110 Storage flag specifies whether the band data is stored
111 as part of the datum or is to be found on the server's
112 filesystem.
113
114 There are currently 11 supported pixel value types, so 4
115 bits are enough to account for all. We'll reserve
116 the upper 4 bits for generic flags and define the uppermost as
117 the storage flag:
118 
119 #define BANDTYPE_FLAGS_MASK 0xF0
120 #define BANDTYPE_PIXTYPE_MASK 0x0F
121
122 #define BANDTYPE_FLAG_OFFDB     (1<<7)
123 #define BANDTYPE_FLAG_HASNODATA (1<<6)
124 #define BANDTYPE_FLAG_RESERVED2 (1<<5)
125 #define BANDTYPE_FLAG_RESERVED3 (1<<4)
126
127 Data padding
128 ------------
129
130 Band alignment depends on pixeltypes, as follows:
131
132    - PT_1BB, PT_2BUI, PT_4BUI, PT_8BSI, PT_8BUI:
133      No alignment required, each value is 1 byte.
134
135    - PT_16BSI, PT_16BUI:
136      Data must be aligned to 2-bytes boundary.
137
138    - PT_32BSI, PT_32BUI, PT_32BF:
139      Data must be aligned to 4-bytes boundary.
140
141    - PT_64BF:
142      Data must be aligned to 8-bytes boundary.
143 
144 Accordingly we can then define the following structures:
145
146      struct rt_band8_serialized_t {
147            uint8_t pixeltype;
148            uint8_t data[1]; /* no data padding */
149      }
150
151      struct rt_band16_serialized_t {
152            uint8_t pixeltype;
153            uint8_t padding; /* 1-byte padding */
154            uint8_t data[1];
155      }
156
157      struct rt_band32_serialized_t {
158            uint8_t pixeltype;
159            uint8_t padding[3]; /* 3-bytes padding */
160            uint8_t data[1];
161      }
162
163      struct rt_band64_serialized_t {
164            uint8_t pixeltype;
165            uint8_t padding[7]; /* 7-bytes padding */
166            uint8_t data[1];
167      }
168
169 And an abstract base class:
170
171      struct rt_band_serialized_t {
172            uint8_t pixeltype
173      }
174
175 Data
176 ----
177
178 The band data - guaranteed to be always aligned as required by
179 pixeltype - will start with the nodata value.
180 After that we may have pixel values or off-db raster reference
181 depending on OFFDB flag in the pixeltype field:
182
183 * For in-db bands the nodata value is followed by a value
184   for each column in first row, then in second row and so on.
185   For example, a 2x2 raster band data will have this form:
186
187      [nodata] [x:0,y:0] [x:1,y:0] [x:0,y:1] [x:1,y:1]
188
189   Where the size of each [...] block is 1, 2, 4 or 8 bytes depending
190   on pixeltype. Endianness of multi-byte values is the host endianness.
191
192 * For off-db bands the nodata value is followed by a band number
193   followed by a null-terminated string expressing the path to
194   the raster file:
195
196      [nodata] [bandno] [path]
197
198   Where the size of the [nodata] block is 1, 2, 4 or 8 bytes depending
199   on pixeltype (endianness of multi-byte values is the host endianness),
200   size of [bandno] is 1 byte, and [path] is null-terminated.
201   
202
203 Trailing padding
204 ----------------
205
206 The trailing band padding is used to ensure the next band (if any)
207 will start on an 8-byte boundary.
208 It depends on both the raster dimensions (number of values)
209 and the band data pixel type (size of each value).
210
211 In order to obtain the required padding size for a band
212 we'll need to compute the minimum size required to hold the band
213 data, add the data padding and pixeltype sizes, and then grow
214 the resulting size to reach a multiple of 8 bytes:
215
216    size_t
217    rt_band_serialized_size(rt_context ctx, rt_band band)
218    {
219        rt_pixtype pixtype = rt_band_get_pixtype(ctx, band);
220        size_t sz;
221
222        /* pixeltype + data padding */
223        sz = rt_pixtype_alignment(ctx, pixtype);
224
225        /* add data size */
226        sz += rt_band_get_data_size(ctx, band);
227
228        /* grow size to reach a multiple of 8 bytes */
229        sz = TYPEALIGN(sz, 8);
230
231        assert( !(sz%8) );
232
233        return sz;
234    }
235
236
237Example sizes
238-------------
239
240255x255 single band PT_16BUI:
241    header size:                        64 +
242    pixeltype+data_padding:              2 +
243    data size:     (255*255+1)*2 == 130052 =
244                                    130118 +
245    trailing padding:                    2 =
246    total size:                     130120 (~127k)
247
248255x255 single band PT_8BUI:
249    header size:                        64 +
250    pixeltype+data_padding:              1 +
251    data size:        (255*255+1) == 65026 =
252                                     65091 +
253    trailing padding:                    5 =
254    total size:                      65096 (~63k)
255
25664x64 single band PT_16BSI:
257    header size:                        64 +
258    pixeltype+data_padding:              2 +
259    data size:         (64*64+1)*2 == 8194 =
260                                      8260 +
261    trailing padding:                    4 =
262    total size:                       8264 (~8k -- >page size)
263
26464x64 single band PT_8BUI:
265    header size:                        64 +
266    pixeltype+data_padding:              1 +
267    data size:           (64*64+1) == 4097 =
268                                      4162 +
269    trailing padding:                    6 =
270    total size:                       4168 (~4k)
Note: See TracBrowser for help on using the browser.