source: trunk/gdal/alg/gdalwarpkernel.cpp

Last change on this file was 41310, checked in by Kurt Schwehr, 7 years ago

DBL_MAX -> std::numeric_limits in gdalwarpkernel.cpp

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 231.0 KB
Line 
1/******************************************************************************
2 *
3 * Project: High Performance Image Reprojector
4 * Purpose: Implementation of the GDALWarpKernel class. Implements the actual
5 * image warping for a "chunk" of input and output imagery already
6 * loaded into memory.
7 * Author: Frank Warmerdam, warmerdam@pobox.com
8 *
9 ******************************************************************************
10 * Copyright (c) 2003, Frank Warmerdam <warmerdam@pobox.com>
11 * Copyright (c) 2008-2013, Even Rouault <even dot rouault at mines-paris dot org>
12 *
13 * Permission is hereby granted, free of charge, to any person obtaining a
14 * copy of this software and associated documentation files (the "Software"),
15 * to deal in the Software without restriction, including without limitation
16 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
17 * and/or sell copies of the Software, and to permit persons to whom the
18 * Software is furnished to do so, subject to the following conditions:
19 *
20 * The above copyright notice and this permission notice shall be included
21 * in all copies or substantial portions of the Software.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
24 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
26 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
28 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
29 * DEALINGS IN THE SOFTWARE.
30 ****************************************************************************/
31
32#include "cpl_port.h"
33#include "gdalwarper.h"
34
35#include <cfloat>
36#include <cmath>
37#include <cstddef>
38#include <cstdlib>
39#include <cstring>
40
41#include <algorithm>
42#include <limits>
43#include <new>
44#include <vector>
45
46#include "cpl_atomic_ops.h"
47#include "cpl_conv.h"
48#include "cpl_error.h"
49#include "cpl_multiproc.h"
50#include "cpl_progress.h"
51#include "cpl_string.h"
52#include "cpl_vsi.h"
53#include "cpl_worker_thread_pool.h"
54#include "gdal.h"
55#include "gdal_alg.h"
56#include "gdal_alg_priv.h"
57#include "gdalwarpkernel_opencl.h"
58
59// We restrict to 64bit processors because they are guaranteed to have SSE2.
60// Could possibly be used too on 32bit, but we would need to check at runtime.
61#if defined(__x86_64) || defined(_M_X64)
62#include <gdalsse_priv.h>
63
64#if __SSE4_1__
65#include <smmintrin.h>
66#endif
67
68#if __SSE3__
69#include <pmmintrin.h>
70#endif
71
72#endif
73
74CPL_CVSID("$Id: gdalwarpkernel.cpp 41310 2018-01-22 06:16:04Z goatbar $")
75
76constexpr double BAND_DENSITY_THRESHOLD = 0.0000000001;
77constexpr float SRC_DENSITY_THRESHOLD = 0.000000001f;
78
79// #define INSTANTIATE_FLOAT64_SSE2_IMPL
80
81static const int anGWKFilterRadius[] =
82{
83 0, // Nearest neighbour
84 1, // Bilinear
85 2, // Cubic Convolution (Catmull-Rom)
86 2, // Cubic B-Spline
87 3, // Lanczos windowed sinc
88 0, // Average
89 0, // Mode
90 0, // Reserved GRA_Gauss=7
91 0, // Max
92 0, // Min
93 0, // Med
94 0, // Q1
95 0, // Q3
96};
97
98static double GWKBilinear(double dfX);
99static double GWKCubic(double dfX);
100static double GWKBSpline(double dfX);
101static double GWKLanczosSinc(double dfX);
102
103static const FilterFuncType apfGWKFilter[] =
104{
105 nullptr, // Nearest neighbour
106 GWKBilinear, // Bilinear
107 GWKCubic, // Cubic Convolution (Catmull-Rom)
108 GWKBSpline, // Cubic B-Spline
109 GWKLanczosSinc, // Lanczos windowed sinc
110 nullptr, // Average
111 nullptr, // Mode
112 nullptr, // Reserved GRA_Gauss=7
113 nullptr, // Max
114 nullptr, // Min
115 nullptr, // Med
116 nullptr, // Q1
117 nullptr, // Q3
118};
119
120// TODO(schwehr): Can we make these functions have a const * const arg?
121static double GWKBilinear4Values(double* padfVals);
122static double GWKCubic4Values(double* padfVals);
123static double GWKBSpline4Values(double* padfVals);
124static double GWKLanczosSinc4Values(double* padfVals);
125
126static const FilterFunc4ValuesType apfGWKFilter4Values[] =
127{
128 nullptr, // Nearest neighbour
129 GWKBilinear4Values, // Bilinear
130 GWKCubic4Values, // Cubic Convolution (Catmull-Rom)
131 GWKBSpline4Values, // Cubic B-Spline
132 GWKLanczosSinc4Values, // Lanczos windowed sinc
133 nullptr, // Average
134 nullptr, // Mode
135 nullptr, // Reserved GRA_Gauss=7
136 nullptr, // Max
137 nullptr, // Min
138 nullptr, // Med
139 nullptr, // Q1
140 nullptr, // Q3
141};
142
143int GWKGetFilterRadius(GDALResampleAlg eResampleAlg)
144{
145 return anGWKFilterRadius[eResampleAlg];
146}
147
148FilterFuncType GWKGetFilterFunc(GDALResampleAlg eResampleAlg)
149{
150 return apfGWKFilter[eResampleAlg];
151}
152
153FilterFunc4ValuesType GWKGetFilterFunc4Values(GDALResampleAlg eResampleAlg)
154{
155 return apfGWKFilter4Values[eResampleAlg];
156}
157
158#ifdef HAVE_OPENCL
159static CPLErr GWKOpenCLCase( GDALWarpKernel * );
160#endif
161
162static CPLErr GWKGeneralCase( GDALWarpKernel * );
163static CPLErr GWKRealCase( GDALWarpKernel *poWK );
164static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte( GDALWarpKernel *poWK );
165static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte( GDALWarpKernel *poWK );
166static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte( GDALWarpKernel *poWK );
167static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat( GDALWarpKernel *poWK );
168#ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
169static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble( GDALWarpKernel *poWK );
170#endif
171static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte( GDALWarpKernel *poWK );
172static CPLErr GWKNearestByte( GDALWarpKernel *poWK );
173static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort( GDALWarpKernel *poWK );
174static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort( GDALWarpKernel *poWK );
175static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat( GDALWarpKernel *poWK );
176#ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
177static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble( GDALWarpKernel *poWK );
178#endif
179static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort( GDALWarpKernel *poWK );
180static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort( GDALWarpKernel *poWK );
181static CPLErr GWKNearestShort( GDALWarpKernel *poWK );
182static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat( GDALWarpKernel *poWK );
183static CPLErr GWKNearestFloat( GDALWarpKernel *poWK );
184static CPLErr GWKAverageOrMode( GDALWarpKernel * );
185static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort( GDALWarpKernel * );
186static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort( GDALWarpKernel * );
187static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort( GDALWarpKernel * );
188
189/************************************************************************/
190/* GWKJobStruct */
191/************************************************************************/
192
193typedef struct _GWKJobStruct GWKJobStruct;
194
195struct _GWKJobStruct
196{
197 GDALWarpKernel *poWK;
198 int iYMin;
199 int iYMax;
200 volatile int *pnCounter;
201 volatile int *pbStop;
202 CPLCond *hCond;
203 CPLMutex *hCondMutex;
204 int (*pfnProgress)(GWKJobStruct* psJob);
205 void *pTransformerArg;
206
207 // Just used during thread initialization phase.
208 GDALTransformerFunc pfnTransformerInit;
209 void *pTransformerArgInit;
210} ;
211
212/************************************************************************/
213/* GWKProgressThread() */
214/************************************************************************/
215
216// Return TRUE if the computation must be interrupted.
217static int GWKProgressThread( GWKJobStruct* psJob )
218{
219 CPLAcquireMutex(psJob->hCondMutex, 1.0);
220 (*(psJob->pnCounter))++;
221 CPLCondSignal(psJob->hCond);
222 int bStop = *(psJob->pbStop);
223 CPLReleaseMutex(psJob->hCondMutex);
224
225 return bStop;
226}
227
228/************************************************************************/
229/* GWKProgressMonoThread() */
230/************************************************************************/
231
232// Return TRUE if the computation must be interrupted.
233static int GWKProgressMonoThread( GWKJobStruct* psJob )
234{
235 GDALWarpKernel *poWK = psJob->poWK;
236 int nCounter = ++(*(psJob->pnCounter));
237 if( !poWK->pfnProgress( poWK->dfProgressBase + poWK->dfProgressScale *
238 (nCounter / static_cast<double>(psJob->iYMax)),
239 "", poWK->pProgress ) )
240 {
241 CPLError( CE_Failure, CPLE_UserInterrupt, "User terminated" );
242 *(psJob->pbStop) = TRUE;
243 return TRUE;
244 }
245 return FALSE;
246}
247
248/************************************************************************/
249/* GWKGenericMonoThread() */
250/************************************************************************/
251
252static CPLErr GWKGenericMonoThread( GDALWarpKernel *poWK,
253 void (*pfnFunc) (void *pUserData) )
254{
255 volatile int bStop = FALSE;
256 volatile int nCounter = 0;
257
258 GWKJobStruct sThreadJob;
259 sThreadJob.poWK = poWK;
260 sThreadJob.pnCounter = &nCounter;
261 sThreadJob.iYMin = 0;
262 sThreadJob.iYMax = poWK->nDstYSize;
263 sThreadJob.pbStop = &bStop;
264 sThreadJob.hCond = nullptr;
265 sThreadJob.hCondMutex = nullptr;
266 sThreadJob.pfnProgress = GWKProgressMonoThread;
267 sThreadJob.pTransformerArg = poWK->pTransformerArg;
268
269 pfnFunc(&sThreadJob);
270
271 return !bStop ? CE_None : CE_Failure;
272}
273
274/************************************************************************/
275/* GWKThreadInitTransformer() */
276/************************************************************************/
277
278static void GWKThreadInitTransformer(void* pData)
279{
280 GWKJobStruct* psJob = (GWKJobStruct*)pData;
281 if( psJob->pTransformerArg == nullptr )
282 psJob->pTransformerArg =
283 GDALCloneTransformer(psJob->pTransformerArgInit);
284 if( psJob->pTransformerArg != nullptr )
285 {
286 // In case of lazy opening (for example RPCDEM), do a dummy
287 // transformation to be sure that the DEM is really opened with the
288 // context of this thread.
289 double dfX = 0.5;
290 double dfY = 0.5;
291 double dfZ = 0.0;
292 int bSuccess = FALSE;
293 CPLPushErrorHandler(CPLQuietErrorHandler);
294 psJob->pfnTransformerInit(psJob->pTransformerArg, TRUE, 1,
295 &dfX, &dfY, &dfZ, &bSuccess );
296 CPLPopErrorHandler();
297 }
298}
299
300/************************************************************************/
301/* GWKThreadsCreate() */
302/************************************************************************/
303
304typedef struct
305{
306 CPLWorkerThreadPool* poThreadPool;
307 GWKJobStruct* pasThreadJob;
308 CPLCond* hCond;
309 CPLMutex* hCondMutex;
310} GWKThreadData;
311
312void* GWKThreadsCreate( char** papszWarpOptions,
313 GDALTransformerFunc pfnTransformer,
314 void* pTransformerArg )
315{
316 const char* pszWarpThreads =
317 CSLFetchNameValue(papszWarpOptions, "NUM_THREADS");
318 if( pszWarpThreads == nullptr )
319 pszWarpThreads = CPLGetConfigOption("GDAL_NUM_THREADS", "1");
320
321 int nThreads = 0;
322 if( EQUAL(pszWarpThreads, "ALL_CPUS") )
323 nThreads = CPLGetNumCPUs();
324 else
325 nThreads = atoi(pszWarpThreads);
326 if( nThreads <= 1 )
327 nThreads = 0;
328 if( nThreads > 128 )
329 nThreads = 128;
330
331 GWKThreadData* psThreadData = static_cast<GWKThreadData *>(
332 VSI_CALLOC_VERBOSE(1, sizeof(GWKThreadData)));
333 if( psThreadData == nullptr )
334 return nullptr;
335
336 CPLCond* hCond = nullptr;
337 if( nThreads )
338 hCond = CPLCreateCond();
339 if( nThreads && hCond )
340 {
341/* -------------------------------------------------------------------- */
342/* Duplicate pTransformerArg per thread. */
343/* -------------------------------------------------------------------- */
344 bool bTransformerCloningSuccess = true;
345
346 psThreadData->hCond = hCond;
347 psThreadData->pasThreadJob = static_cast<GWKJobStruct *>(
348 VSI_CALLOC_VERBOSE(sizeof(GWKJobStruct), nThreads));
349 if( psThreadData->pasThreadJob == nullptr )
350 {
351 GWKThreadsEnd(psThreadData);
352 return nullptr;
353 }
354
355 psThreadData->hCondMutex = CPLCreateMutex();
356 if( psThreadData->hCondMutex == nullptr )
357 {
358 GWKThreadsEnd(psThreadData);
359 return nullptr;
360 }
361 CPLReleaseMutex(psThreadData->hCondMutex);
362
363 std::vector<void*> apInitData;
364 for( int i = 0; i < nThreads; i++ )
365 {
366 psThreadData->pasThreadJob[i].hCond = psThreadData->hCond;
367 psThreadData->pasThreadJob[i].hCondMutex = psThreadData->hCondMutex;
368 psThreadData->pasThreadJob[i].pfnTransformerInit = pfnTransformer;
369 psThreadData->pasThreadJob[i].pTransformerArgInit = pTransformerArg;
370 if( i == 0 )
371 psThreadData->pasThreadJob[i].pTransformerArg = pTransformerArg;
372 else
373 psThreadData->pasThreadJob[i].pTransformerArg = nullptr;
374 apInitData.push_back(&(psThreadData->pasThreadJob[i]));
375 }
376
377 psThreadData->poThreadPool = new (std::nothrow) CPLWorkerThreadPool();
378 if( psThreadData->poThreadPool == nullptr ||
379 !psThreadData->poThreadPool->Setup(nThreads,
380 GWKThreadInitTransformer,
381 &apInitData[0]) )
382 {
383 GWKThreadsEnd(psThreadData);
384 return nullptr;
385 }
386
387 for( int i = 1; i < nThreads; i++ )
388 {
389 if( psThreadData->pasThreadJob[i].pTransformerArg == nullptr )
390 {
391 CPLDebug("WARP", "Cannot deserialize transformer");
392 bTransformerCloningSuccess = false;
393 break;
394 }
395 }
396
397 if( !bTransformerCloningSuccess )
398 {
399 for( int i = 1; i < nThreads; i++ )
400 {
401 if( psThreadData->pasThreadJob[i].pTransformerArg )
402 GDALDestroyTransformer(psThreadData->
403 pasThreadJob[i].pTransformerArg);
404 }
405 CPLFree(psThreadData->pasThreadJob);
406 psThreadData->pasThreadJob = nullptr;
407 delete psThreadData->poThreadPool;
408 psThreadData->poThreadPool = nullptr;
409
410 CPLDebug("WARP", "Cannot duplicate transformer function. "
411 "Falling back to mono-thread computation");
412 }
413 }
414
415 return psThreadData;
416}
417
418/************************************************************************/
419/* GWKThreadsEnd() */
420/************************************************************************/
421
422void GWKThreadsEnd( void* psThreadDataIn )
423{
424 if( psThreadDataIn == nullptr )
425 return;
426
427 GWKThreadData* psThreadData = static_cast<GWKThreadData *>(psThreadDataIn);
428 if( psThreadData->poThreadPool )
429 {
430 const int nThreads = psThreadData->poThreadPool->GetThreadCount();
431 for( int i = 1; i < nThreads; i++ )
432 {
433 if( psThreadData->pasThreadJob[i].pTransformerArg )
434 GDALDestroyTransformer(psThreadData->
435 pasThreadJob[i].pTransformerArg);
436 }
437 delete psThreadData->poThreadPool;
438 }
439 CPLFree(psThreadData->pasThreadJob);
440 if( psThreadData->hCond )
441 CPLDestroyCond(psThreadData->hCond);
442 if( psThreadData->hCondMutex )
443 CPLDestroyMutex(psThreadData->hCondMutex);
444 CPLFree(psThreadData);
445}
446
447/************************************************************************/
448/* GWKRun() */
449/************************************************************************/
450
451static CPLErr GWKRun( GDALWarpKernel *poWK,
452 const char* pszFuncName,
453 void (*pfnFunc) (void *pUserData) )
454
455{
456 const int nDstYSize = poWK->nDstYSize;
457
458 CPLDebug("GDAL", "GDALWarpKernel()::%s() "
459 "Src=%d,%d,%dx%d Dst=%d,%d,%dx%d",
460 pszFuncName,
461 poWK->nSrcXOff, poWK->nSrcYOff,
462 poWK->nSrcXSize, poWK->nSrcYSize,
463 poWK->nDstXOff, poWK->nDstYOff,
464 poWK->nDstXSize, poWK->nDstYSize);
465
466 if( !poWK->pfnProgress( poWK->dfProgressBase, "", poWK->pProgress ) )
467 {
468 CPLError( CE_Failure, CPLE_UserInterrupt, "User terminated" );
469 return CE_Failure;
470 }
471
472 GWKThreadData* psThreadData =
473 static_cast<GWKThreadData*>(poWK->psThreadData);
474 if( psThreadData == nullptr || psThreadData->poThreadPool == nullptr )
475 {
476 return GWKGenericMonoThread(poWK, pfnFunc);
477 }
478
479 int nThreads =
480 std::min(psThreadData->poThreadPool->GetThreadCount(), nDstYSize / 2);
481 // Config option mostly useful for tests to be able to test multithreading
482 // with small rasters
483 const int nWarpChunkSize = atoi(
484 CPLGetConfigOption("WARP_THREAD_CHUNK_SIZE", "65536"));
485 if( nWarpChunkSize > 0 )
486 {
487 GIntBig nChunks =
488 static_cast<GIntBig>(nDstYSize) * poWK->nDstXSize / nWarpChunkSize;
489 if( nThreads > nChunks )
490 nThreads = static_cast<int>(nChunks);
491 }
492 if( nThreads <= 0 )
493 nThreads = 1;
494
495 CPLDebug("WARP", "Using %d threads", nThreads);
496
497 volatile int bStop = FALSE;
498 volatile int nCounter = 0;
499
500 CPLAcquireMutex(psThreadData->hCondMutex, 1000);
501
502/* -------------------------------------------------------------------- */
503/* Submit jobs */
504/* -------------------------------------------------------------------- */
505 for( int i = 0; i < nThreads; i++ )
506 {
507 psThreadData->pasThreadJob[i].poWK = poWK;
508 psThreadData->pasThreadJob[i].pnCounter = &nCounter;
509 psThreadData->pasThreadJob[i].iYMin =
510 static_cast<int>((static_cast<GIntBig>(i)) * nDstYSize / nThreads);
511 psThreadData->pasThreadJob[i].iYMax =
512 static_cast<int>((static_cast<GIntBig>(i + 1)) *
513 nDstYSize / nThreads);
514 psThreadData->pasThreadJob[i].pbStop = &bStop;
515 if( poWK->pfnProgress != GDALDummyProgress )
516 psThreadData->pasThreadJob[i].pfnProgress = GWKProgressThread;
517 else
518 psThreadData->pasThreadJob[i].pfnProgress = nullptr;
519 psThreadData->poThreadPool->SubmitJob( pfnFunc,
520 (void*) &psThreadData->pasThreadJob[i] );
521 }
522
523/* -------------------------------------------------------------------- */
524/* Report progress. */
525/* -------------------------------------------------------------------- */
526 if( poWK->pfnProgress != GDALDummyProgress )
527 {
528 while( nCounter < nDstYSize )
529 {
530 CPLCondWait(psThreadData->hCond, psThreadData->hCondMutex);
531
532 if( !poWK->pfnProgress(
533 poWK->dfProgressBase + poWK->dfProgressScale *
534 (nCounter / static_cast<double>(nDstYSize)),
535 "", poWK->pProgress ) )
536 {
537 CPLError( CE_Failure, CPLE_UserInterrupt, "User terminated" );
538 bStop = TRUE;
539 break;
540 }
541 }
542 }
543
544 /* Release mutex before joining threads, otherwise they will dead-lock */
545 /* forever in GWKProgressThread() */
546 CPLReleaseMutex(psThreadData->hCondMutex);
547
548/* -------------------------------------------------------------------- */
549/* Wait for all jobs to complete. */
550/* -------------------------------------------------------------------- */
551 psThreadData->poThreadPool->WaitCompletion();
552
553 return !bStop ? CE_None : CE_Failure;
554}
555
556/************************************************************************/
557/* ==================================================================== */
558/* GDALWarpKernel */
559/* ==================================================================== */
560/************************************************************************/
561
562/**
563 * \class GDALWarpKernel "gdalwarper.h"
564 *
565 * Low level image warping class.
566 *
567 * This class is responsible for low level image warping for one
568 * "chunk" of imagery. The class is essentially a structure with all
569 * data members public - primarily so that new special-case functions
570 * can be added without changing the class declaration.
571 *
572 * Applications are normally intended to interactive with warping facilities
573 * through the GDALWarpOperation class, though the GDALWarpKernel can in
574 * theory be used directly if great care is taken in setting up the
575 * control data.
576 *
577 * <h3>Design Issues</h3>
578 *
579 * The intention is that PerformWarp() would analyze the setup in terms
580 * of the datatype, resampling type, and validity/density mask usage and
581 * pick one of many specific implementations of the warping algorithm over
582 * a continuum of optimization vs. generality. At one end there will be a
583 * reference general purpose implementation of the algorithm that supports
584 * any data type (working internally in double precision complex), all three
585 * resampling types, and any or all of the validity/density masks. At the
586 * other end would be highly optimized algorithms for common cases like
587 * nearest neighbour resampling on GDT_Byte data with no masks.
588 *
589 * The full set of optimized versions have not been decided but we should
590 * expect to have at least:
591 * - One for each resampling algorithm for 8bit data with no masks.
592 * - One for each resampling algorithm for float data with no masks.
593 * - One for each resampling algorithm for float data with any/all masks
594 * (essentially the generic case for just float data).
595 * - One for each resampling algorithm for 8bit data with support for
596 * input validity masks (per band or per pixel). This handles the common
597 * case of nodata masking.
598 * - One for each resampling algorithm for float data with support for
599 * input validity masks (per band or per pixel). This handles the common
600 * case of nodata masking.
601 *
602 * Some of the specializations would operate on all bands in one pass
603 * (especially the ones without masking would do this), while others might
604 * process each band individually to reduce code complexity.
605 *
606 * <h3>Masking Semantics</h3>
607 *
608 * A detailed explanation of the semantics of the validity and density masks,
609 * and their effects on resampling kernels is needed here.
610 */
611
612/************************************************************************/
613/* GDALWarpKernel Data Members */
614/************************************************************************/
615
616/**
617 * \var GDALResampleAlg GDALWarpKernel::eResample;
618 *
619 * Resampling algorithm.
620 *
621 * The resampling algorithm to use. One of GRA_NearestNeighbour, GRA_Bilinear,
622 * GRA_Cubic, GRA_CubicSpline, GRA_Lanczos, GRA_Average, or GRA_Mode.
623 *
624 * This field is required. GDT_NearestNeighbour may be used as a default
625 * value.
626 */
627
628/**
629 * \var GDALDataType GDALWarpKernel::eWorkingDataType;
630 *
631 * Working pixel data type.
632 *
633 * The datatype of pixels in the source image (papabySrcimage) and
634 * destination image (papabyDstImage) buffers. Note that operations on
635 * some data types (such as GDT_Byte) may be much better optimized than other
636 * less common cases.
637 *
638 * This field is required. It may not be GDT_Unknown.
639 */
640
641/**
642 * \var int GDALWarpKernel::nBands;
643 *
644 * Number of bands.
645 *
646 * The number of bands (layers) of imagery being warped. Determines the
647 * number of entries in the papabySrcImage, papanBandSrcValid,
648 * and papabyDstImage arrays.
649 *
650 * This field is required.
651 */
652
653/**
654 * \var int GDALWarpKernel::nSrcXSize;
655 *
656 * Source image width in pixels.
657 *
658 * This field is required.
659 */
660
661/**
662 * \var int GDALWarpKernel::nSrcYSize;
663 *
664 * Source image height in pixels.
665 *
666 * This field is required.
667 */
668
669/**
670 * \var double GDALWarpKernel::dfSrcXExtraSize;
671 *
672 * Number of pixels included in nSrcXSize that are present on the edges of
673 * the area of interest to take into account the width of the kernel.
674 *
675 * This field is required.
676 */
677
678/**
679 * \var double GDALWarpKernel::dfSrcYExtraSize;
680 *
681 * Number of pixels included in nSrcYExtraSize that are present on the edges of
682 * the area of interest to take into account the height of the kernel.
683 *
684 * This field is required.
685 */
686
687/**
688 * \var int GDALWarpKernel::papabySrcImage;
689 *
690 * Array of source image band data.
691 *
692 * This is an array of pointers (of size GDALWarpKernel::nBands) pointers
693 * to image data. Each individual band of image data is organized as a single
694 * block of image data in left to right, then bottom to top order. The actual
695 * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
696 *
697 * To access the pixel value for the (x=3, y=4) pixel (zero based) of
698 * the second band with eWorkingDataType set to GDT_Float32 use code like
699 * this:
700 *
701 * \code
702 * float dfPixelValue;
703 * int nBand = 1; // Band indexes are zero based.
704 * int nPixel = 3; // Zero based.
705 * int nLine = 4; // Zero based.
706 *
707 * assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
708 * assert( nLine >= 0 && nLine < poKern->nSrcYSize );
709 * assert( nBand >= 0 && nBand < poKern->nBands );
710 * dfPixelValue = ((float *) poKern->papabySrcImage[nBand-1])
711 * [nPixel + nLine * poKern->nSrcXSize];
712 * \endcode
713 *
714 * This field is required.
715 */
716
717/**
718 * \var GUInt32 **GDALWarpKernel::papanBandSrcValid;
719 *
720 * Per band validity mask for source pixels.
721 *
722 * Array of pixel validity mask layers for each source band. Each of
723 * the mask layers is the same size (in pixels) as the source image with
724 * one bit per pixel. Note that it is legal (and common) for this to be
725 * NULL indicating that none of the pixels are invalidated, or for some
726 * band validity masks to be NULL in which case all pixels of the band are
727 * valid. The following code can be used to test the validity of a particular
728 * pixel.
729 *
730 * \code
731 * int bIsValid = TRUE;
732 * int nBand = 1; // Band indexes are zero based.
733 * int nPixel = 3; // Zero based.
734 * int nLine = 4; // Zero based.
735 *
736 * assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
737 * assert( nLine >= 0 && nLine < poKern->nSrcYSize );
738 * assert( nBand >= 0 && nBand < poKern->nBands );
739 *
740 * if( poKern->papanBandSrcValid != NULL
741 * && poKern->papanBandSrcValid[nBand] != NULL )
742 * {
743 * GUInt32 *panBandMask = poKern->papanBandSrcValid[nBand];
744 * int iPixelOffset = nPixel + nLine * poKern->nSrcXSize;
745 *
746 * bIsValid = panBandMask[iPixelOffset>>5]
747 * & (0x01 << (iPixelOffset & 0x1f));
748 * }
749 * \endcode
750 */
751
752/**
753 * \var GUInt32 *GDALWarpKernel::panUnifiedSrcValid;
754 *
755 * Per pixel validity mask for source pixels.
756 *
757 * A single validity mask layer that applies to the pixels of all source
758 * bands. It is accessed similarly to papanBandSrcValid, but without the
759 * extra level of band indirection.
760 *
761 * This pointer may be NULL indicating that all pixels are valid.
762 *
763 * Note that if both panUnifiedSrcValid, and papanBandSrcValid are available,
764 * the pixel isn't considered to be valid unless both arrays indicate it is
765 * valid.
766 */
767
768/**
769 * \var float *GDALWarpKernel::pafUnifiedSrcDensity;
770 *
771 * Per pixel density mask for source pixels.
772 *
773 * A single density mask layer that applies to the pixels of all source
774 * bands. It contains values between 0.0 and 1.0 indicating the degree to
775 * which this pixel should be allowed to contribute to the output result.
776 *
777 * This pointer may be NULL indicating that all pixels have a density of 1.0.
778 *
779 * The density for a pixel may be accessed like this:
780 *
781 * \code
782 * float fDensity = 1.0;
783 * int nPixel = 3; // Zero based.
784 * int nLine = 4; // Zero based.
785 *
786 * assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
787 * assert( nLine >= 0 && nLine < poKern->nSrcYSize );
788 * if( poKern->pafUnifiedSrcDensity != NULL )
789 * fDensity = poKern->pafUnifiedSrcDensity
790 * [nPixel + nLine * poKern->nSrcXSize];
791 * \endcode
792 */
793
794/**
795 * \var int GDALWarpKernel::nDstXSize;
796 *
797 * Width of destination image in pixels.
798 *
799 * This field is required.
800 */
801
802/**
803 * \var int GDALWarpKernel::nDstYSize;
804 *
805 * Height of destination image in pixels.
806 *
807 * This field is required.
808 */
809
810/**
811 * \var GByte **GDALWarpKernel::papabyDstImage;
812 *
813 * Array of destination image band data.
814 *
815 * This is an array of pointers (of size GDALWarpKernel::nBands) pointers
816 * to image data. Each individual band of image data is organized as a single
817 * block of image data in left to right, then bottom to top order. The actual
818 * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
819 *
820 * To access the pixel value for the (x=3, y=4) pixel (zero based) of
821 * the second band with eWorkingDataType set to GDT_Float32 use code like
822 * this:
823 *
824 * \code
825 * float dfPixelValue;
826 * int nBand = 1; // Band indexes are zero based.
827 * int nPixel = 3; // Zero based.
828 * int nLine = 4; // Zero based.
829 *
830 * assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
831 * assert( nLine >= 0 && nLine < poKern->nDstYSize );
832 * assert( nBand >= 0 && nBand < poKern->nBands );
833 * dfPixelValue = ((float *) poKern->papabyDstImage[nBand-1])
834 * [nPixel + nLine * poKern->nSrcYSize];
835 * \endcode
836 *
837 * This field is required.
838 */
839
840/**
841 * \var GUInt32 *GDALWarpKernel::panDstValid;
842 *
843 * Per pixel validity mask for destination pixels.
844 *
845 * A single validity mask layer that applies to the pixels of all destination
846 * bands. It is accessed similarly to papanUnitifiedSrcValid, but based
847 * on the size of the destination image.
848 *
849 * This pointer may be NULL indicating that all pixels are valid.
850 */
851
852/**
853 * \var float *GDALWarpKernel::pafDstDensity;
854 *
855 * Per pixel density mask for destination pixels.
856 *
857 * A single density mask layer that applies to the pixels of all destination
858 * bands. It contains values between 0.0 and 1.0.
859 *
860 * This pointer may be NULL indicating that all pixels have a density of 1.0.
861 *
862 * The density for a pixel may be accessed like this:
863 *
864 * \code
865 * float fDensity = 1.0;
866 * int nPixel = 3; // Zero based.
867 * int nLine = 4; // Zero based.
868 *
869 * assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
870 * assert( nLine >= 0 && nLine < poKern->nDstYSize );
871 * if( poKern->pafDstDensity != NULL )
872 * fDensity = poKern->pafDstDensity[nPixel + nLine * poKern->nDstXSize];
873 * \endcode
874 */
875
876/**
877 * \var int GDALWarpKernel::nSrcXOff;
878 *
879 * X offset to source pixel coordinates for transformation.
880 *
881 * See pfnTransformer.
882 *
883 * This field is required.
884 */
885
886/**
887 * \var int GDALWarpKernel::nSrcYOff;
888 *
889 * Y offset to source pixel coordinates for transformation.
890 *
891 * See pfnTransformer.
892 *
893 * This field is required.
894 */
895
896/**
897 * \var int GDALWarpKernel::nDstXOff;
898 *
899 * X offset to destination pixel coordinates for transformation.
900 *
901 * See pfnTransformer.
902 *
903 * This field is required.
904 */
905
906/**
907 * \var int GDALWarpKernel::nDstYOff;
908 *
909 * Y offset to destination pixel coordinates for transformation.
910 *
911 * See pfnTransformer.
912 *
913 * This field is required.
914 */
915
916/**
917 * \var GDALTransformerFunc GDALWarpKernel::pfnTransformer;
918 *
919 * Source/destination location transformer.
920 *
921 * The function to call to transform coordinates between source image
922 * pixel/line coordinates and destination image pixel/line coordinates.
923 * See GDALTransformerFunc() for details of the semantics of this function.
924 *
925 * The GDALWarpKern algorithm will only ever use this transformer in
926 * "destination to source" mode (bDstToSrc=TRUE), and will always pass
927 * partial or complete scanlines of points in the destination image as
928 * input. This means, among other things, that it is safe to the the
929 * approximating transform GDALApproxTransform() as the transformation
930 * function.
931 *
932 * Source and destination images may be subsets of a larger overall image.
933 * The transformation algorithms will expect and return pixel/line coordinates
934 * in terms of this larger image, so coordinates need to be offset by
935 * the offsets specified in nSrcXOff, nSrcYOff, nDstXOff, and nDstYOff before
936 * passing to pfnTransformer, and after return from it.
937 *
938 * The GDALWarpKernel::pfnTransformerArg value will be passed as the callback
939 * data to this function when it is called.
940 *
941 * This field is required.
942 */
943
944/**
945 * \var void *GDALWarpKernel::pTransformerArg;
946 *
947 * Callback data for pfnTransformer.
948 *
949 * This field may be NULL if not required for the pfnTransformer being used.
950 */
951
952/**
953 * \var GDALProgressFunc GDALWarpKernel::pfnProgress;
954 *
955 * The function to call to report progress of the algorithm, and to check
956 * for a requested termination of the operation. It operates according to
957 * GDALProgressFunc() semantics.
958 *
959 * Generally speaking the progress function will be invoked for each
960 * scanline of the destination buffer that has been processed.
961 *
962 * This field may be NULL (internally set to GDALDummyProgress()).
963 */
964
965/**
966 * \var void *GDALWarpKernel::pProgress;
967 *
968 * Callback data for pfnProgress.
969 *
970 * This field may be NULL if not required for the pfnProgress being used.
971 */
972
973/************************************************************************/
974/* GDALWarpKernel() */
975/************************************************************************/
976
977GDALWarpKernel::GDALWarpKernel() :
978 papszWarpOptions(nullptr),
979 eResample(GRA_NearestNeighbour),
980 eWorkingDataType(GDT_Unknown),
981 nBands(0),
982 nSrcXSize(0),
983 nSrcYSize(0),
984 dfSrcXExtraSize(0.0),
985 dfSrcYExtraSize(0.0),
986 papabySrcImage(nullptr),
987 papanBandSrcValid(nullptr),
988 panUnifiedSrcValid(nullptr),
989 pafUnifiedSrcDensity(nullptr),
990 nDstXSize(0),
991 nDstYSize(0),
992 papabyDstImage(nullptr),
993 panDstValid(nullptr),
994 pafDstDensity(nullptr),
995 dfXScale(1.0),
996 dfYScale(1.0),
997 dfXFilter(0.0),
998 dfYFilter(0.0),
999 nXRadius(0),
1000 nYRadius(0),
1001 nFiltInitX(0),
1002 nFiltInitY(0),
1003 nSrcXOff(0),
1004 nSrcYOff(0),
1005 nDstXOff(0),
1006 nDstYOff(0),
1007 pfnTransformer(nullptr),
1008 pTransformerArg(nullptr),
1009 pfnProgress(GDALDummyProgress),
1010 pProgress(nullptr),
1011 dfProgressBase(0.0),
1012 dfProgressScale(1.0),
1013 padfDstNoDataReal(nullptr),
1014 psThreadData(nullptr)
1015{}
1016
1017/************************************************************************/
1018/* ~GDALWarpKernel() */
1019/************************************************************************/
1020
1021GDALWarpKernel::~GDALWarpKernel() {}
1022
1023/************************************************************************/
1024/* PerformWarp() */
1025/************************************************************************/
1026
1027/**
1028 * \fn CPLErr GDALWarpKernel::PerformWarp();
1029 *
1030 * This method performs the warp described in the GDALWarpKernel.
1031 *
1032 * @return CE_None on success or CE_Failure if an error occurs.
1033 */
1034
1035CPLErr GDALWarpKernel::PerformWarp()
1036
1037{
1038 const CPLErr eErr = Validate();
1039
1040 if( eErr != CE_None )
1041 return eErr;
1042
1043 // See #2445 and #3079.
1044 if( nSrcXSize <= 0 || nSrcYSize <= 0 )
1045 {
1046 if( !pfnProgress( dfProgressBase + dfProgressScale,
1047 "", pProgress ) )
1048 {
1049 CPLError( CE_Failure, CPLE_UserInterrupt, "User terminated" );
1050 return CE_Failure;
1051 }
1052 return CE_None;
1053 }
1054
1055/* -------------------------------------------------------------------- */
1056/* Pre-calculate resampling scales and window sizes for filtering. */
1057/* -------------------------------------------------------------------- */
1058
1059 dfXScale = static_cast<double>(nDstXSize) / (nSrcXSize - dfSrcXExtraSize);
1060 dfYScale = static_cast<double>(nDstYSize) / (nSrcYSize - dfSrcYExtraSize);
1061 if( nSrcXSize >= nDstXSize && nSrcXSize <= nDstXSize + dfSrcXExtraSize )
1062 dfXScale = 1.0;
1063 if( nSrcYSize >= nDstYSize && nSrcYSize <= nDstYSize + dfSrcYExtraSize )
1064 dfYScale = 1.0;
1065 if( dfXScale < 1.0 )
1066 {
1067 double dfXReciprocalScale = 1.0 / dfXScale;
1068 const int nXReciprocalScale =
1069 static_cast<int>(dfXReciprocalScale + 0.5);
1070 if( fabs(dfXReciprocalScale-nXReciprocalScale) < 0.05 )
1071 dfXScale = 1.0 / nXReciprocalScale;
1072 }
1073 if( dfYScale < 1.0 )
1074 {
1075 double dfYReciprocalScale = 1.0 / dfYScale;
1076 const int nYReciprocalScale =
1077 static_cast<int>(dfYReciprocalScale + 0.5);
1078 if( fabs(dfYReciprocalScale-nYReciprocalScale) < 0.05 )
1079 dfYScale = 1.0 / nYReciprocalScale;
1080 }
1081
1082 // XSCALE and YSCALE undocumented for now. Can help in some cases.
1083 // Best would probably be a per-pixel scale computation.
1084 const char* pszXScale = CSLFetchNameValue(papszWarpOptions, "XSCALE");
1085 if( pszXScale != nullptr )
1086 dfXScale = CPLAtof(pszXScale);
1087 const char* pszYScale = CSLFetchNameValue(papszWarpOptions, "YSCALE");
1088 if( pszYScale != nullptr )
1089 dfYScale = CPLAtof(pszYScale);
1090
1091#if DEBUG_VERBOSE
1092 CPLDebug("WARP", "dfXScale = %f, dfYScale = %f", dfXScale, dfYScale);
1093#endif
1094
1095 const int bUse4SamplesFormula = dfXScale >= 0.95 && dfYScale >= 0.95;
1096
1097 // Safety check for callers that would use GDALWarpKernel without using
1098 // GDALWarpOperation.
1099 if( (eResample == GRA_CubicSpline || eResample == GRA_Lanczos ||
1100 ((eResample == GRA_Cubic || eResample == GRA_Bilinear) &&
1101 !bUse4SamplesFormula)) &&
1102 atoi(CSLFetchNameValueDef(papszWarpOptions,
1103 "EXTRA_ELTS", "0") ) != WARP_EXTRA_ELTS )
1104 {
1105 CPLError( CE_Failure, CPLE_AppDefined,
1106 "Source arrays must have WARP_EXTRA_ELTS extra elements at "
1107 "their end. "
1108 "See GDALWarpKernel class definition. If this condition is "
1109 "fulfilled, define a EXTRA_ELTS=%d warp options",
1110 WARP_EXTRA_ELTS);
1111 return CE_Failure;
1112 }
1113
1114 dfXFilter = anGWKFilterRadius[eResample];
1115 dfYFilter = anGWKFilterRadius[eResample];
1116
1117 nXRadius =
1118 dfXScale < 1.0
1119 ? static_cast<int>(ceil(dfXFilter / dfXScale))
1120 : static_cast<int>(dfXFilter);
1121 nYRadius =
1122 dfYScale < 1.0
1123 ? static_cast<int>(ceil(dfYFilter / dfYScale))
1124 : static_cast<int>(dfYFilter);
1125
1126 // Filter window offset depends on the parity of the kernel radius.
1127 nFiltInitX = ((anGWKFilterRadius[eResample] + 1) % 2) - nXRadius;
1128 nFiltInitY = ((anGWKFilterRadius[eResample] + 1) % 2) - nYRadius;
1129
1130/* -------------------------------------------------------------------- */
1131/* Set up resampling functions. */
1132/* -------------------------------------------------------------------- */
1133 if( CPLFetchBool( papszWarpOptions, "USE_GENERAL_CASE", false ) )
1134 return GWKGeneralCase( this );
1135
1136#if defined(HAVE_OPENCL)
1137 if( (eWorkingDataType == GDT_Byte
1138 || eWorkingDataType == GDT_CInt16
1139 || eWorkingDataType == GDT_UInt16
1140 || eWorkingDataType == GDT_Int16
1141 || eWorkingDataType == GDT_CFloat32
1142 || eWorkingDataType == GDT_Float32) &&
1143 (eResample == GRA_Bilinear
1144 || eResample == GRA_Cubic
1145 || eResample == GRA_CubicSpline
1146 || eResample == GRA_Lanczos) &&
1147 CPLFetchBool( papszWarpOptions, "USE_OPENCL", true ) )
1148 {
1149 const CPLErr eResult = GWKOpenCLCase( this );
1150
1151 // CE_Warning tells us a suitable OpenCL environment was not available
1152 // so we fall through to other CPU based methods.
1153 if( eResult != CE_Warning )
1154 return eResult;
1155 }
1156#endif // defined HAVE_OPENCL
1157
1158 const bool bNoMasksOrDstDensityOnly =
1159 papanBandSrcValid == nullptr
1160 && panUnifiedSrcValid == nullptr
1161 && pafUnifiedSrcDensity == nullptr
1162 && panDstValid == nullptr;
1163
1164 if( eWorkingDataType == GDT_Byte
1165 && eResample == GRA_NearestNeighbour
1166 && bNoMasksOrDstDensityOnly )
1167 return GWKNearestNoMasksOrDstDensityOnlyByte( this );
1168
1169 if( eWorkingDataType == GDT_Byte
1170 && eResample == GRA_Bilinear
1171 && bNoMasksOrDstDensityOnly )
1172 return GWKBilinearNoMasksOrDstDensityOnlyByte( this );
1173
1174 if( eWorkingDataType == GDT_Byte
1175 && eResample == GRA_Cubic
1176 && bNoMasksOrDstDensityOnly )
1177 return GWKCubicNoMasksOrDstDensityOnlyByte( this );
1178
1179 if( eWorkingDataType == GDT_Byte
1180 && eResample == GRA_CubicSpline
1181 && bNoMasksOrDstDensityOnly )
1182 return GWKCubicSplineNoMasksOrDstDensityOnlyByte( this );
1183
1184 if( eWorkingDataType == GDT_Byte
1185 && eResample == GRA_NearestNeighbour )
1186 return GWKNearestByte( this );
1187
1188 if( (eWorkingDataType == GDT_Int16 || eWorkingDataType == GDT_UInt16)
1189 && eResample == GRA_NearestNeighbour
1190 && bNoMasksOrDstDensityOnly )
1191 return GWKNearestNoMasksOrDstDensityOnlyShort( this );
1192
1193 if( (eWorkingDataType == GDT_Int16 )
1194 && eResample == GRA_Cubic
1195 && bNoMasksOrDstDensityOnly )
1196 return GWKCubicNoMasksOrDstDensityOnlyShort( this );
1197
1198 if( (eWorkingDataType == GDT_Int16 )
1199 && eResample == GRA_CubicSpline
1200 && bNoMasksOrDstDensityOnly )
1201 return GWKCubicSplineNoMasksOrDstDensityOnlyShort( this );
1202
1203 if( (eWorkingDataType == GDT_Int16 )
1204 && eResample == GRA_Bilinear
1205 && bNoMasksOrDstDensityOnly )
1206 return GWKBilinearNoMasksOrDstDensityOnlyShort( this );
1207
1208 if( (eWorkingDataType == GDT_UInt16 )
1209 && eResample == GRA_Cubic
1210 && bNoMasksOrDstDensityOnly )
1211 return GWKCubicNoMasksOrDstDensityOnlyUShort( this );
1212
1213 if( (eWorkingDataType == GDT_UInt16 )
1214 && eResample == GRA_CubicSpline
1215 && bNoMasksOrDstDensityOnly )
1216 return GWKCubicSplineNoMasksOrDstDensityOnlyUShort( this );
1217
1218 if( (eWorkingDataType == GDT_UInt16 )
1219 && eResample == GRA_Bilinear
1220 && bNoMasksOrDstDensityOnly )
1221 return GWKBilinearNoMasksOrDstDensityOnlyUShort( this );
1222
1223 if( (eWorkingDataType == GDT_Int16 || eWorkingDataType == GDT_UInt16)
1224 && eResample == GRA_NearestNeighbour )
1225 return GWKNearestShort( this );
1226
1227 if( eWorkingDataType == GDT_Float32
1228 && eResample == GRA_NearestNeighbour
1229 && bNoMasksOrDstDensityOnly )
1230 return GWKNearestNoMasksOrDstDensityOnlyFloat( this );
1231
1232 if( eWorkingDataType == GDT_Float32
1233 && eResample == GRA_NearestNeighbour )
1234 return GWKNearestFloat( this );
1235
1236 if( eWorkingDataType == GDT_Float32
1237 && eResample == GRA_Bilinear
1238 && bNoMasksOrDstDensityOnly )
1239 return GWKBilinearNoMasksOrDstDensityOnlyFloat( this );
1240
1241 if( eWorkingDataType == GDT_Float32
1242 && eResample == GRA_Cubic
1243 && bNoMasksOrDstDensityOnly )
1244 return GWKCubicNoMasksOrDstDensityOnlyFloat( this );
1245
1246#ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
1247 if( eWorkingDataType == GDT_Float64
1248 && eResample == GRA_Bilinear
1249 && bNoMasksOrDstDensityOnly )
1250 return GWKBilinearNoMasksOrDstDensityOnlyDouble( this );
1251
1252 if( eWorkingDataType == GDT_Float64
1253 && eResample == GRA_Cubic
1254 && bNoMasksOrDstDensityOnly )
1255 return GWKCubicNoMasksOrDstDensityOnlyDouble( this );
1256#endif
1257
1258 if( eResample == GRA_Average )
1259 return GWKAverageOrMode( this );
1260
1261 if( eResample == GRA_Mode )
1262 return GWKAverageOrMode( this );
1263
1264 if( eResample == GRA_Max )
1265 return GWKAverageOrMode( this );
1266
1267 if( eResample == GRA_Min )
1268 return GWKAverageOrMode( this );
1269
1270 if( eResample == GRA_Med )
1271 return GWKAverageOrMode( this );
1272
1273 if( eResample == GRA_Q1 )
1274 return GWKAverageOrMode( this );
1275
1276 if( eResample == GRA_Q3 )
1277 return GWKAverageOrMode( this );
1278
1279 if( !GDALDataTypeIsComplex(eWorkingDataType) )
1280 return GWKRealCase( this );
1281
1282 return GWKGeneralCase( this );
1283}
1284
1285/************************************************************************/
1286/* Validate() */
1287/************************************************************************/
1288
1289/**
1290 * \fn CPLErr GDALWarpKernel::Validate()
1291 *
1292 * Check the settings in the GDALWarpKernel, and issue a CPLError()
1293 * (and return CE_Failure) if the configuration is considered to be
1294 * invalid for some reason.
1295 *
1296 * This method will also do some standard defaulting such as setting
1297 * pfnProgress to GDALDummyProgress() if it is NULL.
1298 *
1299 * @return CE_None on success or CE_Failure if an error is detected.
1300 */
1301
1302CPLErr GDALWarpKernel::Validate()
1303
1304{
1305 if( static_cast<size_t>(eResample) >=
1306 (sizeof(anGWKFilterRadius) / sizeof(anGWKFilterRadius[0])) )
1307 {
1308 CPLError( CE_Failure, CPLE_AppDefined,
1309 "Unsupported resampling method %d.",
1310 static_cast<int>(eResample) );
1311 return CE_Failure;
1312 }
1313
1314 return CE_None;
1315}
1316
1317/************************************************************************/
1318/* GWKOverlayDensity() */
1319/* */
1320/* Compute the final density for the destination pixel. This */
1321/* is a function of the overlay density (passed in) and the */
1322/* original density. */
1323/************************************************************************/
1324
1325static void GWKOverlayDensity( GDALWarpKernel *poWK, int iDstOffset,
1326 double dfDensity )
1327{
1328 if( dfDensity < 0.0001 || poWK->pafDstDensity == nullptr )
1329 return;
1330
1331 poWK->pafDstDensity[iDstOffset] = (float)
1332 ( 1.0 - (1.0 - dfDensity) * (1.0 - poWK->pafDstDensity[iDstOffset]) );
1333}
1334
1335/************************************************************************/
1336/* GWKRoundValueT() */
1337/************************************************************************/
1338
1339template<class T, EMULATED_BOOL is_signed> struct sGWKRoundValueT
1340{
1341 static T eval(double);
1342};
1343
1344template<class T> struct sGWKRoundValueT<T, true> /* signed */
1345{
1346 static T eval(double dfValue) { return (T)floor(dfValue + 0.5); }
1347};
1348
1349template<class T> struct sGWKRoundValueT<T, false> /* unsigned */
1350{
1351 static T eval(double dfValue) { return (T)(dfValue + 0.5); }
1352};
1353
1354template<class T> static T GWKRoundValueT(double dfValue)
1355{
1356 return sGWKRoundValueT<T, std::numeric_limits<T>::is_signed>::eval(dfValue);
1357}
1358
1359template<> float GWKRoundValueT<float>(double dfValue)
1360{
1361 return (float)dfValue;
1362}
1363
1364#ifdef notused
1365template<> double GWKRoundValueT<double>(double dfValue)
1366{
1367 return dfValue;
1368}
1369#endif
1370
1371/************************************************************************/
1372/* GWKClampValueT() */
1373/************************************************************************/
1374
1375template<class T>
1376static CPL_INLINE T GWKClampValueT(double dfValue)
1377{
1378 if( dfValue < std::numeric_limits<T>::min() )
1379 return std::numeric_limits<T>::min();
1380 else if( dfValue > std::numeric_limits<T>::max() )
1381 return std::numeric_limits<T>::max();
1382 else
1383 return GWKRoundValueT<T>(dfValue);
1384}
1385
1386template<> float GWKClampValueT<float>(double dfValue)
1387{
1388 return (float)dfValue;
1389}
1390
1391#ifdef notused
1392template<> double GWKClampValueT<double>(double dfValue)
1393{
1394 return dfValue;
1395}
1396#endif
1397
1398/************************************************************************/
1399/* GWKSetPixelValueRealT() */
1400/************************************************************************/
1401
1402template<class T>
1403static bool GWKSetPixelValueRealT( GDALWarpKernel *poWK, int iBand,
1404 int iDstOffset, double dfDensity,
1405 T value)
1406{
1407 T *pDst = reinterpret_cast<T*>(poWK->papabyDstImage[iBand]);
1408
1409/* -------------------------------------------------------------------- */
1410/* If the source density is less than 100% we need to fetch the */
1411/* existing destination value, and mix it with the source to */
1412/* get the new "to apply" value. Also compute composite */
1413/* density. */
1414/* */
1415/* We avoid mixing if density is very near one or risk mixing */
1416/* in very extreme nodata values and causing odd results (#1610) */
1417/* -------------------------------------------------------------------- */
1418 if( dfDensity < 0.9999 )
1419 {
1420 if( dfDensity < 0.0001 )
1421 return true;
1422
1423 double dfDstDensity = 1.0;
1424
1425 if( poWK->pafDstDensity != nullptr )
1426 dfDstDensity = poWK->pafDstDensity[iDstOffset];
1427 else if( poWK->panDstValid != nullptr
1428 && !((poWK->panDstValid[iDstOffset>>5]
1429 & (0x01 << (iDstOffset & 0x1f))) ) )
1430 dfDstDensity = 0.0;
1431
1432 // It seems like we also ought to be testing panDstValid[] here!
1433
1434 const double dfDstReal = pDst[iDstOffset];
1435
1436 // The destination density is really only relative to the portion
1437 // not occluded by the overlay.
1438 const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
1439
1440 const double dfReal =
1441 (value * dfDensity + dfDstReal * dfDstInfluence)
1442 / (dfDensity + dfDstInfluence);
1443
1444/* -------------------------------------------------------------------- */
1445/* Actually apply the destination value. */
1446/* */
1447/* Avoid using the destination nodata value for integer datatypes */
1448/* if by chance it is equal to the computed pixel value. */
1449/* -------------------------------------------------------------------- */
1450 pDst[iDstOffset] = GWKClampValueT<T>(dfReal);
1451 }
1452 else
1453 {
1454 pDst[iDstOffset] = value;
1455 }
1456
1457 if( poWK->padfDstNoDataReal != nullptr &&
1458 poWK->padfDstNoDataReal[iBand] ==
1459 static_cast<double>(pDst[iDstOffset]) )
1460 {
1461 if( pDst[iDstOffset] == std::numeric_limits<T>::min() )
1462 pDst[iDstOffset] = std::numeric_limits<T>::min() + 1;
1463 else
1464 pDst[iDstOffset]--;
1465 }
1466
1467 return true;
1468}
1469
1470/************************************************************************/
1471/* GWKSetPixelValue() */
1472/************************************************************************/
1473
1474static bool GWKSetPixelValue( GDALWarpKernel *poWK, int iBand,
1475 int iDstOffset, double dfDensity,
1476 double dfReal, double dfImag )
1477
1478{
1479 GByte *pabyDst = poWK->papabyDstImage[iBand];
1480
1481/* -------------------------------------------------------------------- */
1482/* If the source density is less than 100% we need to fetch the */
1483/* existing destination value, and mix it with the source to */
1484/* get the new "to apply" value. Also compute composite */
1485/* density. */
1486/* */
1487/* We avoid mixing if density is very near one or risk mixing */
1488/* in very extreme nodata values and causing odd results (#1610) */
1489/* -------------------------------------------------------------------- */
1490 if( dfDensity < 0.9999 )
1491 {
1492 if( dfDensity < 0.0001 )
1493 return true;
1494
1495 double dfDstDensity = 1.0;
1496 if( poWK->pafDstDensity != nullptr )
1497 dfDstDensity = poWK->pafDstDensity[iDstOffset];
1498 else if( poWK->panDstValid != nullptr
1499 && !((poWK->panDstValid[iDstOffset>>5]
1500 & (0x01 << (iDstOffset & 0x1f))) ) )
1501 dfDstDensity = 0.0;
1502
1503 double dfDstReal = 0.0;
1504 double dfDstImag = 0.0;
1505 // It seems like we also ought to be testing panDstValid[] here!
1506
1507 // TODO(schwehr): Factor out this repreated type of set.
1508 switch( poWK->eWorkingDataType )
1509 {
1510 case GDT_Byte:
1511 dfDstReal = pabyDst[iDstOffset];
1512 dfDstImag = 0.0;
1513 break;
1514
1515 case GDT_Int16:
1516 dfDstReal = ((GInt16 *) pabyDst)[iDstOffset];
1517 dfDstImag = 0.0;
1518 break;
1519
1520 case GDT_UInt16:
1521 dfDstReal = ((GUInt16 *) pabyDst)[iDstOffset];
1522 dfDstImag = 0.0;
1523 break;
1524
1525 case GDT_Int32:
1526 dfDstReal = ((GInt32 *) pabyDst)[iDstOffset];
1527 dfDstImag = 0.0;
1528 break;
1529
1530 case GDT_UInt32:
1531 dfDstReal = ((GUInt32 *) pabyDst)[iDstOffset];
1532 dfDstImag = 0.0;
1533 break;
1534
1535 case GDT_Float32:
1536 dfDstReal = ((float *) pabyDst)[iDstOffset];
1537 dfDstImag = 0.0;
1538 break;
1539
1540 case GDT_Float64:
1541 dfDstReal = ((double *) pabyDst)[iDstOffset];
1542 dfDstImag = 0.0;
1543 break;
1544
1545 case GDT_CInt16:
1546 dfDstReal = ((GInt16 *) pabyDst)[iDstOffset*2];
1547 dfDstImag = ((GInt16 *) pabyDst)[iDstOffset*2+1];
1548 break;
1549
1550 case GDT_CInt32:
1551 dfDstReal = ((GInt32 *) pabyDst)[iDstOffset*2];
1552 dfDstImag = ((GInt32 *) pabyDst)[iDstOffset*2+1];
1553 break;
1554
1555 case GDT_CFloat32:
1556 dfDstReal = ((float *) pabyDst)[iDstOffset*2];
1557 dfDstImag = ((float *) pabyDst)[iDstOffset*2+1];
1558 break;
1559
1560 case GDT_CFloat64:
1561 dfDstReal = ((double *) pabyDst)[iDstOffset*2];
1562 dfDstImag = ((double *) pabyDst)[iDstOffset*2+1];
1563 break;
1564
1565 default:
1566 CPLAssert( false );
1567 dfDstDensity = 0.0;
1568 return false;
1569 }
1570
1571 // The destination density is really only relative to the portion
1572 // not occluded by the overlay.
1573 const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
1574
1575 dfReal =
1576 (dfReal * dfDensity + dfDstReal * dfDstInfluence)
1577 / (dfDensity + dfDstInfluence);
1578
1579 dfImag =
1580 (dfImag * dfDensity + dfDstImag * dfDstInfluence)
1581 / (dfDensity + dfDstInfluence);
1582 }
1583
1584/* -------------------------------------------------------------------- */
1585/* Actually apply the destination value. */
1586/* */
1587/* Avoid using the destination nodata value for integer datatypes */
1588/* if by chance it is equal to the computed pixel value. */
1589/* -------------------------------------------------------------------- */
1590
1591// TODO(schwehr): Can we make this a template?
1592#define CLAMP(type) \
1593 do { \
1594 type* _pDst = reinterpret_cast<type*>(pabyDst); \
1595 if( dfReal < static_cast<double>(std::numeric_limits<type>::min()) ) \
1596 _pDst[iDstOffset] = static_cast<type>(std::numeric_limits<type>::min()); \
1597 else if( dfReal > static_cast<double>(std::numeric_limits<type>::max()) ) \
1598 _pDst[iDstOffset] = static_cast<type>(std::numeric_limits<type>::max()); \
1599 else \
1600 _pDst[iDstOffset] = \
1601 (std::numeric_limits<type>::is_signed) ? \
1602 static_cast<type>(floor(dfReal + 0.5)) : \
1603 static_cast<type>(dfReal + 0.5); \
1604 if( poWK->padfDstNoDataReal != nullptr && \
1605 poWK->padfDstNoDataReal[iBand] == static_cast<double>(_pDst[iDstOffset]) ) \
1606 { \
1607 if( _pDst[iDstOffset] == static_cast<type>(std::numeric_limits<type>::min()) ) \
1608 _pDst[iDstOffset] = static_cast<type>(std::numeric_limits<type>::min() + 1); \
1609 else \
1610 _pDst[iDstOffset]--; \
1611 } } while( false )
1612
1613 switch( poWK->eWorkingDataType )
1614 {
1615 case GDT_Byte:
1616 CLAMP(GByte);
1617 break;
1618
1619 case GDT_Int16:
1620 CLAMP(GInt16);
1621 break;
1622
1623 case GDT_UInt16:
1624 CLAMP(GUInt16);
1625 break;
1626
1627 case GDT_UInt32:
1628 CLAMP(GUInt32);
1629 break;
1630
1631 case GDT_Int32:
1632 CLAMP(GInt32);
1633 break;
1634
1635 case GDT_Float32:
1636 ((float *) pabyDst)[iDstOffset] = static_cast<float>(dfReal);
1637 break;
1638
1639 case GDT_Float64:
1640 ((double *) pabyDst)[iDstOffset] = dfReal;
1641 break;
1642
1643 case GDT_CInt16:
1644 {
1645 typedef GInt16 T;
1646 if( dfReal < static_cast<double>(std::numeric_limits<T>::min()) )
1647 ((T *) pabyDst)[iDstOffset*2] = std::numeric_limits<T>::min();
1648 else if( dfReal > static_cast<double>(std::numeric_limits<T>::max()) )
1649 ((T *) pabyDst)[iDstOffset*2] = std::numeric_limits<T>::max();
1650 else
1651 ((T *) pabyDst)[iDstOffset*2] = static_cast<T>(floor(dfReal+0.5));
1652 if( dfImag < static_cast<double>(std::numeric_limits<T>::min()) )
1653 ((T *) pabyDst)[iDstOffset*2+1] = std::numeric_limits<T>::min();
1654 else if( dfImag > static_cast<double>(std::numeric_limits<T>::max()) )
1655 ((T *) pabyDst)[iDstOffset*2+1] = std::numeric_limits<T>::max();
1656 else
1657 ((T *) pabyDst)[iDstOffset*2+1] = static_cast<T>(floor(dfImag+0.5));
1658 break;
1659 }
1660
1661 case GDT_CInt32:
1662 {
1663 typedef GInt32 T;
1664 if( dfReal < static_cast<double>(std::numeric_limits<T>::min()) )
1665 ((T *) pabyDst)[iDstOffset*2] = std::numeric_limits<T>::min();
1666 else if( dfReal > static_cast<double>(std::numeric_limits<T>::max()) )
1667 ((T *) pabyDst)[iDstOffset*2] = std::numeric_limits<T>::max();
1668 else
1669 ((T *) pabyDst)[iDstOffset*2] = static_cast<T>(floor(dfReal+0.5));
1670 if( dfImag < static_cast<double>(std::numeric_limits<T>::min()) )
1671 ((T *) pabyDst)[iDstOffset*2+1] = std::numeric_limits<T>::min();
1672 else if( dfImag > static_cast<double>(std::numeric_limits<T>::max()) )
1673 ((T *) pabyDst)[iDstOffset*2+1] = std::numeric_limits<T>::max();
1674 else
1675 ((T *) pabyDst)[iDstOffset*2+1] = static_cast<T>(floor(dfImag+0.5));
1676 break;
1677 }
1678
1679 case GDT_CFloat32:
1680 ((float *) pabyDst)[iDstOffset*2] = (float) dfReal;
1681 ((float *) pabyDst)[iDstOffset*2+1] = (float) dfImag;
1682 break;
1683
1684 case GDT_CFloat64:
1685 ((double *) pabyDst)[iDstOffset*2] = (double) dfReal;
1686 ((double *) pabyDst)[iDstOffset*2+1] = (double) dfImag;
1687 break;
1688
1689 default:
1690 return false;
1691 }
1692
1693 return true;
1694}
1695
1696/************************************************************************/
1697/* GWKSetPixelValueReal() */
1698/************************************************************************/
1699
1700static bool GWKSetPixelValueReal( GDALWarpKernel *poWK, int iBand,
1701 int iDstOffset, double dfDensity,
1702 double dfReal )
1703
1704{
1705 GByte *pabyDst = poWK->papabyDstImage[iBand];
1706
1707/* -------------------------------------------------------------------- */
1708/* If the source density is less than 100% we need to fetch the */
1709/* existing destination value, and mix it with the source to */
1710/* get the new "to apply" value. Also compute composite */
1711/* density. */
1712/* */
1713/* We avoid mixing if density is very near one or risk mixing */
1714/* in very extreme nodata values and causing odd results (#1610) */
1715/* -------------------------------------------------------------------- */
1716 if( dfDensity < 0.9999 )
1717 {
1718 if( dfDensity < 0.0001 )
1719 return true;
1720
1721 double dfDstReal = 0.0;
1722 double dfDstDensity = 1.0;
1723
1724 if( poWK->pafDstDensity != nullptr )
1725 dfDstDensity = poWK->pafDstDensity[iDstOffset];
1726 else if( poWK->panDstValid != nullptr
1727 && !((poWK->panDstValid[iDstOffset>>5]
1728 & (0x01 << (iDstOffset & 0x1f))) ) )
1729 dfDstDensity = 0.0;
1730
1731 // It seems like we also ought to be testing panDstValid[] here!
1732
1733 switch( poWK->eWorkingDataType )
1734 {
1735 case GDT_Byte:
1736 dfDstReal = pabyDst[iDstOffset];
1737 break;
1738
1739 case GDT_Int16:
1740 dfDstReal = ((GInt16 *) pabyDst)[iDstOffset];
1741 break;
1742
1743 case GDT_UInt16:
1744 dfDstReal = ((GUInt16 *) pabyDst)[iDstOffset];
1745 break;
1746
1747 case GDT_Int32:
1748 dfDstReal = ((GInt32 *) pabyDst)[iDstOffset];
1749 break;
1750
1751 case GDT_UInt32:
1752 dfDstReal = ((GUInt32 *) pabyDst)[iDstOffset];
1753 break;
1754
1755 case GDT_Float32:
1756 dfDstReal = ((float *) pabyDst)[iDstOffset];
1757 break;
1758
1759 case GDT_Float64:
1760 dfDstReal = ((double *) pabyDst)[iDstOffset];
1761 break;
1762
1763 default:
1764 CPLAssert( false );
1765 dfDstDensity = 0.0;
1766 return false;
1767 }
1768
1769 // The destination density is really only relative to the portion
1770 // not occluded by the overlay.
1771 const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
1772
1773 dfReal =
1774 (dfReal * dfDensity + dfDstReal * dfDstInfluence)
1775 / (dfDensity + dfDstInfluence);
1776 }
1777
1778/* -------------------------------------------------------------------- */
1779/* Actually apply the destination value. */
1780/* */
1781/* Avoid using the destination nodata value for integer datatypes */
1782/* if by chance it is equal to the computed pixel value. */
1783/* -------------------------------------------------------------------- */
1784
1785 switch( poWK->eWorkingDataType )
1786 {
1787 case GDT_Byte:
1788 CLAMP(GByte);
1789 break;
1790
1791 case GDT_Int16:
1792 CLAMP(GInt16);
1793 break;
1794
1795 case GDT_UInt16:
1796 CLAMP(GUInt16);
1797 break;
1798
1799 case GDT_UInt32:
1800 CLAMP(GUInt32);
1801 break;
1802
1803 case GDT_Int32:
1804 CLAMP(GInt32);
1805 break;
1806
1807 case GDT_Float32:
1808 ((float *) pabyDst)[iDstOffset] = static_cast<float>(dfReal);
1809 break;
1810
1811 case GDT_Float64:
1812 ((double *) pabyDst)[iDstOffset] = dfReal;
1813 break;
1814
1815 default:
1816 return false;
1817 }
1818
1819 return true;
1820}
1821
1822/************************************************************************/
1823/* GWKGetPixelValue() */
1824/************************************************************************/
1825
1826/* It is assumed that panUnifiedSrcValid has been checked before */
1827
1828static bool GWKGetPixelValue( GDALWarpKernel *poWK, int iBand,
1829 int iSrcOffset, double *pdfDensity,
1830 double *pdfReal, double *pdfImag )
1831
1832{
1833 GByte *pabySrc = poWK->papabySrcImage[iBand];
1834
1835 if( poWK->papanBandSrcValid != nullptr
1836 && poWK->papanBandSrcValid[iBand] != nullptr
1837 && !((poWK->papanBandSrcValid[iBand][iSrcOffset>>5]
1838 & (0x01 << (iSrcOffset & 0x1f)))) )
1839 {
1840 *pdfDensity = 0.0;
1841 return false;
1842 }
1843
1844 // TODO(schwehr): Fix casting.
1845 switch( poWK->eWorkingDataType )
1846 {
1847 case GDT_Byte:
1848 *pdfReal = pabySrc[iSrcOffset];
1849 *pdfImag = 0.0;
1850 break;
1851
1852 case GDT_Int16:
1853 *pdfReal = ((GInt16 *) pabySrc)[iSrcOffset];
1854 *pdfImag = 0.0;
1855 break;
1856
1857 case GDT_UInt16:
1858 *pdfReal = ((GUInt16 *) pabySrc)[iSrcOffset];
1859 *pdfImag = 0.0;
1860 break;
1861
1862 case GDT_Int32:
1863 *pdfReal = ((GInt32 *) pabySrc)[iSrcOffset];
1864 *pdfImag = 0.0;
1865 break;
1866
1867 case GDT_UInt32:
1868 *pdfReal = ((GUInt32 *) pabySrc)[iSrcOffset];
1869 *pdfImag = 0.0;
1870 break;
1871
1872 case GDT_Float32:
1873 *pdfReal = ((float *) pabySrc)[iSrcOffset];
1874 *pdfImag = 0.0;
1875 break;
1876
1877 case GDT_Float64:
1878 *pdfReal = ((double *) pabySrc)[iSrcOffset];
1879 *pdfImag = 0.0;
1880 break;
1881
1882 case GDT_CInt16:
1883 *pdfReal = ((GInt16 *) pabySrc)[iSrcOffset*2];
1884 *pdfImag = ((GInt16 *) pabySrc)[iSrcOffset*2+1];
1885 break;
1886
1887 case GDT_CInt32:
1888 *pdfReal = ((GInt32 *) pabySrc)[iSrcOffset*2];
1889 *pdfImag = ((GInt32 *) pabySrc)[iSrcOffset*2+1];
1890 break;
1891
1892 case GDT_CFloat32:
1893 *pdfReal = ((float *) pabySrc)[iSrcOffset*2];
1894 *pdfImag = ((float *) pabySrc)[iSrcOffset*2+1];
1895 break;
1896
1897 case GDT_CFloat64:
1898 *pdfReal = ((double *) pabySrc)[iSrcOffset*2];
1899 *pdfImag = ((double *) pabySrc)[iSrcOffset*2+1];
1900 break;
1901
1902 default:
1903 *pdfDensity = 0.0;
1904 return false;
1905 }
1906
1907 if( poWK->pafUnifiedSrcDensity != nullptr )
1908 *pdfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
1909 else
1910 *pdfDensity = 1.0;
1911
1912 return *pdfDensity != 0.0;
1913}
1914
1915/************************************************************************/
1916/* GWKGetPixelValueReal() */
1917/************************************************************************/
1918
1919static bool GWKGetPixelValueReal( GDALWarpKernel *poWK, int iBand,
1920 int iSrcOffset, double *pdfDensity,
1921 double *pdfReal )
1922
1923{
1924 GByte *pabySrc = poWK->papabySrcImage[iBand];
1925
1926 if( poWK->papanBandSrcValid != nullptr
1927 && poWK->papanBandSrcValid[iBand] != nullptr
1928 && !((poWK->papanBandSrcValid[iBand][iSrcOffset>>5]
1929 & (0x01 << (iSrcOffset & 0x1f)))) )
1930 {
1931 *pdfDensity = 0.0;
1932 return false;
1933 }
1934
1935 switch( poWK->eWorkingDataType )
1936 {
1937 case GDT_Byte:
1938 *pdfReal = pabySrc[iSrcOffset];
1939 break;
1940
1941 case GDT_Int16:
1942 *pdfReal = ((GInt16 *) pabySrc)[iSrcOffset];
1943 break;
1944
1945 case GDT_UInt16:
1946 *pdfReal = ((GUInt16 *) pabySrc)[iSrcOffset];
1947 break;
1948
1949 case GDT_Int32:
1950 *pdfReal = ((GInt32 *) pabySrc)[iSrcOffset];
1951 break;
1952
1953 case GDT_UInt32:
1954 *pdfReal = ((GUInt32 *) pabySrc)[iSrcOffset];
1955 break;
1956
1957 case GDT_Float32:
1958 *pdfReal = ((float *) pabySrc)[iSrcOffset];
1959 break;
1960
1961 case GDT_Float64:
1962 *pdfReal = ((double *) pabySrc)[iSrcOffset];
1963 break;
1964
1965 default:
1966 CPLAssert(false);
1967 return false;
1968 }
1969
1970 if( poWK->pafUnifiedSrcDensity != nullptr )
1971 *pdfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
1972 else
1973 *pdfDensity = 1.0;
1974
1975 return *pdfDensity != 0.0;
1976}
1977
1978/************************************************************************/
1979/* GWKGetPixelRow() */
1980/************************************************************************/
1981
1982/* It is assumed that adfImag[] is set to 0 by caller code for non-complex */
1983/* data-types. */
1984
1985static bool GWKGetPixelRow( GDALWarpKernel *poWK, int iBand,
1986 int iSrcOffset, int nHalfSrcLen,
1987 double* padfDensity,
1988 double adfReal[],
1989 double* padfImag )
1990{
1991 // We know that nSrcLen is even, so we can *always* unroll loops 2x.
1992 const int nSrcLen = nHalfSrcLen * 2;
1993 bool bHasValid = false;
1994
1995 if( padfDensity != nullptr )
1996 {
1997 // Init the density.
1998 for( int i = 0; i < nSrcLen; i += 2 )
1999 {
2000 padfDensity[i] = 1.0;
2001 padfDensity[i+1] = 1.0;
2002 }
2003
2004 if( poWK->panUnifiedSrcValid != nullptr )
2005 {
2006 for( int i = 0; i < nSrcLen; i += 2 )
2007 {
2008 if( poWK->panUnifiedSrcValid[(iSrcOffset+i)>>5]
2009 & (0x01 << ((iSrcOffset+i) & 0x1f)) )
2010 bHasValid = true;
2011 else
2012 padfDensity[i] = 0.0;
2013
2014 if( poWK->panUnifiedSrcValid[(iSrcOffset+i+1)>>5]
2015 & (0x01 << ((iSrcOffset+i+1) & 0x1f)) )
2016 bHasValid = true;
2017 else
2018 padfDensity[i+1] = 0.0;
2019 }
2020
2021 // Reset or fail as needed.
2022 if( bHasValid )
2023 bHasValid = false;
2024 else
2025 return false;
2026 }
2027
2028 if( poWK->papanBandSrcValid != nullptr
2029 && poWK->papanBandSrcValid[iBand] != nullptr )
2030 {
2031 for( int i = 0; i < nSrcLen; i += 2 )
2032 {
2033 if( poWK->papanBandSrcValid[iBand][(iSrcOffset+i)>>5]
2034 & (0x01 << ((iSrcOffset+i) & 0x1f)) )
2035 bHasValid = true;
2036 else
2037 padfDensity[i] = 0.0;
2038
2039 if( poWK->papanBandSrcValid[iBand][(iSrcOffset+i+1)>>5]
2040 & (0x01 << ((iSrcOffset+i+1) & 0x1f)) )
2041 bHasValid = true;
2042 else
2043 padfDensity[i+1] = 0.0;
2044 }
2045
2046 // Reset or fail as needed.
2047 if( bHasValid )
2048 bHasValid = false;
2049 else
2050 return false;
2051 }
2052 }
2053
2054 // TODO(schwehr): Fix casting.
2055 // Fetch data.
2056 switch( poWK->eWorkingDataType )
2057 {
2058 case GDT_Byte:
2059 {
2060 GByte* pSrc = (GByte*) poWK->papabySrcImage[iBand];
2061 pSrc += iSrcOffset;
2062 for( int i = 0; i < nSrcLen; i += 2 )
2063 {
2064 adfReal[i] = pSrc[i];
2065 adfReal[i+1] = pSrc[i+1];
2066 }
2067 break;
2068 }
2069
2070 case GDT_Int16:
2071 {
2072 GInt16* pSrc = (GInt16*) poWK->papabySrcImage[iBand];
2073 pSrc += iSrcOffset;
2074 for( int i = 0; i < nSrcLen; i += 2 )
2075 {
2076 adfReal[i] = pSrc[i];
2077 adfReal[i+1] = pSrc[i+1];
2078 }
2079 break;
2080 }
2081
2082 case GDT_UInt16:
2083 {
2084 GUInt16* pSrc = (GUInt16*) poWK->papabySrcImage[iBand];
2085 pSrc += iSrcOffset;
2086 for( int i = 0; i < nSrcLen; i += 2 )
2087 {
2088 adfReal[i] = pSrc[i];
2089 adfReal[i+1] = pSrc[i+1];
2090 }
2091 break;
2092 }
2093
2094 case GDT_Int32:
2095 {
2096 GInt32* pSrc = (GInt32*) poWK->papabySrcImage[iBand];
2097 pSrc += iSrcOffset;
2098 for( int i = 0; i < nSrcLen; i += 2 )
2099 {
2100 adfReal[i] = pSrc[i];
2101 adfReal[i+1] = pSrc[i+1];
2102 }
2103 break;
2104 }
2105
2106 case GDT_UInt32:
2107 {
2108 GUInt32* pSrc = (GUInt32*) poWK->papabySrcImage[iBand];
2109 pSrc += iSrcOffset;
2110 for( int i = 0; i < nSrcLen; i += 2 )
2111 {
2112 adfReal[i] = pSrc[i];
2113 adfReal[i+1] = pSrc[i+1];
2114 }
2115 break;
2116 }
2117
2118 case GDT_Float32:
2119 {
2120 float* pSrc = (float*) poWK->papabySrcImage[iBand];
2121 pSrc += iSrcOffset;
2122 for( int i = 0; i < nSrcLen; i += 2 )
2123 {
2124 adfReal[i] = pSrc[i];
2125 adfReal[i+1] = pSrc[i+1];
2126 }
2127 break;
2128 }
2129
2130 case GDT_Float64:
2131 {
2132 double* pSrc = (double*) poWK->papabySrcImage[iBand];
2133 pSrc += iSrcOffset;
2134 for( int i = 0; i < nSrcLen; i += 2 )
2135 {
2136 adfReal[i] = pSrc[i];
2137 adfReal[i+1] = pSrc[i+1];
2138 }
2139 break;
2140 }
2141
2142 case GDT_CInt16:
2143 {
2144 GInt16* pSrc = (GInt16*) poWK->papabySrcImage[iBand];
2145 pSrc += 2 * iSrcOffset;
2146 for( int i = 0; i < nSrcLen; i += 2 )
2147 {
2148 adfReal[i] = pSrc[2*i];
2149 padfImag[i] = pSrc[2*i+1];
2150
2151 adfReal[i+1] = pSrc[2*i+2];
2152 padfImag[i+1] = pSrc[2*i+3];
2153 }
2154 break;
2155 }
2156
2157 case GDT_CInt32:
2158 {
2159 GInt32* pSrc = (GInt32*) poWK->papabySrcImage[iBand];
2160 pSrc += 2 * iSrcOffset;
2161 for( int i = 0; i < nSrcLen; i += 2 )
2162 {
2163 adfReal[i] = pSrc[2*i];
2164 padfImag[i] = pSrc[2*i+1];
2165
2166 adfReal[i+1] = pSrc[2*i+2];
2167 padfImag[i+1] = pSrc[2*i+3];
2168 }
2169 break;
2170 }
2171
2172 case GDT_CFloat32:
2173 {
2174 float* pSrc = (float*) poWK->papabySrcImage[iBand];
2175 pSrc += 2 * iSrcOffset;
2176 for( int i = 0; i < nSrcLen; i += 2 )
2177 {
2178 adfReal[i] = pSrc[2*i];
2179 padfImag[i] = pSrc[2*i+1];
2180
2181 adfReal[i+1] = pSrc[2*i+2];
2182 padfImag[i+1] = pSrc[2*i+3];
2183 }
2184 break;
2185 }
2186
2187 case GDT_CFloat64:
2188 {
2189 double* pSrc = (double*) poWK->papabySrcImage[iBand];
2190 pSrc += 2 * iSrcOffset;
2191 for( int i = 0; i < nSrcLen; i += 2 )
2192 {
2193 adfReal[i] = pSrc[2*i];
2194 padfImag[i] = pSrc[2*i+1];
2195
2196 adfReal[i+1] = pSrc[2*i+2];
2197 padfImag[i+1] = pSrc[2*i+3];
2198 }
2199 break;
2200 }
2201
2202 default:
2203 CPLAssert(false);
2204 if( padfDensity )
2205 memset( padfDensity, 0, nSrcLen * sizeof(double) );
2206 return false;
2207 }
2208
2209 if( padfDensity == nullptr )
2210 return true;
2211
2212 if( poWK->pafUnifiedSrcDensity == nullptr )
2213 {
2214 for( int i = 0; i < nSrcLen; i += 2 )
2215 {
2216 // Take into account earlier calcs.
2217 if( padfDensity[i] > SRC_DENSITY_THRESHOLD )
2218 {
2219 padfDensity[i] = 1.0;
2220 bHasValid = true;
2221 }
2222
2223 if( padfDensity[i+1] > SRC_DENSITY_THRESHOLD )
2224 {
2225 padfDensity[i+1] = 1.0;
2226 bHasValid = true;
2227 }
2228 }
2229 }
2230 else
2231 {
2232 for( int i = 0; i < nSrcLen; i += 2 )
2233 {
2234 if( padfDensity[i] > SRC_DENSITY_THRESHOLD )
2235 padfDensity[i] = poWK->pafUnifiedSrcDensity[iSrcOffset+i];
2236 if( padfDensity[i] > SRC_DENSITY_THRESHOLD )
2237 bHasValid = true;
2238
2239 if( padfDensity[i+1] > SRC_DENSITY_THRESHOLD )
2240 padfDensity[i+1] = poWK->pafUnifiedSrcDensity[iSrcOffset+i+1];
2241 if( padfDensity[i+1] > SRC_DENSITY_THRESHOLD )
2242 bHasValid = true;
2243 }
2244 }
2245
2246 return bHasValid;
2247}
2248
2249/************************************************************************/
2250/* GWKGetPixelT() */
2251/************************************************************************/
2252
2253template<class T>
2254static bool GWKGetPixelT( GDALWarpKernel *poWK, int iBand,
2255 int iSrcOffset, double *pdfDensity,
2256 T *pValue )
2257
2258{
2259 T *pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
2260
2261 if( ( poWK->panUnifiedSrcValid != nullptr
2262 && !((poWK->panUnifiedSrcValid[iSrcOffset>>5]
2263 & (0x01 << (iSrcOffset & 0x1f))) ) )
2264 || ( poWK->papanBandSrcValid != nullptr
2265 && poWK->papanBandSrcValid[iBand] != nullptr
2266 && !((poWK->papanBandSrcValid[iBand][iSrcOffset>>5]
2267 & (0x01 << (iSrcOffset & 0x1f)))) ) )
2268 {
2269 *pdfDensity = 0.0;
2270 return false;
2271 }
2272
2273 *pValue = pSrc[iSrcOffset];
2274
2275 if( poWK->pafUnifiedSrcDensity == nullptr )
2276 *pdfDensity = 1.0;
2277 else
2278 *pdfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
2279
2280 return *pdfDensity != 0.0;
2281}
2282
2283/************************************************************************/
2284/* GWKBilinearResample() */
2285/* Set of bilinear interpolators */
2286/************************************************************************/
2287
2288static bool GWKBilinearResample4Sample( GDALWarpKernel *poWK, int iBand,
2289 double dfSrcX, double dfSrcY,
2290 double *pdfDensity,
2291 double *pdfReal, double *pdfImag )
2292
2293{
2294 // Save as local variables to avoid following pointers.
2295 const int nSrcXSize = poWK->nSrcXSize;
2296 const int nSrcYSize = poWK->nSrcYSize;
2297
2298 int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
2299 int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
2300 double dfRatioX = 1.5 - (dfSrcX - iSrcX);
2301 double dfRatioY = 1.5 - (dfSrcY - iSrcY);
2302 bool bShifted = false;
2303
2304 if( iSrcX == -1 )
2305 {
2306 iSrcX = 0;
2307 dfRatioX = 1;
2308 }
2309 if( iSrcY == -1 )
2310 {
2311 iSrcY = 0;
2312 dfRatioY = 1;
2313 }
2314 int iSrcOffset = iSrcX + iSrcY * nSrcXSize;
2315
2316 // Shift so we don't overrun the array.
2317 if( nSrcXSize * nSrcYSize == iSrcOffset + 1
2318 || nSrcXSize * nSrcYSize == iSrcOffset + nSrcXSize + 1 )
2319 {
2320 bShifted = true;
2321 --iSrcOffset;
2322 }
2323
2324 double adfDensity[2] = { 0.0, 0.0 };
2325 double adfReal[2] = { 0.0, 0.0 };
2326 double adfImag[2] = { 0.0, 0.0 };
2327 double dfAccumulatorReal = 0.0;
2328 double dfAccumulatorImag = 0.0;
2329 double dfAccumulatorDensity = 0.0;
2330 double dfAccumulatorDivisor = 0.0;
2331
2332 // Get pixel row.
2333 if( iSrcY >= 0 && iSrcY < nSrcYSize
2334 && iSrcOffset >= 0 && iSrcOffset < nSrcXSize * nSrcYSize
2335 && GWKGetPixelRow( poWK, iBand, iSrcOffset, 1,
2336 adfDensity, adfReal, adfImag ) )
2337 {
2338 double dfMult1 = dfRatioX * dfRatioY;
2339 double dfMult2 = (1.0-dfRatioX) * dfRatioY;
2340
2341 // Shifting corrected.
2342 if( bShifted )
2343 {
2344 adfReal[0] = adfReal[1];
2345 adfImag[0] = adfImag[1];
2346 adfDensity[0] = adfDensity[1];
2347 }
2348
2349 // Upper Left Pixel.
2350 if( iSrcX >= 0 && iSrcX < nSrcXSize
2351 && adfDensity[0] > SRC_DENSITY_THRESHOLD )
2352 {
2353 dfAccumulatorDivisor += dfMult1;
2354
2355 dfAccumulatorReal += adfReal[0] * dfMult1;
2356 dfAccumulatorImag += adfImag[0] * dfMult1;
2357 dfAccumulatorDensity += adfDensity[0] * dfMult1;
2358 }
2359
2360 // Upper Right Pixel.
2361 if( iSrcX+1 >= 0 && iSrcX+1 < nSrcXSize
2362 && adfDensity[1] > SRC_DENSITY_THRESHOLD )
2363 {
2364 dfAccumulatorDivisor += dfMult2;
2365
2366 dfAccumulatorReal += adfReal[1] * dfMult2;
2367 dfAccumulatorImag += adfImag[1] * dfMult2;
2368 dfAccumulatorDensity += adfDensity[1] * dfMult2;
2369 }
2370 }
2371
2372 // Get pixel row.
2373 if( iSrcY+1 >= 0 && iSrcY+1 < nSrcYSize
2374 && iSrcOffset+nSrcXSize >= 0
2375 && iSrcOffset+nSrcXSize < nSrcXSize * nSrcYSize
2376 && GWKGetPixelRow( poWK, iBand, iSrcOffset+nSrcXSize, 1,
2377 adfDensity, adfReal, adfImag ) )
2378 {
2379 double dfMult1 = dfRatioX * (1.0-dfRatioY);
2380 double dfMult2 = (1.0-dfRatioX) * (1.0-dfRatioY);
2381
2382 // Shifting corrected
2383 if ( bShifted )
2384 {
2385 adfReal[0] = adfReal[1];
2386 adfImag[0] = adfImag[1];
2387 adfDensity[0] = adfDensity[1];
2388 }
2389
2390 // Lower Left Pixel
2391 if ( iSrcX >= 0 && iSrcX < nSrcXSize
2392 && adfDensity[0] > SRC_DENSITY_THRESHOLD )
2393 {
2394 dfAccumulatorDivisor += dfMult1;
2395
2396 dfAccumulatorReal += adfReal[0] * dfMult1;
2397 dfAccumulatorImag += adfImag[0] * dfMult1;
2398 dfAccumulatorDensity += adfDensity[0] * dfMult1;
2399 }
2400
2401 // Lower Right Pixel.
2402 if( iSrcX+1 >= 0 && iSrcX+1 < nSrcXSize
2403 && adfDensity[1] > SRC_DENSITY_THRESHOLD )
2404 {
2405 dfAccumulatorDivisor += dfMult2;
2406
2407 dfAccumulatorReal += adfReal[1] * dfMult2;
2408 dfAccumulatorImag += adfImag[1] * dfMult2;
2409 dfAccumulatorDensity += adfDensity[1] * dfMult2;
2410 }
2411 }
2412
2413/* -------------------------------------------------------------------- */
2414/* Return result. */
2415/* -------------------------------------------------------------------- */
2416 if( dfAccumulatorDivisor == 1.0 )
2417 {
2418 *pdfReal = dfAccumulatorReal;
2419 *pdfImag = dfAccumulatorImag;
2420 *pdfDensity = dfAccumulatorDensity;
2421 return false;
2422 }
2423 else if( dfAccumulatorDivisor < 0.00001 )
2424 {
2425 *pdfReal = 0.0;
2426 *pdfImag = 0.0;
2427 *pdfDensity = 0.0;
2428 return false;
2429 }
2430 else
2431 {
2432 *pdfReal = dfAccumulatorReal / dfAccumulatorDivisor;
2433 *pdfImag = dfAccumulatorImag / dfAccumulatorDivisor;
2434 *pdfDensity = dfAccumulatorDensity / dfAccumulatorDivisor;
2435 return true;
2436 }
2437}
2438
2439template<class T>
2440static bool GWKBilinearResampleNoMasks4SampleT( GDALWarpKernel *poWK, int iBand,
2441 double dfSrcX, double dfSrcY,
2442 T *pValue )
2443
2444{
2445
2446 const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
2447 const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
2448 const int iSrcOffset = iSrcX + iSrcY * poWK->nSrcXSize;
2449 const double dfRatioX = 1.5 - (dfSrcX - iSrcX);
2450 const double dfRatioY = 1.5 - (dfSrcY - iSrcY);
2451
2452 T* pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
2453
2454
2455 if( iSrcX >= 0 && iSrcX+1 < poWK->nSrcXSize
2456 && iSrcY >= 0 && iSrcY+1 < poWK->nSrcYSize )
2457 {
2458 // TODO(schwehr): Should be able to remove these casts.
2459 const double dfAccumulator =
2460 (static_cast<double>(pSrc[iSrcOffset]) * dfRatioX +
2461 static_cast<double>(pSrc[iSrcOffset+1]) * (1.0 - dfRatioX)) *
2462 dfRatioY +
2463 (static_cast<double>(pSrc[iSrcOffset+poWK->nSrcXSize]) * dfRatioX +
2464 static_cast<double>(pSrc[iSrcOffset+1+poWK->nSrcXSize]) *
2465 (1.0 - dfRatioX)) * (1.0-dfRatioY);
2466
2467 *pValue = GWKRoundValueT<T>(dfAccumulator);
2468
2469 return true;
2470 }
2471
2472 double dfMult = 0.0;
2473 double dfAccumulatorDivisor = 0.0;
2474 double dfAccumulator = 0.0;
2475
2476 // Upper Left Pixel.
2477 if( iSrcX >= 0 && iSrcX < poWK->nSrcXSize
2478 && iSrcY >= 0 && iSrcY < poWK->nSrcYSize )
2479 {
2480 dfMult = dfRatioX * dfRatioY;
2481
2482 dfAccumulatorDivisor += dfMult;
2483
2484 dfAccumulator += pSrc[iSrcOffset] * dfMult;
2485 }
2486
2487 // Upper Right Pixel.
2488 if( iSrcX+1 >= 0 && iSrcX+1 < poWK->nSrcXSize
2489 && iSrcY >= 0 && iSrcY < poWK->nSrcYSize )
2490 {
2491 dfMult = (1.0 - dfRatioX) * dfRatioY;
2492
2493 dfAccumulatorDivisor += dfMult;
2494
2495 dfAccumulator += pSrc[iSrcOffset+1] * dfMult;
2496 }
2497
2498 // Lower Right Pixel.
2499 if( iSrcX+1 >= 0 && iSrcX+1 < poWK->nSrcXSize
2500 && iSrcY+1 >= 0 && iSrcY+1 < poWK->nSrcYSize )
2501 {
2502 dfMult = (1.0 - dfRatioX) * (1.0 - dfRatioY);
2503
2504 dfAccumulatorDivisor += dfMult;
2505
2506 dfAccumulator += pSrc[iSrcOffset+1+poWK->nSrcXSize] * dfMult;
2507 }
2508
2509 // Lower Left Pixel.
2510 if( iSrcX >= 0 && iSrcX < poWK->nSrcXSize
2511 && iSrcY+1 >= 0 && iSrcY+1 < poWK->nSrcYSize )
2512 {
2513 dfMult = dfRatioX * (1.0 - dfRatioY);
2514
2515 dfAccumulatorDivisor += dfMult;
2516
2517 dfAccumulator += pSrc[iSrcOffset+poWK->nSrcXSize] * dfMult;
2518 }
2519
2520/* -------------------------------------------------------------------- */
2521/* Return result. */
2522/* -------------------------------------------------------------------- */
2523 double dfValue = 0.0;
2524
2525 if( dfAccumulatorDivisor < 0.00001 )
2526 {
2527 *pValue = 0;
2528 return false;
2529 }
2530 else if( dfAccumulatorDivisor == 1.0 )
2531 {
2532 dfValue = dfAccumulator;
2533 }
2534 else
2535 {
2536 dfValue = dfAccumulator / dfAccumulatorDivisor;
2537 }
2538
2539 *pValue = GWKRoundValueT<T>(dfValue);
2540
2541 return true;
2542}
2543
2544/************************************************************************/
2545/* GWKCubicResample() */
2546/* Set of bicubic interpolators using cubic convolution. */
2547/************************************************************************/
2548
2549// http://verona.fi-p.unam.mx/boris/practicas/CubConvInterp.pdf Formula 18
2550// or http://en.wikipedia.org/wiki/Cubic_Hermite_spline : CINTx(p_1,p0,p1,p2)
2551// http://en.wikipedia.org/wiki/Bicubic_interpolation: matrix notation
2552
2553// TODO(schwehr): Use an inline function.
2554#define CubicConvolution(distance1,distance2,distance3,f0,f1,f2,f3) \
2555 ( f1 \
2556 + 0.5 * (distance1*(f2 - f0) \
2557 + distance2*(2.0*f0 - 5.0*f1 + 4.0*f2 - f3) \
2558 + distance3*(3.0*(f1 - f2) + f3 - f0)))
2559
2560/************************************************************************/
2561/* GWKCubicComputeWeights() */
2562/************************************************************************/
2563
2564// adfCoeffs[2] = 1.0 - (adfCoeffs[0] + adfCoeffs[1] - adfCoeffs[3]);
2565
2566// TODO(schwehr): Use an inline function.
2567#define GWKCubicComputeWeights(dfX_, adfCoeffs) \
2568{ \
2569 const double dfX = dfX_; \
2570 const double dfHalfX = 0.5 * dfX; \
2571 const double dfThreeX = 3.0 * dfX; \
2572 const double dfHalfX2 = dfHalfX * dfX; \
2573 \
2574 adfCoeffs[0] = dfHalfX * (-1 + dfX * (2 - dfX)); \
2575 adfCoeffs[1] = 1 + dfHalfX2 * (-5 + dfThreeX); \
2576 adfCoeffs[2] = dfHalfX * (1 + dfX * (4 - dfThreeX)); \
2577 adfCoeffs[3] = dfHalfX2 * (-1 + dfX); \
2578}
2579
2580// TODO(schwehr): Use an inline function.
2581#define CONVOL4(v1, v2) ((v1)[0] * (v2)[0] + (v1)[1] * (v2)[1] + \
2582 (v1)[2] * (v2)[2] + (v1)[3] * (v2)[3])
2583
2584#if 0
2585// Optimal (in theory...) for max 2 convolutions: 14 multiplications
2586// instead of 17.
2587// TODO(schwehr): Use an inline function.
2588#define GWKCubicComputeWeights_Optim2MAX(dfX_, adfCoeffs, dfHalfX) \
2589{ \
2590 const double dfX = dfX_; \
2591 dfHalfX = 0.5 * dfX; \
2592 const double dfThreeX = 3.0 * dfX; \
2593 const double dfXMinus1 = dfX - 1; \
2594 \
2595 adfCoeffs[0] = -1 + dfX * (2 - dfX); \
2596 adfCoeffs[1] = dfX * (-5 + dfThreeX); \
2597 /*adfCoeffs[2] = 1 + dfX * (4 - dfThreeX);*/ \
2598 adfCoeffs[2] = -dfXMinus1 - adfCoeffs[1]; \
2599 /*adfCoeffs[3] = dfX * (-1 + dfX); */ \
2600 adfCoeffs[3] = dfXMinus1 - adfCoeffs[0]; \
2601}
2602
2603// TODO(schwehr): Use an inline function.
2604#define CONVOL4_Optim2MAX(adfCoeffs, v, dfHalfX) \
2605 ((v)[1] + (dfHalfX) * (\
2606 (adfCoeffs)[0] * (v)[0] + (adfCoeffs)[1] * (v)[1] + \
2607 (adfCoeffs)[2] * (v)[2] + (adfCoeffs)[3] * (v)[3]))
2608#endif
2609
2610static bool GWKCubicResample4Sample( GDALWarpKernel *poWK, int iBand,
2611 double dfSrcX, double dfSrcY,
2612 double *pdfDensity,
2613 double *pdfReal, double *pdfImag )
2614
2615{
2616 const int iSrcX = static_cast<int>(dfSrcX - 0.5);
2617 const int iSrcY = static_cast<int>(dfSrcY - 0.5);
2618 const int iSrcOffset = iSrcX + iSrcY * poWK->nSrcXSize;
2619 const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
2620 const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
2621 double adfDensity[4] = {};
2622 double adfReal[4] = {};
2623 double adfImag[4] = {};
2624 int i;
2625
2626 // Get the bilinear interpolation at the image borders.
2627 if( iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize
2628 || iSrcY - 1 < 0 || iSrcY + 2 >= poWK->nSrcYSize )
2629 return GWKBilinearResample4Sample( poWK, iBand, dfSrcX, dfSrcY,
2630 pdfDensity, pdfReal, pdfImag );
2631
2632 double adfValueDens[4] = {};
2633 double adfValueReal[4] = {};
2634 double adfValueImag[4] = {};
2635
2636 double adfCoeffsX[4] = {};
2637 GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
2638
2639 for( i = -1; i < 3; i++ )
2640 {
2641 if( !GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
2642 2, adfDensity, adfReal, adfImag)
2643 || adfDensity[0] < SRC_DENSITY_THRESHOLD
2644 || adfDensity[1] < SRC_DENSITY_THRESHOLD
2645 || adfDensity[2] < SRC_DENSITY_THRESHOLD
2646 || adfDensity[3] < SRC_DENSITY_THRESHOLD )
2647 {
2648 return GWKBilinearResample4Sample( poWK, iBand, dfSrcX, dfSrcY,
2649 pdfDensity, pdfReal, pdfImag );
2650 }
2651
2652 adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
2653 adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
2654 adfValueImag[i + 1] = CONVOL4(adfCoeffsX, adfImag);
2655 }
2656
2657/* -------------------------------------------------------------------- */
2658/* For now, if we have any pixels missing in the kernel area, */
2659/* we fallback on using bilinear interpolation. Ideally we */
2660/* should do "weight adjustment" of our results similarly to */
2661/* what is done for the cubic spline and lanc. interpolators. */
2662/* -------------------------------------------------------------------- */
2663
2664 double adfCoeffsY[4] = {};
2665 GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
2666
2667 *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
2668 *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
2669 *pdfImag = CONVOL4(adfCoeffsY, adfValueImag);
2670
2671 return true;
2672}
2673
2674// We do not define USE_SSE_CUBIC_IMPL since in practice, it gives zero
2675// perf benefit.
2676
2677#if defined(USE_SSE_CUBIC_IMPL) && (defined(__x86_64) || defined(_M_X64))
2678
2679/************************************************************************/
2680/* XMMLoad4Values() */
2681/* */
2682/* Load 4 packed byte or uint16, cast them to float and put them in a */
2683/* m128 register. */
2684/************************************************************************/
2685
2686static CPL_INLINE __m128 XMMLoad4Values(const GByte* ptr)
2687{
2688#ifdef CPL_CPU_REQUIRES_ALIGNED_ACCESS
2689 unsigned int i;
2690 memcpy(&i, ptr, 4);
2691 __m128i xmm_i = _mm_cvtsi32_si128(s);
2692#else
2693 __m128i xmm_i = _mm_cvtsi32_si128(*(unsigned int*)(ptr));
2694#endif
2695 // Zero extend 4 packed unsigned 8-bit integers in a to packed
2696 // 32-bit integers.
2697#if __SSE4_1__
2698 xmm_i = _mm_cvtepu8_epi32(xmm_i);
2699#else
2700 xmm_i = _mm_unpacklo_epi8(xmm_i, _mm_setzero_si128());
2701 xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
2702#endif
2703 return _mm_cvtepi32_ps(xmm_i);
2704}
2705
2706static CPL_INLINE __m128 XMMLoad4Values(const GUInt16* ptr)
2707{
2708#ifdef CPL_CPU_REQUIRES_ALIGNED_ACCESS
2709 GUInt64 i;
2710 memcpy(&i, ptr, 8);
2711 __m128i xmm_i = _mm_cvtsi64_si128(s);
2712#else
2713 __m128i xmm_i = _mm_cvtsi64_si128(*(GUInt64*)(ptr));
2714#endif
2715 // Zero extend 4 packed unsigned 16-bit integers in a to packed
2716 // 32-bit integers.
2717#if __SSE4_1__
2718 xmm_i = _mm_cvtepu16_epi32(xmm_i);
2719#else
2720 xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
2721#endif
2722 return _mm_cvtepi32_ps(xmm_i);
2723}
2724
2725/************************************************************************/
2726/* XMMHorizontalAdd() */
2727/* */
2728/* Return the sum of the 4 floating points of the register. */
2729/************************************************************************/
2730
2731#if __SSE3__
2732static CPL_INLINE float XMMHorizontalAdd(__m128 v)
2733{
2734 __m128 shuf = _mm_movehdup_ps(v); // (v3 , v3 , v1 , v1)
2735 __m128 sums = _mm_add_ps(v, shuf); // (v3+v3, v3+v2, v1+v1, v1+v0)
2736 shuf = _mm_movehl_ps(shuf, sums); // (v3 , v3 , v3+v3, v3+v2)
2737 sums = _mm_add_ss(sums, shuf); // (v1+v0)+(v3+v2)
2738 return _mm_cvtss_f32(sums);
2739}
2740#else
2741static CPL_INLINE float XMMHorizontalAdd(__m128 v)
2742{
2743 __m128 shuf = _mm_movehl_ps(v, v); // (v3 , v2 , v3 , v2)
2744 __m128 sums = _mm_add_ps(v, shuf); // (v3+v3, v2+v2, v3+v1, v2+v0)
2745 shuf = _mm_shuffle_ps(sums, sums, 1); // (v2+v0, v2+v0, v2+v0, v3+v1)
2746 sums = _mm_add_ss(sums, shuf); // (v2+v0)+(v3+v1)
2747 return _mm_cvtss_f32(sums);
2748}
2749#endif
2750
2751#endif // defined(USE_SSE_CUBIC_IMPL) && (defined(__x86_64) || defined(_M_X64))
2752
2753/************************************************************************/
2754/* GWKCubicResampleSrcMaskIsDensity4SampleRealT() */
2755/************************************************************************/
2756
2757// Note: if USE_SSE_CUBIC_IMPL, only instantiate that for Byte and UInt16,
2758// because there are a few assumptions above those types.
2759
2760template<class T>
2761static CPL_INLINE bool GWKCubicResampleSrcMaskIsDensity4SampleRealT(
2762 GDALWarpKernel *poWK, int iBand,
2763 double dfSrcX, double dfSrcY,
2764 double *pdfDensity,
2765 double *pdfReal )
2766{
2767 const int iSrcX = static_cast<int>(dfSrcX - 0.5);
2768 const int iSrcY = static_cast<int>(dfSrcY - 0.5);
2769 const int iSrcOffset = iSrcX + iSrcY * poWK->nSrcXSize;
2770
2771 // Get the bilinear interpolation at the image borders.
2772 if( iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize
2773 || iSrcY - 1 < 0 || iSrcY + 2 >= poWK->nSrcYSize )
2774 {
2775 double adfImagIgnored[4] = {};
2776 return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
2777 pdfDensity, pdfReal, adfImagIgnored);
2778 }
2779
2780#if defined(USE_SSE_CUBIC_IMPL) && (defined(__x86_64) || defined(_M_X64))
2781 const float fDeltaX = static_cast<float>(dfSrcX) - 0.5f - iSrcX;
2782 const float fDeltaY = static_cast<float>(dfSrcY) - 0.5f - iSrcY;
2783
2784 // TODO(schwehr): Explain the magic numbers.
2785 float afTemp[4 + 4 + 4 + 1];
2786 float* pafAligned =
2787 reinterpret_cast<float*>(afTemp + ((size_t)afTemp & 0xf));
2788 float* pafCoeffs = pafAligned;
2789 float* pafDensity = pafAligned + 4;
2790 float* pafValue = pafAligned + 8;
2791
2792 const float fHalfDeltaX = 0.5f * fDeltaX;
2793 const float fThreeDeltaX = 3.0f * fDeltaX;
2794 const float fHalfDeltaX2 = fHalfDeltaX * fDeltaX;
2795
2796 pafCoeffs[0] = fHalfDeltaX * (-1 + fDeltaX * (2 - fDeltaX));
2797 pafCoeffs[1] = 1 + fHalfDeltaX2 * (-5 + fThreeDeltaX);
2798 pafCoeffs[2] = fHalfDeltaX * (1 + fDeltaX * (4 - fThreeDeltaX));
2799 pafCoeffs[3] = fHalfDeltaX2 * (-1 + fDeltaX);
2800 __m128 xmmCoeffs = _mm_load_ps(pafCoeffs);
2801 const __m128 xmmThreshold = _mm_load1_ps(&SRC_DENSITY_THRESHOLD);
2802
2803 __m128 xmmMaskLowDensity = _mm_setzero_ps();
2804 for( int i = -1, iOffset = iSrcOffset - poWK->nSrcXSize - 1;
2805 i < 3; i++, iOffset += poWK->nSrcXSize )
2806 {
2807 const __m128 xmmDensity = _mm_loadu_ps(
2808 poWK->pafUnifiedSrcDensity + iOffset);
2809 xmmMaskLowDensity = _mm_or_ps(xmmMaskLowDensity,
2810 _mm_cmplt_ps(xmmDensity, xmmThreshold));
2811 pafDensity[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
2812
2813 const __m128 xmmValues = XMMLoad4Values(
2814 ((T*) poWK->papabySrcImage[iBand]) + iOffset);
2815 pafValue[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValues));
2816 }
2817 if( _mm_movemask_ps(xmmMaskLowDensity) )
2818 {
2819 double adfImagIgnored[4] = {};
2820 return GWKBilinearResample4Sample( poWK, iBand, dfSrcX, dfSrcY,
2821 pdfDensity, pdfReal, adfImagIgnored );
2822 }
2823
2824 const float fHalfDeltaY = 0.5f * fDeltaY;
2825 const float fThreeDeltaY = 3.0f * fDeltaY;
2826 const float fHalfDeltaY2 = fHalfDeltaY * fDeltaY;
2827
2828 pafCoeffs[0] = fHalfDeltaY * (-1 + fDeltaY * (2 - fDeltaY));
2829 pafCoeffs[1] = 1 + fHalfDeltaY2 * (-5 + fThreeDeltaY);
2830 pafCoeffs[2] = fHalfDeltaY * (1 + fDeltaY * (4 - fThreeDeltaY));
2831 pafCoeffs[3] = fHalfDeltaY2 * (-1 + fDeltaY);
2832
2833 xmmCoeffs = _mm_load_ps(pafCoeffs);
2834
2835 const __m128 xmmDensity = _mm_load_ps(pafDensity);
2836 const __m128 xmmValue = _mm_load_ps(pafValue);
2837 *pdfDensity = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
2838 *pdfReal = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValue));
2839
2840 // We did all above computations on float32 whereas the general case is
2841 // float64. Not sure if one is fundamentally more correct than the other
2842 // one, but we want our optimization to give the same result as the
2843 // general case as much as possible, so if the resulting value is
2844 // close to some_int_value + 0.5, redo the computation with the general
2845 // case.
2846 // Note: If other types than Byte or UInt16, will need changes.
2847 if( fabs(*pdfReal - static_cast<int>(*pdfReal) - 0.5) > .007 )
2848 return true;
2849
2850#endif // defined(USE_SSE_CUBIC_IMPL) && (defined(__x86_64) || defined(_M_X64))
2851
2852 const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
2853 const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
2854
2855 double adfValueDens[4] = {};
2856 double adfValueReal[4] = {};
2857
2858 double adfCoeffsX[4] = {};
2859 GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
2860
2861 double adfCoeffsY[4] = {};
2862 GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
2863
2864 for( int i = -1; i < 3; i++ )
2865 {
2866 const int iOffset = iSrcOffset+i*poWK->nSrcXSize - 1;
2867#if !(defined(USE_SSE_CUBIC_IMPL) && (defined(__x86_64) || defined(_M_X64)))
2868 if( poWK->pafUnifiedSrcDensity[iOffset + 0] < SRC_DENSITY_THRESHOLD ||
2869 poWK->pafUnifiedSrcDensity[iOffset + 1] < SRC_DENSITY_THRESHOLD ||
2870 poWK->pafUnifiedSrcDensity[iOffset + 2] < SRC_DENSITY_THRESHOLD ||
2871 poWK->pafUnifiedSrcDensity[iOffset + 3] < SRC_DENSITY_THRESHOLD )
2872 {
2873 double adfImagIgnored[4] = {};
2874 return GWKBilinearResample4Sample( poWK, iBand, dfSrcX, dfSrcY,
2875 pdfDensity, pdfReal, adfImagIgnored );
2876 }
2877#endif
2878
2879 adfValueDens[i + 1] = CONVOL4(
2880 adfCoeffsX,
2881 poWK->pafUnifiedSrcDensity + iOffset);
2882
2883 adfValueReal[i + 1] = CONVOL4(
2884 adfCoeffsX,
2885 reinterpret_cast<T*>(poWK->papabySrcImage[iBand]) + iOffset);
2886 }
2887
2888 *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
2889 *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
2890
2891 return true;
2892}
2893
2894/************************************************************************/
2895/* GWKCubicResampleSrcMaskIsDensity4SampleReal() */
2896/* Bi-cubic when source has and only has pafUnifiedSrcDensity. */
2897/************************************************************************/
2898
2899static bool GWKCubicResampleSrcMaskIsDensity4SampleReal(
2900 GDALWarpKernel *poWK, int iBand,
2901 double dfSrcX, double dfSrcY,
2902 double *pdfDensity,
2903 double *pdfReal )
2904
2905{
2906 const int iSrcX = static_cast<int>(dfSrcX - 0.5);
2907 const int iSrcY = static_cast<int>(dfSrcY - 0.5);
2908 const int iSrcOffset = iSrcX + iSrcY * poWK->nSrcXSize;
2909 const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
2910 const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
2911
2912 // Get the bilinear interpolation at the image borders.
2913 if( iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize
2914 || iSrcY - 1 < 0 || iSrcY + 2 >= poWK->nSrcYSize )
2915 {
2916 double adfImagIgnored[4] = {};
2917 return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
2918 pdfDensity, pdfReal, adfImagIgnored);
2919 }
2920
2921 double adfCoeffsX[4] = {};
2922 GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
2923
2924 double adfCoeffsY[4] = {};
2925 GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
2926
2927 double adfValueDens[4] = {};
2928 double adfValueReal[4] = {};
2929 double adfDensity[4] = {};
2930 double adfReal[4] = {};
2931 double adfImagIgnored[4] = {};
2932
2933 for( int i = -1; i < 3; i++ )
2934 {
2935 if( !GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
2936 2, adfDensity, adfReal, adfImagIgnored)
2937 || adfDensity[0] < SRC_DENSITY_THRESHOLD
2938 || adfDensity[1] < SRC_DENSITY_THRESHOLD
2939 || adfDensity[2] < SRC_DENSITY_THRESHOLD
2940 || adfDensity[3] < SRC_DENSITY_THRESHOLD )
2941 {
2942 return GWKBilinearResample4Sample( poWK, iBand, dfSrcX, dfSrcY,
2943 pdfDensity, pdfReal, adfImagIgnored );
2944 }
2945
2946 adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
2947 adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
2948 }
2949
2950 *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
2951 *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
2952
2953 return true;
2954}
2955
2956template<class T>
2957static bool GWKCubicResampleNoMasks4SampleT( GDALWarpKernel *poWK, int iBand,
2958 double dfSrcX, double dfSrcY,
2959 T *pValue )
2960
2961{
2962 const int iSrcX = static_cast<int>(dfSrcX - 0.5);
2963 const int iSrcY = static_cast<int>(dfSrcY - 0.5);
2964 const int iSrcOffset = iSrcX + iSrcY * poWK->nSrcXSize;
2965 const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
2966 const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
2967 const double dfDeltaY2 = dfDeltaY * dfDeltaY;
2968 const double dfDeltaY3 = dfDeltaY2 * dfDeltaY;
2969
2970 // Get the bilinear interpolation at the image borders.
2971 if( iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize
2972 || iSrcY - 1 < 0 || iSrcY + 2 >= poWK->nSrcYSize )
2973 return GWKBilinearResampleNoMasks4SampleT ( poWK, iBand, dfSrcX, dfSrcY,
2974 pValue );
2975
2976 double adfCoeffs[4] = {};
2977 GWKCubicComputeWeights(dfDeltaX, adfCoeffs);
2978
2979 double adfValue[4] = {};
2980
2981 for( int i = -1; i < 3; i++ )
2982 {
2983 const int iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;
2984
2985 adfValue[i + 1] = CONVOL4(
2986 adfCoeffs,
2987 reinterpret_cast<T*>(poWK->papabySrcImage[iBand]) + iOffset);
2988 }
2989
2990 const double dfValue = CubicConvolution(
2991 dfDeltaY, dfDeltaY2, dfDeltaY3,
2992 adfValue[0], adfValue[1], adfValue[2], adfValue[3]);
2993
2994 *pValue = GWKClampValueT<T>(dfValue);
2995
2996 return true;
2997}
2998
2999/************************************************************************/
3000/* GWKLanczosSinc() */
3001/************************************************************************/
3002
3003/*
3004 * Lanczos windowed sinc interpolation kernel with radius r.
3005 * /
3006 * | sinc(x) * sinc(x/r), if |x| < r
3007 * L(x) = | 1, if x = 0 ,
3008 * | 0, otherwise
3009 * \
3010 *
3011 * where sinc(x) = sin(PI * x) / (PI * x).
3012 */
3013
3014static double GWKLanczosSinc( double dfX )
3015{
3016 if( dfX == 0.0 )
3017 return 1.0;
3018
3019 const double dfPIX = M_PI * dfX;
3020 const double dfPIXoverR = dfPIX / 3;
3021 const double dfPIX2overR = dfPIX * dfPIXoverR;
3022 return sin(dfPIX) * sin(dfPIXoverR) / dfPIX2overR;
3023}
3024
3025static double GWKLanczosSinc4Values( double* padfValues )
3026{
3027 for( int i = 0; i < 4; i++ )
3028 {
3029 if( padfValues[i] == 0.0 )
3030 {
3031 padfValues[i] = 1.0;
3032 }
3033 else
3034 {
3035 const double dfPIX = M_PI * padfValues[i];
3036 const double dfPIXoverR = dfPIX / 3;
3037 const double dfPIX2overR = dfPIX * dfPIXoverR;
3038 padfValues[i] = sin(dfPIX) * sin(dfPIXoverR) / dfPIX2overR;
3039 }
3040 }
3041 return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
3042}
3043
3044/************************************************************************/
3045/* GWKBilinear() */
3046/************************************************************************/
3047
3048static double GWKBilinear( double dfX )
3049{
3050 double dfAbsX = fabs(dfX);
3051 if( dfAbsX <= 1.0 )
3052 return 1 - dfAbsX;
3053 else
3054 return 0.0;
3055}
3056
3057static double GWKBilinear4Values( double* padfValues )
3058{
3059 double dfAbsX0 = fabs(padfValues[0]);
3060 double dfAbsX1 = fabs(padfValues[1]);
3061 double dfAbsX2 = fabs(padfValues[2]);
3062 double dfAbsX3 = fabs(padfValues[3]);
3063 if( dfAbsX0 <= 1.0 )
3064 padfValues[0] = 1 - dfAbsX0;
3065 else
3066 padfValues[0] = 0.0;
3067 if( dfAbsX1 <= 1.0 )
3068 padfValues[1] = 1 - dfAbsX1;
3069 else
3070 padfValues[1] = 0.0;
3071 if( dfAbsX2 <= 1.0 )
3072 padfValues[2] = 1 - dfAbsX2;
3073 else
3074 padfValues[2] = 0.0;
3075 if( dfAbsX3 <= 1.0 )
3076 padfValues[3] = 1 - dfAbsX3;
3077 else
3078 padfValues[3] = 0.0;
3079 return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
3080}
3081
3082/************************************************************************/
3083/* GWKCubic() */
3084/************************************************************************/
3085
3086static double GWKCubic( double dfX )
3087{
3088 // http://en.wikipedia.org/wiki/Bicubic_interpolation#Bicubic_convolution_algorithm
3089 // W(x) formula with a = -0.5 (cubic hermite spline )
3090 // or http://www.cs.utexas.edu/users/fussell/courses/cs384g/lectures/mitchell/Mitchell.pdf
3091 // k(x) (formula 8) with (B,C)=(0,0.5) the Catmull-Rom spline
3092 double dfAbsX = fabs(dfX);
3093 if( dfAbsX <= 1.0 )
3094 {
3095 double dfX2 = dfX * dfX;
3096 return dfX2 * (1.5 * dfAbsX - 2.5) + 1;
3097 }
3098 else if( dfAbsX <= 2.0 )
3099 {
3100 double dfX2 = dfX * dfX;
3101 return dfX2 * (-0.5 * dfAbsX + 2.5) - 4 * dfAbsX + 2;
3102 }
3103 else
3104 return 0.0;
3105}
3106
3107static double GWKCubic4Values( double* padfValues )
3108{
3109 const double dfAbsX_0 = fabs(padfValues[0]);
3110 const double dfAbsX_1 = fabs(padfValues[1]);
3111 const double dfAbsX_2 = fabs(padfValues[2]);
3112 const double dfAbsX_3 = fabs(padfValues[3]);
3113 const double dfX2_0 = padfValues[0] * padfValues[0];
3114 const double dfX2_1 = padfValues[1] * padfValues[1];
3115 const double dfX2_2 = padfValues[2] * padfValues[2];
3116 const double dfX2_3 = padfValues[3] * padfValues[3];
3117
3118 double dfVal0 = 0.0;
3119 if( dfAbsX_0 <= 1.0 )
3120 dfVal0 = dfX2_0 * (1.5 * dfAbsX_0 - 2.5) + 1.0;
3121 else if( dfAbsX_0 <= 2.0 )
3122 dfVal0 = dfX2_0 * (-0.5 * dfAbsX_0 + 2.5) - 4.0 * dfAbsX_0 + 2.0;
3123
3124 double dfVal1 = 0.0;
3125 if( dfAbsX_1 <= 1.0 )
3126 dfVal1 = dfX2_1 * (1.5 * dfAbsX_1 - 2.5) + 1.0;
3127 else if( dfAbsX_1 <= 2.0 )
3128 dfVal1 = dfX2_1 * (-0.5 * dfAbsX_1 + 2.5) - 4.0 * dfAbsX_1 + 2.0;
3129
3130 double dfVal2 = 0.0;
3131 if( dfAbsX_2 <= 1.0 )
3132 dfVal2 = dfX2_2 * (1.5 * dfAbsX_2 - 2.5) + 1.0;
3133 else if( dfAbsX_2 <= 2.0 )
3134 dfVal2 = dfX2_2 * (-0.5 * dfAbsX_2 + 2.5) - 4.0 * dfAbsX_2 + 2.0;
3135
3136 double dfVal3 = 0.0;
3137 if( dfAbsX_3 <= 1.0 )
3138 dfVal3 = dfX2_3 * (1.5 * dfAbsX_3 - 2.5) + 1.0;
3139 else if( dfAbsX_3 <= 2.0 )
3140 dfVal3 = dfX2_3 * (-0.5 * dfAbsX_3 + 2.5) - 4.0 * dfAbsX_3 + 2.0;
3141
3142 padfValues[0] = dfVal0;
3143 padfValues[1] = dfVal1;
3144 padfValues[2] = dfVal2;
3145 padfValues[3] = dfVal3;
3146 return dfVal0 + dfVal1 + dfVal2 + dfVal3;
3147}
3148
3149/************************************************************************/
3150/* GWKBSpline() */
3151/************************************************************************/
3152
3153// http://www.cs.utexas.edu/users/fussell/courses/cs384g/lectures/mitchell/Mitchell.pdf
3154// with (B,C)=(1,0)
3155// 1/6 * ( 3 * |x|^3 - 6 * |x|^2 + 4) |x| < 1
3156// 1/6 * ( -|x|^3 + 6 |x|^2 - 12|x| + 8) |x| > 1
3157
3158static double GWKBSpline( double x )
3159{
3160 const double xp2 = x + 2.0;
3161 const double xp1 = x + 1.0;
3162 const double xm1 = x - 1.0;
3163
3164 // This will most likely be used, so we'll compute it ahead of time to
3165 // avoid stalling the processor.
3166 const double xp2c = xp2 * xp2 * xp2;
3167
3168 // Note that the test is computed only if it is needed.
3169 // TODO(schwehr): Make this easier to follow.
3170 return
3171 xp2 > 0.0
3172 ? ((xp1 > 0.0)?((x > 0.0)?((xm1 > 0.0)?
3173 -4.0 * xm1*xm1*xm1:0.0) +
3174 6.0 * x*x*x:0.0) +
3175 -4.0 * xp1*xp1*xp1:0.0) + xp2c
3176 : 0.0; // * 0.166666666666666666666
3177}
3178
3179static double GWKBSpline4Values( double* padfValues )
3180{
3181 for( int i = 0; i < 4; i++ )
3182 {
3183 const double x = padfValues[i];
3184 const double xp2 = x + 2.0;
3185 const double xp1 = x + 1.0;
3186 const double xm1 = x - 1.0;
3187
3188 // This will most likely be used, so we'll compute it ahead of time to
3189 // avoid stalling the processor.
3190 const double xp2c = xp2 * xp2 * xp2;
3191
3192 // Note that the test is computed only if it is needed.
3193 // TODO(schwehr): Make this easier to follow.
3194 padfValues[i] = (xp2 > 0.0)?((xp1 > 0.0)?((x > 0.0)?((xm1 > 0.0)?
3195 -4.0 * xm1*xm1*xm1:0.0) +
3196 6.0 * x*x*x:0.0) +
3197 -4.0 * xp1*xp1*xp1:0.0) +
3198 xp2c:0.0; // * 0.166666666666666666666
3199 }
3200 return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
3201}
3202/************************************************************************/
3203/* GWKResampleWrkStruct */
3204/************************************************************************/
3205
3206typedef struct _GWKResampleWrkStruct GWKResampleWrkStruct;
3207
3208typedef bool (*pfnGWKResampleType) ( GDALWarpKernel *poWK, int iBand,
3209 double dfSrcX, double dfSrcY,
3210 double *pdfDensity,
3211 double *pdfReal, double *pdfImag,
3212 GWKResampleWrkStruct* psWrkStruct );
3213
3214struct _GWKResampleWrkStruct
3215{
3216 pfnGWKResampleType pfnGWKResample;
3217
3218 // Space for saved X weights.
3219 double *padfWeightsX;
3220 bool *pabCalcX;
3221
3222 double *padfWeightsY; // Only used by GWKResampleOptimizedLanczos.
3223 int iLastSrcX; // Only used by GWKResampleOptimizedLanczos.
3224 int iLastSrcY; // Only used by GWKResampleOptimizedLanczos.
3225 double dfLastDeltaX; // Only used by GWKResampleOptimizedLanczos.
3226 double dfLastDeltaY; // Only used by GWKResampleOptimizedLanczos.
3227
3228 // Space for saving a row of pixels.
3229 double *padfRowDensity;
3230 double *padfRowReal;
3231 double *padfRowImag;
3232};
3233
3234/************************************************************************/
3235/* GWKResampleCreateWrkStruct() */
3236/************************************************************************/
3237
3238static bool GWKResample( GDALWarpKernel *poWK, int iBand,
3239 double dfSrcX, double dfSrcY,
3240 double *pdfDensity,
3241 double *pdfReal, double *pdfImag,
3242 GWKResampleWrkStruct* psWrkStruct );
3243
3244static bool GWKResampleOptimizedLanczos( GDALWarpKernel *poWK, int iBand,
3245 double dfSrcX, double dfSrcY,
3246 double *pdfDensity,
3247 double *pdfReal, double *pdfImag,
3248 GWKResampleWrkStruct* psWrkStruct );
3249
3250static GWKResampleWrkStruct* GWKResampleCreateWrkStruct(GDALWarpKernel *poWK)
3251{
3252 const int nXDist = ( poWK->nXRadius + 1 ) * 2;
3253 const int nYDist = ( poWK->nYRadius + 1 ) * 2;
3254
3255 GWKResampleWrkStruct* psWrkStruct = static_cast<GWKResampleWrkStruct *>(
3256 CPLMalloc(sizeof(GWKResampleWrkStruct)));
3257
3258 // Alloc space for saved X weights.
3259 psWrkStruct->padfWeightsX =
3260 static_cast<double *>(CPLCalloc( nXDist, sizeof(double)));
3261 psWrkStruct->pabCalcX =
3262 static_cast<bool *>(CPLMalloc(nXDist * sizeof(bool)));
3263
3264 psWrkStruct->padfWeightsY =
3265 static_cast<double *>(CPLCalloc(nYDist, sizeof(double)));
3266 psWrkStruct->iLastSrcX = -10;
3267 psWrkStruct->iLastSrcY = -10;
3268 psWrkStruct->dfLastDeltaX = -10;
3269 psWrkStruct->dfLastDeltaY = -10;
3270
3271 // Alloc space for saving a row of pixels.
3272 if( poWK->pafUnifiedSrcDensity == nullptr &&
3273 poWK->panUnifiedSrcValid == nullptr &&
3274 poWK->papanBandSrcValid == nullptr )
3275 {
3276 psWrkStruct->padfRowDensity = nullptr;
3277 }
3278 else
3279 {
3280 psWrkStruct->padfRowDensity =
3281 static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
3282 }
3283 psWrkStruct->padfRowReal =
3284 static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
3285 psWrkStruct->padfRowImag =
3286 static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
3287
3288 if( poWK->eResample == GRA_Lanczos )
3289 {
3290 psWrkStruct->pfnGWKResample = GWKResampleOptimizedLanczos;
3291
3292 const double dfXScale = poWK->dfXScale;
3293 if( dfXScale < 1.0 )
3294 {
3295 int iMin = poWK->nFiltInitX;
3296 int iMax = poWK->nXRadius;
3297 while( iMin * dfXScale < -3.0 )
3298 iMin++;
3299 while( iMax * dfXScale > 3.0 )
3300 iMax--;
3301
3302 for( int i = iMin; i <= iMax; ++i )
3303 {
3304 psWrkStruct->padfWeightsX[i-poWK->nFiltInitX] =
3305 GWKLanczosSinc(i * dfXScale);
3306 }
3307 }
3308
3309 const double dfYScale = poWK->dfYScale;
3310 if( dfYScale < 1.0 )
3311 {
3312 int jMin = poWK->nFiltInitY;
3313 int jMax = poWK->nYRadius;
3314 while( jMin * dfYScale < -3.0 )
3315 jMin++;
3316 while( jMax * dfYScale > 3.0 )
3317 jMax--;
3318
3319 for( int j = jMin; j <= jMax; ++j )
3320 {
3321 psWrkStruct->padfWeightsY[j-poWK->nFiltInitY] =
3322 GWKLanczosSinc(j * dfYScale);
3323 }
3324 }
3325 }
3326 else
3327 psWrkStruct->pfnGWKResample = GWKResample;
3328
3329 return psWrkStruct;
3330}
3331
3332/************************************************************************/
3333/* GWKResampleDeleteWrkStruct() */
3334/************************************************************************/
3335
3336static void GWKResampleDeleteWrkStruct(GWKResampleWrkStruct* psWrkStruct)
3337{
3338 CPLFree( psWrkStruct->padfWeightsX );
3339 CPLFree( psWrkStruct->padfWeightsY );
3340 CPLFree( psWrkStruct->pabCalcX );
3341 CPLFree( psWrkStruct->padfRowDensity );
3342 CPLFree( psWrkStruct->padfRowReal );
3343 CPLFree( psWrkStruct->padfRowImag );
3344 CPLFree( psWrkStruct );
3345}
3346
3347/************************************************************************/
3348/* GWKResample() */
3349/************************************************************************/
3350
3351static bool GWKResample( GDALWarpKernel *poWK, int iBand,
3352 double dfSrcX, double dfSrcY,
3353 double *pdfDensity,
3354 double *pdfReal, double *pdfImag,
3355 GWKResampleWrkStruct* psWrkStruct )
3356
3357{
3358 // Save as local variables to avoid following pointers in loops.
3359 const int nSrcXSize = poWK->nSrcXSize;
3360 const int nSrcYSize = poWK->nSrcYSize;
3361
3362 double dfAccumulatorReal = 0.0;
3363 double dfAccumulatorImag = 0.0;
3364 double dfAccumulatorDensity = 0.0;
3365 double dfAccumulatorWeight = 0.0;
3366 const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
3367 const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
3368 const int iSrcOffset = iSrcX + iSrcY * nSrcXSize;
3369 const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3370 const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3371
3372 const double dfXScale = poWK->dfXScale;
3373 const double dfYScale = poWK->dfYScale;
3374
3375 const int nXDist = ( poWK->nXRadius + 1 ) * 2;
3376
3377 // Space for saved X weights.
3378 double *padfWeightsX = psWrkStruct->padfWeightsX;
3379 bool *pabCalcX = psWrkStruct->pabCalcX;
3380
3381 // Space for saving a row of pixels.
3382 double *padfRowDensity = psWrkStruct->padfRowDensity;
3383 double *padfRowReal = psWrkStruct->padfRowReal;
3384 double *padfRowImag = psWrkStruct->padfRowImag;
3385
3386 // Mark as needing calculation (don't calculate the weights yet,
3387 // because a mask may render it unnecessary).
3388 memset( pabCalcX, false, nXDist * sizeof(bool) );
3389
3390 FilterFuncType pfnGetWeight = apfGWKFilter[poWK->eResample];
3391 CPLAssert(pfnGetWeight);
3392
3393 // Skip sampling over edge of image.
3394 int j = poWK->nFiltInitY;
3395 int jMax= poWK->nYRadius;
3396 if( iSrcY + j < 0 )
3397 j = -iSrcY;
3398 if( iSrcY + jMax >= nSrcYSize )
3399 jMax = nSrcYSize - iSrcY - 1;
3400
3401 int iMin = poWK->nFiltInitX;
3402 int iMax = poWK->nXRadius;
3403 if( iSrcX + iMin < 0 )
3404 iMin = -iSrcX;
3405 if( iSrcX + iMax >= nSrcXSize )
3406 iMax = nSrcXSize - iSrcX - 1;
3407
3408 const int bXScaleBelow1 = ( dfXScale < 1.0 );
3409 const int bYScaleBelow1 = ( dfYScale < 1.0 );
3410
3411 int iRowOffset = iSrcOffset + (j - 1) * nSrcXSize + iMin;
3412
3413 // Loop over pixel rows in the kernel.
3414 for( ; j <= jMax; ++j )
3415 {
3416 iRowOffset += nSrcXSize;
3417
3418 // Get pixel values.
3419 // We can potentially read extra elements after the "normal" end of the
3420 // source arrays, but the contract of papabySrcImage[iBand],
3421 // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
3422 // is to have WARP_EXTRA_ELTS reserved at their end.
3423 if( !GWKGetPixelRow( poWK, iBand, iRowOffset, (iMax-iMin+2)/2,
3424 padfRowDensity, padfRowReal, padfRowImag ) )
3425 continue;
3426
3427 // Calculate the Y weight.
3428 double dfWeight1 = ( bYScaleBelow1 ) ?
3429 pfnGetWeight((j - dfDeltaY) * dfYScale):
3430 pfnGetWeight(j - dfDeltaY);
3431
3432 // Iterate over pixels in row.
3433 double dfAccumulatorRealLocal = 0.0;
3434 double dfAccumulatorImagLocal = 0.0;
3435 double dfAccumulatorDensityLocal = 0.0;
3436 double dfAccumulatorWeightLocal = 0.0;
3437
3438 for( int i = iMin; i <= iMax; ++i )
3439 {
3440 // Skip sampling if pixel has zero density.
3441 if( padfRowDensity != nullptr &&
3442 padfRowDensity[i-iMin] < SRC_DENSITY_THRESHOLD )
3443 continue;
3444
3445 double dfWeight2 = 0.0;
3446
3447 // Make or use a cached set of weights for this row.
3448 if( pabCalcX[i-iMin] )
3449 {
3450 // Use saved weight value instead of recomputing it.
3451 dfWeight2 = padfWeightsX[i-iMin];
3452 }
3453 else
3454 {
3455 // Calculate & save the X weight.
3456 padfWeightsX[i-iMin] = dfWeight2 = ( bXScaleBelow1 ) ?
3457 pfnGetWeight((i - dfDeltaX) * dfXScale):
3458 pfnGetWeight(i - dfDeltaX);
3459
3460 pabCalcX[i-iMin] = true;
3461 }
3462
3463 // Accumulate!
3464 dfAccumulatorRealLocal += padfRowReal[i-iMin] * dfWeight2;
3465 dfAccumulatorImagLocal += padfRowImag[i-iMin] * dfWeight2;
3466 if( padfRowDensity != nullptr )
3467 dfAccumulatorDensityLocal += padfRowDensity[i-iMin] * dfWeight2;
3468 dfAccumulatorWeightLocal += dfWeight2;
3469 }
3470
3471 dfAccumulatorReal += dfAccumulatorRealLocal * dfWeight1;
3472 dfAccumulatorImag += dfAccumulatorImagLocal * dfWeight1;
3473 dfAccumulatorDensity += dfAccumulatorDensityLocal * dfWeight1;
3474 dfAccumulatorWeight += dfAccumulatorWeightLocal * dfWeight1;
3475 }
3476
3477 if( dfAccumulatorWeight < 0.000001 ||
3478 (padfRowDensity != nullptr && dfAccumulatorDensity < 0.000001) )
3479 {
3480 *pdfDensity = 0.0;
3481 return false;
3482 }
3483
3484 // Calculate the output taking into account weighting.
3485 if( dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001 )
3486 {
3487 *pdfReal = dfAccumulatorReal / dfAccumulatorWeight;
3488 *pdfImag = dfAccumulatorImag / dfAccumulatorWeight;
3489 if( padfRowDensity != nullptr )
3490 *pdfDensity = dfAccumulatorDensity / dfAccumulatorWeight;
3491 else
3492 *pdfDensity = 1.0;
3493 }
3494 else
3495 {
3496 *pdfReal = dfAccumulatorReal;
3497 *pdfImag = dfAccumulatorImag;
3498 if( padfRowDensity != nullptr )
3499 *pdfDensity = dfAccumulatorDensity;
3500 else
3501 *pdfDensity = 1.0;
3502 }
3503
3504 return true;
3505}
3506
3507/************************************************************************/
3508/* GWKResampleOptimizedLanczos() */
3509/************************************************************************/
3510
3511static bool GWKResampleOptimizedLanczos( GDALWarpKernel *poWK, int iBand,
3512 double dfSrcX, double dfSrcY,
3513 double *pdfDensity,
3514 double *pdfReal, double *pdfImag,
3515 GWKResampleWrkStruct* psWrkStruct )
3516
3517{
3518 // Save as local variables to avoid following pointers in loops.
3519 const int nSrcXSize = poWK->nSrcXSize;
3520 const int nSrcYSize = poWK->nSrcYSize;
3521
3522 double dfAccumulatorReal = 0.0;
3523 double dfAccumulatorImag = 0.0;
3524 double dfAccumulatorDensity = 0.0;
3525 double dfAccumulatorWeight = 0.0;
3526 const int iSrcX = (int) floor( dfSrcX - 0.5 );
3527 const int iSrcY = (int) floor( dfSrcY - 0.5 );
3528 const int iSrcOffset = iSrcX + iSrcY * nSrcXSize;
3529 const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3530 const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3531
3532 const double dfXScale = poWK->dfXScale;
3533 const double dfYScale = poWK->dfYScale;
3534
3535 // Space for saved X weights.
3536 double *padfWeightsX = psWrkStruct->padfWeightsX;
3537 double *padfWeightsY = psWrkStruct->padfWeightsY;
3538
3539 // Space for saving a row of pixels.
3540 double *padfRowDensity = psWrkStruct->padfRowDensity;
3541 double *padfRowReal = psWrkStruct->padfRowReal;
3542 double *padfRowImag = psWrkStruct->padfRowImag;
3543
3544 // Skip sampling over edge of image.
3545 int jMin = poWK->nFiltInitY;
3546 int jMax = poWK->nYRadius;
3547 if( iSrcY + jMin < 0 )
3548 jMin = -iSrcY;
3549 if( iSrcY + jMax >= nSrcYSize )
3550 jMax = nSrcYSize - iSrcY - 1;
3551
3552 int iMin = poWK->nFiltInitX;
3553 int iMax = poWK->nXRadius;
3554 if( iSrcX + iMin < 0 )
3555 iMin = -iSrcX;
3556 if( iSrcX + iMax >= nSrcXSize )
3557 iMax = nSrcXSize - iSrcX - 1;
3558
3559 if( dfXScale < 1.0 )
3560 {
3561 while( iMin * dfXScale < -3.0 )
3562 iMin++;
3563 while( iMax * dfXScale > 3.0 )
3564 iMax--;
3565 // padfWeightsX computed in GWKResampleCreateWrkStruct.
3566 }
3567 else
3568 {
3569 while( iMin - dfDeltaX < -3.0 )
3570 iMin++;
3571 while( iMax - dfDeltaX > 3.0 )
3572 iMax--;
3573
3574 if( iSrcX != psWrkStruct->iLastSrcX ||
3575 dfDeltaX != psWrkStruct->dfLastDeltaX )
3576 {
3577 // Optimisation of GWKLanczosSinc(i - dfDeltaX) based on the
3578 // following trigonometric formulas.
3579
3580// TODO(schwehr): Move this somewhere where it can be rendered at LaTeX.
3581// sin(M_PI * (dfBase + k)) = sin(M_PI * dfBase) * cos(M_PI * k) + cos(M_PI * dfBase) * sin(M_PI * k)
3582// sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k) + dfCosPIBase * sin(M_PI * k)
3583// sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k)
3584// sin(M_PI * (dfBase + k)) = dfSinPIBase * (((k % 2) == 0) ? 1 : -1)
3585
3586// sin(M_PI / dfR * (dfBase + k)) = sin(M_PI / dfR * dfBase) * cos(M_PI / dfR * k) + cos(M_PI / dfR * dfBase) * sin(M_PI / dfR * k)
3587// sin(M_PI / dfR * (dfBase + k)) = dfSinPIBaseOverR * cos(M_PI / dfR * k) + dfCosPIBaseOverR * sin(M_PI / dfR * k)
3588
3589 const double dfSinPIDeltaXOver3 = sin((-M_PI / 3.0) * dfDeltaX);
3590 const double dfSin2PIDeltaXOver3 =
3591 dfSinPIDeltaXOver3 * dfSinPIDeltaXOver3;
3592 // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaX < PI/2.
3593 const double dfCosPIDeltaXOver3 = sqrt(1.0 - dfSin2PIDeltaXOver3);
3594 const double dfSinPIDeltaX =
3595 (3.0 - 4 * dfSin2PIDeltaXOver3) * dfSinPIDeltaXOver3;
3596 const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
3597 const double dfInvPI2Over3xSinPIDeltaX =
3598 dfInvPI2Over3 * dfSinPIDeltaX;
3599 const double dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 =
3600 -0.5 * dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3;
3601 const double dfSinPIOver3 = 0.8660254037844386;
3602 const double dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3 =
3603 dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaX * dfCosPIDeltaXOver3;
3604 const double padfCst[] = {
3605 dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3,
3606 dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 -
3607 dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3,
3608 dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 +
3609 dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3 };
3610
3611 for( int i = iMin; i <= iMax; ++i )
3612 {
3613 const double dfX = i - dfDeltaX;
3614 if( dfX == 0.0 )
3615 padfWeightsX[i-poWK->nFiltInitX] = 1.0;
3616 else
3617 padfWeightsX[i-poWK->nFiltInitX] =
3618 padfCst[(i + 3) % 3] / (dfX * dfX);
3619#if DEBUG_VERBOSE
3620 // TODO(schwehr): AlmostEqual.
3621 //CPLAssert(fabs(padfWeightsX[i-poWK->nFiltInitX] -
3622 // GWKLanczosSinc(dfX, 3.0)) < 1e-10);
3623#endif
3624 }
3625
3626 psWrkStruct->iLastSrcX = iSrcX;
3627 psWrkStruct->dfLastDeltaX = dfDeltaX;
3628 }
3629 }
3630
3631 if( dfYScale < 1.0 )
3632 {
3633 while( jMin * dfYScale < -3.0 )
3634 jMin++;
3635 while( jMax * dfYScale > 3.0 )
3636 jMax--;
3637 // padfWeightsY computed in GWKResampleCreateWrkStruct.
3638 }
3639 else
3640 {
3641 while( jMin - dfDeltaY < -3.0 )
3642 jMin++;
3643 while( jMax - dfDeltaY > 3.0 )
3644 jMax--;
3645
3646 if( iSrcY != psWrkStruct->iLastSrcY ||
3647 dfDeltaY != psWrkStruct->dfLastDeltaY )
3648 {
3649 const double dfSinPIDeltaYOver3 = sin((-M_PI / 3.0) * dfDeltaY);
3650 const double dfSin2PIDeltaYOver3 =
3651 dfSinPIDeltaYOver3 * dfSinPIDeltaYOver3;
3652 // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaY < PI/2.
3653 const double dfCosPIDeltaYOver3 = sqrt(1.0 - dfSin2PIDeltaYOver3);
3654 const double dfSinPIDeltaY =
3655 (3.0 - 4.0 * dfSin2PIDeltaYOver3) * dfSinPIDeltaYOver3;
3656 const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
3657 const double dfInvPI2Over3xSinPIDeltaY =
3658 dfInvPI2Over3 * dfSinPIDeltaY;
3659 const double dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 =
3660 -0.5 * dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3;
3661 const double dfSinPIOver3 = 0.8660254037844386;
3662 const double dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3 =
3663 dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaY * dfCosPIDeltaYOver3;
3664 const double padfCst[] = {
3665 dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3,
3666 dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 -
3667 dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3,
3668 dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 +
3669 dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3 };
3670
3671 for( int j = jMin; j <= jMax; ++j )
3672 {
3673 const double dfY = j - dfDeltaY;
3674 if( dfY == 0.0 )
3675 padfWeightsY[j-poWK->nFiltInitY] = 1.0;
3676 else
3677 padfWeightsY[j-poWK->nFiltInitY] =
3678 padfCst[(j + 3) % 3] / (dfY * dfY);
3679#if DEBUG_VERBOSE
3680 // TODO(schwehr): AlmostEqual.
3681 //CPLAssert(fabs(padfWeightsY[j-poWK->nFiltInitY] -
3682 // GWKLanczosSinc(dfY, 3.0)) < 1e-10);
3683#endif
3684 }
3685
3686 psWrkStruct->iLastSrcY = iSrcY;
3687 psWrkStruct->dfLastDeltaY = dfDeltaY;
3688 }
3689 }
3690
3691 int iRowOffset = iSrcOffset + (jMin - 1) * nSrcXSize + iMin;
3692
3693 // If we have no density information, we can simply compute the
3694 // accumulated weight.
3695 if( padfRowDensity == nullptr )
3696 {
3697 double dfRowAccWeight = 0.0;
3698 for( int i = iMin; i <= iMax; ++i )
3699 {
3700 dfRowAccWeight += padfWeightsX[i-poWK->nFiltInitX];
3701 }
3702 double dfColAccWeight = 0.0;
3703 for( int j = jMin; j <= jMax; ++j )
3704 {
3705 dfColAccWeight += padfWeightsY[j-poWK->nFiltInitY];
3706 }
3707 dfAccumulatorWeight = dfRowAccWeight * dfColAccWeight;
3708 }
3709
3710 const bool bIsNonComplex = !GDALDataTypeIsComplex(poWK->eWorkingDataType);
3711
3712 // Loop over pixel rows in the kernel.
3713 for( int j = jMin; j <= jMax; ++j )
3714 {
3715 iRowOffset += nSrcXSize;
3716
3717 // Get pixel values.
3718 // We can potentially read extra elements after the "normal" end of the
3719 // source arrays, but the contract of papabySrcImage[iBand],
3720 // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
3721 // is to have WARP_EXTRA_ELTS reserved at their end.
3722 if( !GWKGetPixelRow( poWK, iBand, iRowOffset, (iMax-iMin+2)/2,
3723 padfRowDensity, padfRowReal, padfRowImag ) )
3724 continue;
3725
3726 const double dfWeight1 = padfWeightsY[j-poWK->nFiltInitY];
3727
3728 // Iterate over pixels in row.
3729 if( padfRowDensity != nullptr )
3730 {
3731 for( int i = iMin; i <= iMax; ++i )
3732 {
3733 // Skip sampling if pixel has zero density.
3734 if( padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD )
3735 continue;
3736
3737 // Use a cached set of weights for this row.
3738 const double dfWeight2 =
3739 dfWeight1 * padfWeightsX[i- poWK->nFiltInitX];
3740
3741 // Accumulate!
3742 dfAccumulatorReal += padfRowReal[i - iMin] * dfWeight2;
3743 dfAccumulatorImag += padfRowImag[i - iMin] * dfWeight2;
3744 dfAccumulatorDensity += padfRowDensity[i - iMin] * dfWeight2;
3745 dfAccumulatorWeight += dfWeight2;
3746 }
3747 }
3748 else if( bIsNonComplex )
3749 {
3750 double dfRowAccReal = 0.0;
3751 for( int i = iMin; i <= iMax; ++i )
3752 {
3753 const double dfWeight2 = padfWeightsX[i- poWK->nFiltInitX];
3754
3755 // Accumulate!
3756 dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
3757 }
3758
3759 dfAccumulatorReal += dfRowAccReal * dfWeight1;
3760 }
3761 else
3762 {
3763 double dfRowAccReal = 0.0;
3764 double dfRowAccImag = 0.0;
3765 for( int i = iMin; i <= iMax; ++i )
3766 {
3767 const double dfWeight2 = padfWeightsX[i- poWK->nFiltInitX];
3768
3769 // Accumulate!
3770 dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
3771 dfRowAccImag += padfRowImag[i - iMin] * dfWeight2;
3772 }
3773
3774 dfAccumulatorReal += dfRowAccReal * dfWeight1;
3775 dfAccumulatorImag += dfRowAccImag * dfWeight1;
3776 }
3777 }
3778
3779 if( dfAccumulatorWeight < 0.000001 ||
3780 (padfRowDensity != nullptr && dfAccumulatorDensity < 0.000001) )
3781 {
3782 *pdfDensity = 0.0;
3783 return false;
3784 }
3785
3786 // Calculate the output taking into account weighting.
3787 if( dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001 )
3788 {
3789 const double dfInvAcc = 1.0 / dfAccumulatorWeight;
3790 *pdfReal = dfAccumulatorReal * dfInvAcc;
3791 *pdfImag = dfAccumulatorImag * dfInvAcc;
3792 if( padfRowDensity != nullptr )
3793 *pdfDensity = dfAccumulatorDensity * dfInvAcc;
3794 else
3795 *pdfDensity = 1.0;
3796 }
3797 else
3798 {
3799 *pdfReal = dfAccumulatorReal;
3800 *pdfImag = dfAccumulatorImag;
3801 if( padfRowDensity != nullptr )
3802 *pdfDensity = dfAccumulatorDensity;
3803 else
3804 *pdfDensity = 1.0;
3805 }
3806
3807 return true;
3808}
3809
3810/************************************************************************/
3811/* GWKResampleNoMasksT() */
3812/************************************************************************/
3813
3814template <class T>
3815static bool GWKResampleNoMasksT( GDALWarpKernel *poWK, int iBand,
3816 double dfSrcX, double dfSrcY,
3817 T *pValue, double *padfWeight )
3818
3819{
3820 // Commonly used; save locally.
3821 const int nSrcXSize = poWK->nSrcXSize;
3822 const int nSrcYSize = poWK->nSrcYSize;
3823
3824 const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
3825 const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
3826 const int iSrcOffset = iSrcX + iSrcY * nSrcXSize;
3827
3828 const int nXRadius = poWK->nXRadius;
3829 const int nYRadius = poWK->nYRadius;
3830
3831 // Politely refuse to process invalid coordinates or obscenely small image.
3832 if( iSrcX >= nSrcXSize || iSrcY >= nSrcYSize
3833 || nXRadius > nSrcXSize || nYRadius > nSrcYSize )
3834 return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
3835 pValue);
3836
3837 T* pSrcBand = reinterpret_cast<T*>(poWK->papabySrcImage[iBand]);
3838 const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3839 const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3840
3841 const FilterFuncType pfnGetWeight = apfGWKFilter[poWK->eResample];
3842 CPLAssert(pfnGetWeight);
3843 const FilterFunc4ValuesType pfnGetWeight4Values =
3844 apfGWKFilter4Values[poWK->eResample];
3845 CPLAssert(pfnGetWeight4Values);
3846
3847 const double dfXScale = std::min(poWK->dfXScale, 1.0);
3848 const double dfYScale = std::min(poWK->dfYScale, 1.0);
3849
3850 // Loop over all rows in the kernel.
3851 double dfAccumulatorWeightHorizontal = 0.0;
3852 double dfAccumulatorWeightVertical = 0.0;
3853
3854 int iMin = 1 - nXRadius;
3855 if( iSrcX + iMin < 0 )
3856 iMin = -iSrcX;
3857 int iMax = nXRadius;
3858 if( iSrcX + iMax >= nSrcXSize-1 )
3859 iMax = nSrcXSize-1 - iSrcX;
3860 int i = iMin; // Used after for.
3861 int iC = 0; // Used after for.
3862 for( ; i+2 < iMax; i += 4, iC += 4 )
3863 {
3864 padfWeight[iC] = (i - dfDeltaX) * dfXScale;
3865 padfWeight[iC+1] = padfWeight[iC] + dfXScale;
3866 padfWeight[iC+2] = padfWeight[iC+1] + dfXScale;
3867 padfWeight[iC+3] = padfWeight[iC+2] + dfXScale;
3868 dfAccumulatorWeightHorizontal += pfnGetWeight4Values(padfWeight+iC);
3869 }
3870 for( ; i <= iMax; ++i, ++iC )
3871 {
3872 const double dfWeight = pfnGetWeight((i - dfDeltaX) * dfXScale);
3873 padfWeight[iC] = dfWeight;
3874 dfAccumulatorWeightHorizontal += dfWeight;
3875 }
3876
3877 int j = 1 - nYRadius;
3878 if( iSrcY + j < 0 )
3879 j = -iSrcY;
3880 int jMax = nYRadius;
3881 if( iSrcY + jMax >= nSrcYSize-1 )
3882 jMax = nSrcYSize-1 - iSrcY;
3883
3884 double dfAccumulator = 0.0;
3885
3886 for( ; j <= jMax; ++j )
3887 {
3888 const int iSampJ = iSrcOffset + j * nSrcXSize;
3889
3890 // Loop over all pixels in the row.
3891 double dfAccumulatorLocal = 0.0;
3892 double dfAccumulatorLocal2 = 0.0;
3893 iC = 0;
3894 i = iMin;
3895 // Process by chunk of 4 cols.
3896 for( ; i+2 < iMax; i += 4, iC += 4 )
3897 {
3898 // Retrieve the pixel & accumulate.
3899 dfAccumulatorLocal += pSrcBand[i+iSampJ] * padfWeight[iC];
3900 dfAccumulatorLocal += pSrcBand[i+1+iSampJ] * padfWeight[iC+1];
3901 dfAccumulatorLocal2 += pSrcBand[i+2+iSampJ] * padfWeight[iC+2];
3902 dfAccumulatorLocal2 += pSrcBand[i+3+iSampJ] * padfWeight[iC+3];
3903 }
3904 dfAccumulatorLocal += dfAccumulatorLocal2;
3905 if( i < iMax )
3906 {
3907 dfAccumulatorLocal += pSrcBand[i+iSampJ] * padfWeight[iC];
3908 dfAccumulatorLocal += pSrcBand[i+1+iSampJ] * padfWeight[iC+1];
3909 i += 2;
3910 iC += 2;
3911 }
3912 if( i == iMax )
3913 {
3914 dfAccumulatorLocal += pSrcBand[i+iSampJ] * padfWeight[iC];
3915 }
3916
3917 // Calculate the Y weight.
3918 const double dfWeight = pfnGetWeight((j - dfDeltaY) * dfYScale);
3919 dfAccumulator += dfWeight * dfAccumulatorLocal;
3920 dfAccumulatorWeightVertical += dfWeight;
3921 }
3922
3923 const double dfAccumulatorWeight =
3924 dfAccumulatorWeightHorizontal * dfAccumulatorWeightVertical;
3925
3926 *pValue = GWKClampValueT<T>(dfAccumulator / dfAccumulatorWeight);
3927
3928 return true;
3929}
3930
3931/* We restrict to 64bit processors because they are guaranteed to have SSE2 */
3932/* Could possibly be used too on 32bit, but we would need to check at runtime */
3933#if defined(__x86_64) || defined(_M_X64)
3934
3935/************************************************************************/
3936/* GWKResampleNoMasks_SSE2_T() */
3937/************************************************************************/
3938
3939template<class T>
3940static bool GWKResampleNoMasks_SSE2_T( GDALWarpKernel *poWK, int iBand,
3941 double dfSrcX, double dfSrcY,
3942 T *pValue, double *padfWeight )
3943{
3944 // Commonly used; save locally.
3945 const int nSrcXSize = poWK->nSrcXSize;
3946 const int nSrcYSize = poWK->nSrcYSize;
3947
3948 const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
3949 const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
3950 const int iSrcOffset = iSrcX + iSrcY * nSrcXSize;
3951 const int nXRadius = poWK->nXRadius;
3952 const int nYRadius = poWK->nYRadius;
3953
3954 // Politely refuse to process invalid coordinates or obscenely small image.
3955 if( iSrcX >= nSrcXSize || iSrcY >= nSrcYSize ||
3956 nXRadius > nSrcXSize || nYRadius > nSrcYSize )
3957 return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
3958 pValue);
3959
3960 const T* pSrcBand = reinterpret_cast<const T*>(poWK->papabySrcImage[iBand]);
3961
3962 const FilterFuncType pfnGetWeight = apfGWKFilter[poWK->eResample];
3963 CPLAssert(pfnGetWeight);
3964 const FilterFunc4ValuesType pfnGetWeight4Values =
3965 apfGWKFilter4Values[poWK->eResample];
3966 CPLAssert(pfnGetWeight4Values);
3967
3968 const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3969 const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3970 const double dfXScale = std::min(poWK->dfXScale, 1.0);
3971 const double dfYScale = std::min(poWK->dfYScale, 1.0);
3972
3973 // Loop over all rows in the kernel.
3974 double dfAccumulatorWeightHorizontal = 0.0;
3975 double dfAccumulatorWeightVertical = 0.0;
3976 double dfAccumulator = 0.0;
3977
3978 int iMin = 1 - nXRadius;
3979 if( iSrcX + iMin < 0 )
3980 iMin = -iSrcX;
3981 int iMax = nXRadius;
3982 if( iSrcX + iMax >= nSrcXSize-1 )
3983 iMax = nSrcXSize-1 - iSrcX;
3984 int i, iC;
3985 for( iC = 0, i = iMin; i+2 < iMax; i+=4, iC+=4 )
3986 {
3987 padfWeight[iC] = (i - dfDeltaX) * dfXScale;
3988 padfWeight[iC+1] = padfWeight[iC] + dfXScale;
3989 padfWeight[iC+2] = padfWeight[iC+1] + dfXScale;
3990 padfWeight[iC+3] = padfWeight[iC+2] + dfXScale;
3991 dfAccumulatorWeightHorizontal += pfnGetWeight4Values(padfWeight+iC);
3992 }
3993 for( ; i <= iMax; ++i, ++iC )
3994 {
3995 double dfWeight = pfnGetWeight((i - dfDeltaX) * dfXScale);
3996 padfWeight[iC] = dfWeight;
3997 dfAccumulatorWeightHorizontal += dfWeight;
3998 }
3999
4000 int j = 1 - nYRadius;
4001 if( iSrcY + j < 0 )
4002 j = -iSrcY;
4003 int jMax = nYRadius;
4004 if( iSrcY + jMax >= nSrcYSize-1 )
4005 jMax = nSrcYSize-1 - iSrcY;
4006
4007 // Process by chunk of 4 rows.
4008 for( ; j+2 < jMax; j+=4 )
4009 {
4010 int iSampJ = iSrcOffset + j * nSrcXSize;
4011
4012 // Loop over all pixels in the row.
4013 iC = 0;
4014 i = iMin;
4015 // Process by chunk of 4 cols.
4016 XMMReg4Double v_acc_1 = XMMReg4Double::Zero();
4017 XMMReg4Double v_acc_2 = XMMReg4Double::Zero();
4018 XMMReg4Double v_acc_3 = XMMReg4Double::Zero();
4019 XMMReg4Double v_acc_4 = XMMReg4Double::Zero();
4020 for( ; i+2 < iMax; i+=4, iC+=4 )
4021 {
4022 // Retrieve the pixel & accumulate.
4023 XMMReg4Double v_pixels_1 =
4024 XMMReg4Double::Load4Val(pSrcBand+i+iSampJ);
4025 XMMReg4Double v_pixels_2 =
4026 XMMReg4Double::Load4Val(pSrcBand+i+iSampJ+nSrcXSize);
4027 XMMReg4Double v_pixels_3 =
4028 XMMReg4Double::Load4Val(pSrcBand+i+iSampJ+2*nSrcXSize);
4029 XMMReg4Double v_pixels_4 =
4030 XMMReg4Double::Load4Val(pSrcBand+i+iSampJ+3*nSrcXSize);
4031
4032 XMMReg4Double v_padfWeight =
4033 XMMReg4Double::Load4Val(padfWeight + iC);
4034
4035 v_acc_1 += v_pixels_1 * v_padfWeight;
4036 v_acc_2 += v_pixels_2 * v_padfWeight;
4037 v_acc_3 += v_pixels_3 * v_padfWeight;
4038 v_acc_4 += v_pixels_4 * v_padfWeight;
4039 }
4040
4041 if( i < iMax )
4042 {
4043 XMMReg2Double v_pixels_1 =
4044 XMMReg2Double::Load2Val(pSrcBand+i+iSampJ);
4045 XMMReg2Double v_pixels_2 =
4046 XMMReg2Double::Load2Val(pSrcBand+i+iSampJ+nSrcXSize);
4047 XMMReg2Double v_pixels_3 =
4048 XMMReg2Double::Load2Val(pSrcBand+i+iSampJ+2*nSrcXSize);
4049 XMMReg2Double v_pixels_4 =
4050 XMMReg2Double::Load2Val(pSrcBand+i+iSampJ+3*nSrcXSize);
4051
4052 XMMReg2Double v_padfWeight =
4053 XMMReg2Double::Load2Val(padfWeight + iC);
4054
4055 v_acc_1.AddToLow( v_pixels_1 * v_padfWeight );
4056 v_acc_2.AddToLow( v_pixels_2 * v_padfWeight );
4057 v_acc_3.AddToLow( v_pixels_3 * v_padfWeight );
4058 v_acc_4.AddToLow( v_pixels_4 * v_padfWeight );
4059
4060 i+=2;
4061 iC+=2;
4062 }
4063
4064 double dfAccumulatorLocal_1 = v_acc_1.GetHorizSum();
4065 double dfAccumulatorLocal_2 = v_acc_2.GetHorizSum();
4066 double dfAccumulatorLocal_3 = v_acc_3.GetHorizSum();
4067 double dfAccumulatorLocal_4 = v_acc_4.GetHorizSum();
4068
4069 if( i == iMax )
4070 {
4071 dfAccumulatorLocal_1 +=
4072 static_cast<double>(pSrcBand[i+iSampJ]) * padfWeight[iC];
4073 dfAccumulatorLocal_2 +=
4074 static_cast<double>(pSrcBand[i+iSampJ + nSrcXSize]) *
4075 padfWeight[iC];
4076 dfAccumulatorLocal_3 +=
4077 static_cast<double>(pSrcBand[i+iSampJ + 2 * nSrcXSize]) *
4078 padfWeight[iC];
4079 dfAccumulatorLocal_4 +=
4080 static_cast<double>(pSrcBand[i+iSampJ + 3 * nSrcXSize]) *
4081 padfWeight[iC];
4082 }
4083
4084 // Calculate the Y weight.
4085 const double dfWeight0 = (j - dfDeltaY) * dfYScale;
4086 const double dfWeight1 = dfWeight0 + dfYScale;
4087 const double dfWeight2 = dfWeight1 + dfYScale;
4088 const double dfWeight3 = dfWeight2 + dfYScale;
4089 double adfWeight[4] = { dfWeight0, dfWeight1, dfWeight2, dfWeight3 };
4090
4091 dfAccumulatorWeightVertical += pfnGetWeight4Values(adfWeight);
4092 dfAccumulator += adfWeight[0] * dfAccumulatorLocal_1;
4093 dfAccumulator += adfWeight[1] * dfAccumulatorLocal_2;
4094 dfAccumulator += adfWeight[2] * dfAccumulatorLocal_3;
4095 dfAccumulator += adfWeight[3] * dfAccumulatorLocal_4;
4096 }
4097 for( ; j <= jMax; ++j )
4098 {
4099 int iSampJ = iSrcOffset + j * nSrcXSize;
4100
4101 // Loop over all pixels in the row.
4102 iC = 0;
4103 i = iMin;
4104 // Process by chunk of 4 cols.
4105 XMMReg4Double v_acc = XMMReg4Double::Zero();
4106 for( ; i+2 < iMax; i+=4, iC+=4 )
4107 {
4108 // Retrieve the pixel & accumulate.
4109 XMMReg4Double v_pixels= XMMReg4Double::Load4Val(pSrcBand+i+iSampJ);
4110 XMMReg4Double v_padfWeight =
4111 XMMReg4Double::Load4Val(padfWeight + iC);
4112
4113 v_acc += v_pixels * v_padfWeight;
4114 }
4115
4116 double dfAccumulatorLocal = v_acc.GetHorizSum();
4117
4118 if( i < iMax )
4119 {
4120 dfAccumulatorLocal += pSrcBand[i+iSampJ] * padfWeight[iC];
4121 dfAccumulatorLocal += pSrcBand[i+1+iSampJ] * padfWeight[iC+1];
4122 i+=2;
4123 iC+=2;
4124 }
4125 if( i == iMax )
4126 {
4127 dfAccumulatorLocal += (double)pSrcBand[i+iSampJ] * padfWeight[iC];
4128 }
4129
4130 // Calculate the Y weight.
4131 double dfWeight = pfnGetWeight((j - dfDeltaY) * dfYScale);
4132 dfAccumulator += dfWeight * dfAccumulatorLocal;
4133 dfAccumulatorWeightVertical += dfWeight;
4134 }
4135
4136 const double dfAccumulatorWeight =
4137 dfAccumulatorWeightHorizontal * dfAccumulatorWeightVertical;
4138
4139 *pValue = GWKClampValueT<T>(dfAccumulator / dfAccumulatorWeight);
4140
4141 return true;
4142}
4143
4144/************************************************************************/
4145/* GWKResampleNoMasksT<GByte>() */
4146/************************************************************************/
4147
4148template<>
4149bool GWKResampleNoMasksT<GByte>( GDALWarpKernel *poWK, int iBand,
4150 double dfSrcX, double dfSrcY,
4151 GByte *pValue, double *padfWeight )
4152{
4153 return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY,
4154 pValue, padfWeight);
4155}
4156
4157/************************************************************************/
4158/* GWKResampleNoMasksT<GInt16>() */
4159/************************************************************************/
4160
4161template<>
4162bool GWKResampleNoMasksT<GInt16>( GDALWarpKernel *poWK, int iBand,
4163 double dfSrcX, double dfSrcY,
4164 GInt16 *pValue, double *padfWeight )
4165{
4166 return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY,
4167 pValue, padfWeight);
4168}
4169
4170/************************************************************************/
4171/* GWKResampleNoMasksT<GUInt16>() */
4172/************************************************************************/
4173
4174template<>
4175bool GWKResampleNoMasksT<GUInt16>( GDALWarpKernel *poWK, int iBand,
4176 double dfSrcX, double dfSrcY,
4177 GUInt16 *pValue, double *padfWeight )
4178{
4179 return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY,
4180 pValue, padfWeight);
4181}
4182
4183/************************************************************************/
4184/* GWKResampleNoMasksT<float>() */
4185/************************************************************************/
4186
4187template<>
4188bool GWKResampleNoMasksT<float>( GDALWarpKernel *poWK, int iBand,
4189 double dfSrcX, double dfSrcY,
4190 float *pValue, double *padfWeight )
4191{
4192 return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY,
4193 pValue, padfWeight);
4194}
4195
4196#ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
4197
4198/************************************************************************/
4199/* GWKResampleNoMasksT<double>() */
4200/************************************************************************/
4201
4202template<>
4203bool GWKResampleNoMasksT<double>( GDALWarpKernel *poWK, int iBand,
4204 double dfSrcX, double dfSrcY,
4205 double *pValue, double *padfWeight )
4206{
4207 return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY,
4208 pValue, padfWeight);
4209}
4210
4211#endif /* INSTANTIATE_FLOAT64_SSE2_IMPL */
4212
4213#endif /* defined(__x86_64) || defined(_M_X64) */
4214
4215/************************************************************************/
4216/* GWKRoundSourceCoordinates() */
4217/************************************************************************/
4218
4219static void GWKRoundSourceCoordinates( int nDstXSize,
4220 double* padfX,
4221 double* padfY,
4222 double* padfZ,
4223 int* pabSuccess,
4224 double dfSrcCoordPrecision,
4225 double dfErrorThreshold,
4226 GDALTransformerFunc pfnTransformer,
4227 void* pTransformerArg,
4228 double dfDstXOff,
4229 double dfDstY )
4230{
4231 double dfPct = 0.8;
4232 if( dfErrorThreshold > 0 && dfSrcCoordPrecision / dfErrorThreshold >= 10.0 )
4233 {
4234 dfPct = 1.0 - 2 * 1.0 / (dfSrcCoordPrecision / dfErrorThreshold);
4235 }
4236 const double dfExactTransformThreshold = 0.5 * dfPct * dfSrcCoordPrecision;
4237
4238 for( int iDstX = 0; iDstX < nDstXSize; iDstX++ )
4239 {
4240 const double dfXBefore = padfX[iDstX];
4241 const double dfYBefore = padfY[iDstX];
4242 padfX[iDstX] =
4243 floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *
4244 dfSrcCoordPrecision;
4245 padfY[iDstX] =
4246 floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
4247 dfSrcCoordPrecision;
4248
4249 // If we are in an uncertainty zone, go to non-approximated
4250 // transformation.
4251 // Due to the 80% of half-precision threshold, dfSrcCoordPrecision must
4252 // be at least 10 times greater than the approximation error.
4253 if( fabs(dfXBefore - padfX[iDstX]) > dfExactTransformThreshold ||
4254 fabs(dfYBefore - padfY[iDstX]) > dfExactTransformThreshold )
4255 {
4256 padfX[iDstX] = iDstX + dfDstXOff;
4257 padfY[iDstX] = dfDstY;
4258 padfZ[iDstX] = 0.0;
4259 pfnTransformer( pTransformerArg, TRUE, 1,
4260 padfX + iDstX, padfY + iDstX,
4261 padfZ + iDstX, pabSuccess + iDstX );
4262 padfX[iDstX] =
4263 floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *
4264 dfSrcCoordPrecision;
4265 padfY[iDstX] =
4266 floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
4267 dfSrcCoordPrecision;
4268 }
4269 }
4270}
4271
4272/************************************************************************/
4273/* GWKOpenCLCase() */
4274/* */
4275/* This is identical to GWKGeneralCase(), but functions via */
4276/* OpenCL. This means we have vector optimization (SSE) and/or */
4277/* GPU optimization depending on our prefs. The code itself is */
4278/* general and not optimized, but by defining constants we can */
4279/* make some pretty darn good code on the fly. */
4280/************************************************************************/
4281
4282#if defined(HAVE_OPENCL)
4283static CPLErr GWKOpenCLCase( GDALWarpKernel *poWK )
4284{
4285 const int nDstXSize = poWK->nDstXSize;
4286 const int nDstYSize = poWK->nDstYSize;
4287 const int nSrcXSize = poWK->nSrcXSize;
4288 const int nSrcYSize = poWK->nSrcYSize;
4289 const int nDstXOff = poWK->nDstXOff;
4290 const int nDstYOff = poWK->nDstYOff;
4291 const int nSrcXOff = poWK->nSrcXOff;
4292 const int nSrcYOff = poWK->nSrcYOff;
4293 bool bUseImag = false;
4294
4295 cl_channel_type imageFormat;
4296 switch( poWK->eWorkingDataType )
4297 {
4298 case GDT_Byte:
4299 imageFormat = CL_UNORM_INT8;
4300 break;
4301 case GDT_UInt16:
4302 imageFormat = CL_UNORM_INT16;
4303 break;
4304 case GDT_CInt16:
4305 bUseImag = true;
4306 CPL_FALLTHROUGH
4307 case GDT_Int16:
4308 imageFormat = CL_SNORM_INT16;
4309 break;
4310 case GDT_CFloat32:
4311 bUseImag = true;
4312 CPL_FALLTHROUGH
4313 case GDT_Float32:
4314 imageFormat = CL_FLOAT;
4315 break;
4316 default:
4317 // No support for higher precision formats.
4318 CPLDebug("OpenCL",
4319 "Unsupported resampling OpenCL data type %d.",
4320 static_cast<int>(poWK->eWorkingDataType));
4321 return CE_Warning;
4322 }
4323
4324 OCLResampAlg resampAlg;
4325 switch( poWK->eResample )
4326 {
4327 case GRA_Bilinear:
4328 resampAlg = OCL_Bilinear;
4329 break;
4330 case GRA_Cubic:
4331 resampAlg = OCL_Cubic;
4332 break;
4333 case GRA_CubicSpline:
4334 resampAlg = OCL_CubicSpline;
4335 break;
4336 case GRA_Lanczos:
4337 resampAlg = OCL_Lanczos;
4338 break;
4339 default:
4340 // No support for higher precision formats.
4341 CPLDebug("OpenCL",
4342 "Unsupported resampling OpenCL resampling alg %d.",
4343 static_cast<int>(poWK->eResample));
4344 return CE_Warning;
4345 }
4346
4347 struct oclWarper *warper = NULL;
4348 cl_int err;
4349 CPLErr eErr = CE_None;
4350
4351 // TODO(schwehr): Fix indenting.
4352 try
4353 {
4354
4355 // Using a factor of 2 or 4 seems to have much less rounding error
4356 // than 3 on the GPU.
4357 // Then the rounding error can cause strange artifacts under the
4358 // right conditions.
4359 warper =
4360 GDALWarpKernelOpenCL_createEnv(nSrcXSize, nSrcYSize,
4361 nDstXSize, nDstYSize,
4362 imageFormat, poWK->nBands, 4,
4363 bUseImag,
4364 poWK->papanBandSrcValid != NULL,
4365 poWK->pafDstDensity,
4366 poWK->padfDstNoDataReal,
4367 resampAlg, &err);
4368
4369 if( err != CL_SUCCESS || warper == NULL )
4370 {
4371 eErr = CE_Warning;
4372 if( warper != NULL )
4373 throw eErr;
4374 return eErr;
4375 }
4376
4377 CPLDebug("GDAL", "GDALWarpKernel()::GWKOpenCLCase() "
4378 "Src=%d,%d,%dx%d Dst=%d,%d,%dx%d",
4379 nSrcXOff, nSrcYOff, nSrcXSize, nSrcYSize,
4380 nDstXOff, nDstYOff, nDstXSize, nDstYSize);
4381
4382 if( !poWK->pfnProgress( poWK->dfProgressBase, "", poWK->pProgress ) )
4383 {
4384 CPLError( CE_Failure, CPLE_UserInterrupt, "User terminated" );
4385 eErr = CE_Failure;
4386 throw eErr;
4387 }
4388
4389 /* ==================================================================== */
4390 /* Loop over bands. */
4391 /* ==================================================================== */
4392 for( int iBand = 0; iBand < poWK->nBands; iBand++ )
4393 {
4394 if( poWK->papanBandSrcValid != NULL &&
4395 poWK->papanBandSrcValid[iBand] != NULL)
4396 {
4397 GDALWarpKernelOpenCL_setSrcValid(
4398 warper, (int *)poWK->papanBandSrcValid[iBand], iBand);
4399 if( err != CL_SUCCESS )
4400 {
4401 CPLError( CE_Failure, CPLE_AppDefined,
4402 "OpenCL routines reported failure (%d) on line %d.",
4403 static_cast<int>(err), __LINE__ );
4404 eErr = CE_Failure;
4405 throw eErr;
4406 }
4407 }
4408
4409 err = GDALWarpKernelOpenCL_setSrcImg(warper,
4410 poWK->papabySrcImage[iBand],
4411 iBand);
4412 if( err != CL_SUCCESS )
4413 {
4414 CPLError(CE_Failure, CPLE_AppDefined,
4415 "OpenCL routines reported failure (%d) on line %d.",
4416 static_cast<int>(err), __LINE__);
4417 eErr = CE_Failure;
4418 throw eErr;
4419 }
4420
4421 err = GDALWarpKernelOpenCL_setDstImg(warper,
4422 poWK->papabyDstImage[iBand],
4423 iBand);
4424 if( err != CL_SUCCESS )
4425 {
4426 CPLError(CE_Failure, CPLE_AppDefined,
4427 "OpenCL routines reported failure (%d) on line %d.",
4428 static_cast<int>(err), __LINE__);
4429 eErr = CE_Failure;
4430 throw eErr;
4431 }
4432 }
4433
4434 /* -------------------------------------------------------------------- */
4435 /* Allocate x,y,z coordinate arrays for transformation ... one */
4436 /* scanlines worth of positions. */
4437 /* -------------------------------------------------------------------- */
4438
4439 // For x, 2 *, because we cache the precomputed values at the end.
4440 double *padfX =
4441 static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
4442 double * padfY =
4443 static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
4444 double *padfZ =
4445 static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
4446 int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
4447 const double dfSrcCoordPrecision = CPLAtof(
4448 CSLFetchNameValueDef(poWK->papszWarpOptions,
4449 "SRC_COORD_PRECISION", "0"));
4450 const double dfErrorThreshold = CPLAtof(
4451 CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
4452
4453 // Precompute values.
4454 for( int iDstX = 0; iDstX < nDstXSize; iDstX++ )
4455 padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
4456
4457 /* ==================================================================== */
4458 /* Loop over output lines. */
4459 /* ==================================================================== */
4460 for( int iDstY = 0; iDstY < nDstYSize && eErr == CE_None; ++iDstY )
4461 {
4462 /* ---------------------------------------------------------------- */
4463 /* Setup points to transform to source image space. */
4464 /* ---------------------------------------------------------------- */
4465 memcpy( padfX, padfX + nDstXSize, sizeof(double) * nDstXSize );
4466 const double dfYConst = iDstY + 0.5 + poWK->nDstYOff;
4467 for( int iDstX = 0; iDstX < nDstXSize; iDstX++ )
4468 padfY[iDstX] = dfYConst;
4469 memset( padfZ, 0, sizeof(double) * nDstXSize );
4470
4471 /* ---------------------------------------------------------------- */
4472 /* Transform the points from destination pixel/line coordinates*/
4473 /* to source pixel/line coordinates. */
4474 /* ---------------------------------------------------------------- */
4475 poWK->pfnTransformer( poWK->pTransformerArg, TRUE, nDstXSize,
4476 padfX, padfY, padfZ, pabSuccess );
4477 if( dfSrcCoordPrecision > 0.0 )
4478 {
4479 GWKRoundSourceCoordinates(nDstXSize, padfX, padfY, padfZ,
4480 pabSuccess,
4481 dfSrcCoordPrecision,
4482 dfErrorThreshold,
4483 poWK->pfnTransformer,
4484 poWK->pTransformerArg,
4485 0.5 + nDstXOff,
4486 iDstY + 0.5 + nDstYOff);
4487 }
4488
4489 err = GDALWarpKernelOpenCL_setCoordRow(warper, padfX, padfY,
4490 nSrcXOff, nSrcYOff,
4491 pabSuccess, iDstY);
4492 if( err != CL_SUCCESS )
4493 {
4494 CPLError(CE_Failure, CPLE_AppDefined,
4495 "OpenCL routines reported failure (%d) on line %d.",
4496 static_cast<int>(err), __LINE__);
4497 eErr = CE_Failure;
4498 break;
4499 }
4500
4501 // Update the valid & density masks because we don't do so in the
4502 // kernel.
4503 for( int iDstX = 0; iDstX < nDstXSize && eErr == CE_None; iDstX++ )
4504 {
4505 const double dfX = padfX[iDstX];
4506 const double dfY = padfY[iDstX];
4507 const int iDstOffset = iDstX + iDstY * nDstXSize;
4508
4509 // See GWKGeneralCase() for appropriate commenting.
4510 if( !pabSuccess[iDstX] || dfX < nSrcXOff || dfY < nSrcYOff )
4511 continue;
4512
4513 int iSrcX = ((int) dfX) - nSrcXOff;
4514 int iSrcY = ((int) dfY) - nSrcYOff;
4515
4516 if( iSrcX < 0 || iSrcX >= nSrcXSize ||
4517 iSrcY < 0 || iSrcY >= nSrcYSize )
4518 continue;
4519
4520 int iSrcOffset = iSrcX + iSrcY * nSrcXSize;
4521 double dfDensity = 1.0;
4522
4523 if( poWK->pafUnifiedSrcDensity != NULL
4524 && iSrcX >= 0 && iSrcY >= 0
4525 && iSrcX < nSrcXSize && iSrcY < nSrcYSize )
4526 dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
4527
4528 GWKOverlayDensity( poWK, iDstOffset, dfDensity );
4529
4530 // Because this is on the bit-wise level, it can't be done well in
4531 // OpenCL.
4532 if( poWK->panDstValid != NULL )
4533 poWK->panDstValid[iDstOffset>>5] |= 0x01 << (iDstOffset & 0x1f);
4534 }
4535 }
4536
4537 CPLFree( padfX );
4538 CPLFree( padfY );
4539 CPLFree( padfZ );
4540 CPLFree( pabSuccess );
4541
4542 if( eErr != CE_None )
4543 throw eErr;
4544
4545 err = GDALWarpKernelOpenCL_runResamp(warper,
4546 poWK->pafUnifiedSrcDensity,
4547 poWK->panUnifiedSrcValid,
4548 poWK->pafDstDensity,
4549 poWK->panDstValid,
4550 poWK->dfXScale, poWK->dfYScale,
4551 poWK->dfXFilter, poWK->dfYFilter,
4552 poWK->nXRadius, poWK->nYRadius,
4553 poWK->nFiltInitX, poWK->nFiltInitY);
4554
4555 if( err != CL_SUCCESS )
4556 {
4557 CPLError(CE_Failure, CPLE_AppDefined,
4558 "OpenCL routines reported failure (%d) on line %d.",
4559 static_cast<int>(err), __LINE__);
4560 eErr = CE_Failure;
4561 throw eErr;
4562 }
4563
4564 /* ==================================================================== */
4565 /* Loop over output lines. */
4566 /* ==================================================================== */
4567 for( int iDstY = 0; iDstY < nDstYSize && eErr == CE_None; iDstY++ )
4568 {
4569 for( int iBand = 0; iBand < poWK->nBands; iBand++ )
4570 {
4571 void *rowReal = NULL;
4572 void *rowImag = NULL;
4573 GByte *pabyDst = poWK->papabyDstImage[iBand];
4574
4575 err = GDALWarpKernelOpenCL_getRow(warper, &rowReal, &rowImag,
4576 iDstY, iBand);
4577 if( err != CL_SUCCESS )
4578 {
4579 CPLError( CE_Failure, CPLE_AppDefined,
4580 "OpenCL routines reported failure (%d) on line %d.",
4581 static_cast<int>(err), __LINE__ );
4582 eErr = CE_Failure;
4583 throw eErr;
4584 }
4585
4586 // Copy the data from the warper to GDAL's memory.
4587 switch( poWK->eWorkingDataType )
4588 {
4589 // TODO(schwehr): Fix casting.
4590 case GDT_Byte:
4591 memcpy((void **)&(((GByte *)pabyDst)[iDstY*nDstXSize]),
4592 rowReal, sizeof(GByte)*nDstXSize);
4593 break;
4594 case GDT_Int16:
4595 memcpy((void **)&(((GInt16 *)pabyDst)[iDstY*nDstXSize]),
4596 rowReal, sizeof(GInt16)*nDstXSize);
4597 break;
4598 case GDT_UInt16:
4599 memcpy((void **)&(((GUInt16 *)pabyDst)[iDstY*nDstXSize]),
4600 rowReal, sizeof(GUInt16)*nDstXSize);
4601 break;
4602 case GDT_Float32:
4603 memcpy((void **)&(((float *)pabyDst)[iDstY*nDstXSize]),
4604 rowReal, sizeof(float)*nDstXSize);
4605 break;
4606 case GDT_CInt16:
4607 {
4608 GInt16 *pabyDstI16 = &(((GInt16 *)pabyDst)[iDstY*nDstXSize]);
4609 for( int iDstX = 0; iDstX < nDstXSize; iDstX++ )
4610 {
4611 pabyDstI16[iDstX*2 ] = ((GInt16 *)rowReal)[iDstX];
4612 pabyDstI16[iDstX*2+1] = ((GInt16 *)rowImag)[iDstX];
4613 }
4614 }
4615 break;
4616 case GDT_CFloat32:
4617 {
4618 float *pabyDstF32 = &(((float *)pabyDst)[iDstY*nDstXSize]);
4619 for( int iDstX = 0; iDstX < nDstXSize; iDstX++ )
4620 {
4621 pabyDstF32[iDstX*2 ] = ((float *)rowReal)[iDstX];
4622 pabyDstF32[iDstX*2+1] = ((float *)rowImag)[iDstX];
4623 }
4624 }
4625 break;
4626 default:
4627 // No support for higher precision formats.
4628 CPLError(CE_Failure, CPLE_AppDefined,
4629 "Unsupported resampling OpenCL data type %d.",
4630 static_cast<int>(poWK->eWorkingDataType) );
4631 eErr = CE_Failure;
4632 throw eErr;
4633 }
4634 }
4635 }
4636 }
4637 catch( const CPLErr& )
4638 {
4639 }
4640
4641 if( (err = GDALWarpKernelOpenCL_deleteEnv(warper)) != CL_SUCCESS )
4642 {
4643 CPLError(CE_Failure, CPLE_AppDefined,
4644 "OpenCL routines reported failure (%d) on line %d.",
4645 static_cast<int>(err), __LINE__ );
4646 return CE_Failure;
4647 }
4648
4649 return eErr;
4650}
4651#endif /* defined(HAVE_OPENCL) */
4652
4653/************************************************************************/
4654/* GWKCheckAndComputeSrcOffsets() */
4655/************************************************************************/
4656static CPL_INLINE bool GWKCheckAndComputeSrcOffsets(
4657 const int* _pabSuccess,
4658 int _iDstX,
4659 const double* _padfX,
4660 const double* _padfY,
4661 const GDALWarpKernel* _poWK,
4662 int _nSrcXSize,
4663 int _nSrcYSize,
4664 int& iSrcOffset)
4665{
4666 if( !_pabSuccess[_iDstX] )
4667 return false;
4668
4669 // If this happens this is likely the symptom of a bug somewhere.
4670 if( CPLIsNan(_padfX[_iDstX]) || CPLIsNan(_padfY[_iDstX]) )
4671 {
4672 static bool bNanCoordFound = false;
4673 if( !bNanCoordFound )
4674 {
4675 CPLDebug("WARP", "NaN coordinate found.");
4676 bNanCoordFound = true;
4677 }
4678 return false;
4679 }
4680
4681/* -------------------------------------------------------------------- */
4682/* Figure out what pixel we want in our source raster, and skip */
4683/* further processing if it is well off the source image. */
4684/* -------------------------------------------------------------------- */
4685 /* We test against the value before casting to avoid the */
4686 /* problem of asymmetric truncation effects around zero. That is */
4687 /* -0.5 will be 0 when cast to an int. */
4688 if( _padfX[_iDstX] < _poWK->nSrcXOff
4689 || _padfY[_iDstX] < _poWK->nSrcYOff )
4690 return false;
4691
4692 // Check for potential overflow when casting from float to int, (if
4693 // operating outside natural projection area, padfX/Y can be a very huge
4694 // positive number before doing the actual conversion), as such cast is
4695 // undefined behaviour that can trigger exception with some compilers
4696 // (see #6753)
4697 if( _padfX[_iDstX] + 1e-10 > _nSrcXSize + _poWK->nSrcXOff ||
4698 _padfY[_iDstX] + 1e-10 > _nSrcYSize + _poWK->nSrcYOff )
4699 {
4700 return false;
4701 }
4702
4703 const int iSrcX =
4704 static_cast<int>(_padfX[_iDstX] + 1.0e-10) - _poWK->nSrcXOff;
4705 const int iSrcY =
4706 static_cast<int>(_padfY[_iDstX] + 1.0e-10) - _poWK->nSrcYOff;
4707
4708 // Those checks should normally be OK given the previous ones.
4709 CPLAssert( iSrcX >= 0 );
4710 CPLAssert( iSrcY >= 0 );
4711 CPLAssert( iSrcX < _nSrcXSize );
4712 CPLAssert( iSrcY < _nSrcYSize );
4713
4714 iSrcOffset = iSrcX + iSrcY * _nSrcXSize;
4715
4716 return true;
4717}
4718
4719/************************************************************************/
4720/* GWKGeneralCase() */
4721/* */
4722/* This is the most general case. It attempts to handle all */
4723/* possible features with relatively little concern for */
4724/* efficiency. */
4725/************************************************************************/
4726
4727static void GWKGeneralCaseThread( void* pData)
4728
4729{
4730 GWKJobStruct* psJob = reinterpret_cast<GWKJobStruct *>(pData);
4731 GDALWarpKernel *poWK = psJob->poWK;
4732 const int iYMin = psJob->iYMin;
4733 const int iYMax = psJob->iYMax;
4734
4735 int nDstXSize = poWK->nDstXSize;
4736 int nSrcXSize = poWK->nSrcXSize;
4737 int nSrcYSize = poWK->nSrcYSize;
4738
4739/* -------------------------------------------------------------------- */
4740/* Allocate x,y,z coordinate arrays for transformation ... one */
4741/* scanlines worth of positions. */
4742/* -------------------------------------------------------------------- */
4743 // For x, 2 *, because we cache the precomputed values at the end.
4744 double *padfX =
4745 static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
4746 double *padfY =
4747 static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
4748 double *padfZ =
4749 static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
4750 int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
4751
4752 const bool bUse4SamplesFormula =
4753 poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95;
4754
4755 GWKResampleWrkStruct* psWrkStruct = nullptr;
4756 if( poWK->eResample != GRA_NearestNeighbour )
4757 {
4758 psWrkStruct = GWKResampleCreateWrkStruct(poWK);
4759 }
4760 const double dfSrcCoordPrecision = CPLAtof(
4761 CSLFetchNameValueDef(poWK->papszWarpOptions,
4762 "SRC_COORD_PRECISION", "0"));
4763 const double dfErrorThreshold = CPLAtof(
4764 CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
4765
4766 // Precompute values.
4767 for( int iDstX = 0; iDstX < nDstXSize; iDstX++ )
4768 padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
4769
4770/* ==================================================================== */
4771/* Loop over output lines. */
4772/* ==================================================================== */
4773 for( int iDstY = iYMin; iDstY < iYMax; iDstY++ )
4774 {
4775/* -------------------------------------------------------------------- */
4776/* Setup points to transform to source image space. */
4777/* -------------------------------------------------------------------- */
4778 memcpy( padfX, padfX + nDstXSize, sizeof(double) * nDstXSize );
4779 const double dfY = iDstY + 0.5 + poWK->nDstYOff;
4780 for( int iDstX = 0; iDstX < nDstXSize; iDstX++ )
4781 padfY[iDstX] = dfY;
4782 memset( padfZ, 0, sizeof(double) * nDstXSize );
4783
4784/* -------------------------------------------------------------------- */
4785/* Transform the points from destination pixel/line coordinates */
4786/* to source pixel/line coordinates. */
4787/* -------------------------------------------------------------------- */
4788 poWK->pfnTransformer( psJob->pTransformerArg, TRUE, nDstXSize,
4789 padfX, padfY, padfZ, pabSuccess );
4790 if( dfSrcCoordPrecision > 0.0 )
4791 {
4792 GWKRoundSourceCoordinates(nDstXSize, padfX, padfY, padfZ,
4793 pabSuccess,
4794 dfSrcCoordPrecision,
4795 dfErrorThreshold,
4796 poWK->pfnTransformer,
4797 psJob->pTransformerArg,
4798 0.5 + poWK->nDstXOff,
4799 iDstY + 0.5 + poWK->nDstYOff);
4800 }
4801
4802/* ==================================================================== */
4803/* Loop over pixels in output scanline. */
4804/* ==================================================================== */
4805 for( int iDstX = 0; iDstX < nDstXSize; iDstX++ )
4806 {
4807 int iSrcOffset = 0;
4808 if( !GWKCheckAndComputeSrcOffsets(pabSuccess, iDstX, padfX, padfY,
4809 poWK, nSrcXSize, nSrcYSize, iSrcOffset) )
4810 continue;
4811
4812/* -------------------------------------------------------------------- */
4813/* Do not try to apply transparent/invalid source pixels to the */
4814/* destination. This currently ignores the multi-pixel input */
4815/* of bilinear and cubic resamples. */
4816/* -------------------------------------------------------------------- */
4817 double dfDensity = 1.0;
4818
4819 if( poWK->pafUnifiedSrcDensity != nullptr )
4820 {
4821 dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
4822 if( dfDensity < SRC_DENSITY_THRESHOLD )
4823 continue;
4824 }
4825
4826 if( poWK->panUnifiedSrcValid != nullptr
4827 && !(poWK->panUnifiedSrcValid[iSrcOffset>>5]
4828 & (0x01 << (iSrcOffset & 0x1f))) )
4829 continue;
4830
4831/* ==================================================================== */
4832/* Loop processing each band. */
4833/* ==================================================================== */
4834 bool bHasFoundDensity = false;
4835
4836 const int iDstOffset = iDstX + iDstY * nDstXSize;
4837 for( int iBand = 0; iBand < poWK->nBands; iBand++ )
4838 {
4839 double dfBandDensity = 0.0;
4840 double dfValueReal = 0.0;
4841 double dfValueImag = 0.0;
4842
4843/* -------------------------------------------------------------------- */
4844/* Collect the source value. */
4845/* -------------------------------------------------------------------- */
4846 if( poWK->eResample == GRA_NearestNeighbour ||
4847 nSrcXSize == 1 || nSrcYSize == 1 )
4848 {
4849 // FALSE is returned if dfBandDensity == 0, which is
4850 // checked below.
4851 CPL_IGNORE_RET_VAL(
4852 GWKGetPixelValue( poWK, iBand, iSrcOffset,
4853 &dfBandDensity,
4854 &dfValueReal, &dfValueImag ));
4855 }
4856 else if( poWK->eResample == GRA_Bilinear &&
4857 bUse4SamplesFormula )
4858 {
4859 GWKBilinearResample4Sample( poWK, iBand,
4860 padfX[iDstX]-poWK->nSrcXOff,
4861 padfY[iDstX]-poWK->nSrcYOff,
4862 &dfBandDensity,
4863 &dfValueReal, &dfValueImag );
4864 }
4865 else if( poWK->eResample == GRA_Cubic &&
4866 bUse4SamplesFormula )
4867 {
4868 GWKCubicResample4Sample( poWK, iBand,
4869 padfX[iDstX]-poWK->nSrcXOff,
4870 padfY[iDstX]-poWK->nSrcYOff,
4871 &dfBandDensity,
4872 &dfValueReal, &dfValueImag );
4873 }
4874 else
4875#ifdef DEBUG
4876 // Only useful for clang static analyzer.
4877 if( psWrkStruct != nullptr )
4878#endif
4879 {
4880 psWrkStruct->pfnGWKResample( poWK, iBand,
4881 padfX[iDstX]-poWK->nSrcXOff,
4882 padfY[iDstX]-poWK->nSrcYOff,
4883 &dfBandDensity,
4884 &dfValueReal, &dfValueImag, psWrkStruct );
4885 }
4886
4887 // If we didn't find any valid inputs skip to next band.
4888 if( dfBandDensity < BAND_DENSITY_THRESHOLD )
4889 continue;
4890
4891 bHasFoundDensity = true;
4892
4893/* -------------------------------------------------------------------- */
4894/* We have a computed value from the source. Now apply it to */
4895/* the destination pixel. */
4896/* -------------------------------------------------------------------- */
4897 GWKSetPixelValue( poWK, iBand, iDstOffset,
4898 dfBandDensity,
4899 dfValueReal, dfValueImag );
4900 }
4901
4902 if( !bHasFoundDensity )
4903 continue;
4904
4905/* -------------------------------------------------------------------- */
4906/* Update destination density/validity masks. */
4907/* -------------------------------------------------------------------- */
4908 GWKOverlayDensity( poWK, iDstOffset, dfDensity );
4909
4910 if( poWK->panDstValid != nullptr )
4911 {
4912 poWK->panDstValid[iDstOffset>>5] |=
4913 0x01 << (iDstOffset & 0x1f);
4914 }
4915 } /* Next iDstX */
4916
4917/* -------------------------------------------------------------------- */
4918/* Report progress to the user, and optionally cancel out. */
4919/* -------------------------------------------------------------------- */
4920 if( psJob->pfnProgress && psJob->pfnProgress(psJob) )
4921 break;
4922 }
4923
4924/* -------------------------------------------------------------------- */
4925/* Cleanup and return. */
4926/* -------------------------------------------------------------------- */
4927 CPLFree( padfX );
4928 CPLFree( padfY );
4929 CPLFree( padfZ );
4930 CPLFree( pabSuccess );
4931 if( psWrkStruct )
4932 GWKResampleDeleteWrkStruct(psWrkStruct);
4933}
4934
4935static CPLErr GWKGeneralCase( GDALWarpKernel *poWK )
4936{
4937 return GWKRun( poWK, "GWKGeneralCase", GWKGeneralCaseThread );
4938}
4939
4940/************************************************************************/
4941/* GWKRealCase() */
4942/* */
4943/* General case for non-complex data types. */
4944/************************************************************************/
4945
4946static void GWKRealCaseThread( void* pData)
4947
4948{
4949 GWKJobStruct* psJob = static_cast<GWKJobStruct*>(pData);
4950 GDALWarpKernel *poWK = psJob->poWK;
4951 const int iYMin = psJob->iYMin;
4952 const int iYMax = psJob->iYMax;
4953
4954 const int nDstXSize = poWK->nDstXSize;
4955 const int nSrcXSize = poWK->nSrcXSize;
4956 const int nSrcYSize = poWK->nSrcYSize;
4957
4958/* -------------------------------------------------------------------- */
4959/* Allocate x,y,z coordinate arrays for transformation ... one */
4960/* scanlines worth of positions. */
4961/* -------------------------------------------------------------------- */
4962
4963 // For x, 2 *, because we cache the precomputed values at the end.
4964 double *padfX =
4965 static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
4966 double *padfY =
4967 static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
4968 double *padfZ =
4969 static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
4970 int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
4971
4972 const bool bUse4SamplesFormula =
4973 poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95;
4974
4975 GWKResampleWrkStruct* psWrkStruct = nullptr;
4976 if( poWK->eResample != GRA_NearestNeighbour )
4977 {
4978 psWrkStruct = GWKResampleCreateWrkStruct(poWK);
4979 }
4980 const double dfSrcCoordPrecision = CPLAtof(
4981 CSLFetchNameValueDef(poWK->papszWarpOptions,
4982 "SRC_COORD_PRECISION", "0"));
4983 const double dfErrorThreshold = CPLAtof(
4984 CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
4985
4986 const bool bSrcMaskIsDensity =
4987 poWK->panUnifiedSrcValid == nullptr &&
4988 poWK->papanBandSrcValid == nullptr &&
4989 poWK->pafUnifiedSrcDensity != nullptr;
4990
4991 // Precompute values.
4992 for( int iDstX = 0; iDstX < nDstXSize; iDstX++ )
4993 padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
4994
4995/* ==================================================================== */
4996/* Loop over output lines. */
4997/* ==================================================================== */
4998 for( int iDstY = iYMin; iDstY < iYMax; iDstY++ )
4999 {
5000/* -------------------------------------------------------------------- */
5001/* Setup points to transform to source image space. */
5002/* -------------------------------------------------------------------- */
5003 memcpy( padfX, padfX + nDstXSize, sizeof(double) * nDstXSize );
5004 const double dfY = iDstY + 0.5 + poWK->nDstYOff;
5005 for( int iDstX = 0; iDstX < nDstXSize; iDstX++ )
5006 padfY[iDstX] = dfY;
5007 memset( padfZ, 0, sizeof(double) * nDstXSize );
5008
5009/* -------------------------------------------------------------------- */
5010/* Transform the points from destination pixel/line coordinates */
5011/* to source pixel/line coordinates. */
5012/* -------------------------------------------------------------------- */
5013 poWK->pfnTransformer( psJob->pTransformerArg, TRUE, nDstXSize,
5014 padfX, padfY, padfZ, pabSuccess );
5015 if( dfSrcCoordPrecision > 0.0 )
5016 {
5017 GWKRoundSourceCoordinates(nDstXSize, padfX, padfY, padfZ,
5018 pabSuccess,
5019 dfSrcCoordPrecision,
5020 dfErrorThreshold,
5021 poWK->pfnTransformer,
5022 psJob->pTransformerArg,
5023 0.5 + poWK->nDstXOff,
5024 iDstY + 0.5 + poWK->nDstYOff);
5025 }
5026
5027/* ==================================================================== */
5028/* Loop over pixels in output scanline. */
5029/* ==================================================================== */
5030 for( int iDstX = 0; iDstX < nDstXSize; iDstX++ )
5031 {
5032 int iSrcOffset = 0;
5033 if( !GWKCheckAndComputeSrcOffsets(pabSuccess, iDstX, padfX, padfY,
5034 poWK, nSrcXSize, nSrcYSize, iSrcOffset) )
5035 continue;
5036
5037/* -------------------------------------------------------------------- */
5038/* Do not try to apply transparent/invalid source pixels to the */
5039/* destination. This currently ignores the multi-pixel input */
5040/* of bilinear and cubic resamples. */
5041/* -------------------------------------------------------------------- */
5042 double dfDensity = 1.0;
5043
5044 if( poWK->pafUnifiedSrcDensity != nullptr )
5045 {
5046 dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
5047 if( dfDensity < SRC_DENSITY_THRESHOLD )
5048 continue;
5049 }
5050
5051 if( poWK->panUnifiedSrcValid != nullptr
5052 && !(poWK->panUnifiedSrcValid[iSrcOffset>>5]
5053 & (0x01 << (iSrcOffset & 0x1f))) )
5054 continue;
5055
5056/* ==================================================================== */
5057/* Loop processing each band. */
5058/* ==================================================================== */
5059 bool bHasFoundDensity = false;
5060
5061 const int iDstOffset = iDstX + iDstY * nDstXSize;
5062 for( int iBand = 0; iBand < poWK->nBands; iBand++ )
5063 {
5064 double dfBandDensity = 0.0;
5065 double dfValueReal = 0.0;
5066
5067/* -------------------------------------------------------------------- */
5068/* Collect the source value. */
5069/* -------------------------------------------------------------------- */
5070 if( poWK->eResample == GRA_NearestNeighbour ||
5071 nSrcXSize == 1 || nSrcYSize == 1 )
5072 {
5073 // FALSE is returned if dfBandDensity == 0, which is
5074 // checked below.
5075 CPL_IGNORE_RET_VAL(
5076 GWKGetPixelValueReal( poWK, iBand, iSrcOffset,
5077 &dfBandDensity, &dfValueReal ));
5078 }
5079 else if( poWK->eResample == GRA_Bilinear &&
5080 bUse4SamplesFormula )
5081 {
5082 double dfValueImagIgnored = 0.0;
5083 GWKBilinearResample4Sample( poWK, iBand,
5084 padfX[iDstX]-poWK->nSrcXOff,
5085 padfY[iDstX]-poWK->nSrcYOff,
5086 &dfBandDensity,
5087 &dfValueReal, &dfValueImagIgnored );
5088 }
5089 else if( poWK->eResample == GRA_Cubic &&
5090 bUse4SamplesFormula )
5091 {
5092 if( bSrcMaskIsDensity )
5093 {
5094 if( poWK->eWorkingDataType == GDT_Byte )
5095 {
5096 GWKCubicResampleSrcMaskIsDensity4SampleRealT<GByte>(
5097 poWK, iBand,
5098 padfX[iDstX]-poWK->nSrcXOff,
5099 padfY[iDstX]-poWK->nSrcYOff,
5100 &dfBandDensity,
5101 &dfValueReal );
5102 }
5103 else if( poWK->eWorkingDataType == GDT_UInt16 )
5104 {
5105 GWKCubicResampleSrcMaskIsDensity4SampleRealT<GUInt16>(
5106 poWK, iBand,
5107 padfX[iDstX]-poWK->nSrcXOff,
5108 padfY[iDstX]-poWK->nSrcYOff,
5109 &dfBandDensity,
5110 &dfValueReal );
5111 }
5112 else
5113 {
5114 GWKCubicResampleSrcMaskIsDensity4SampleReal( poWK, iBand,
5115 padfX[iDstX]-poWK->nSrcXOff,
5116 padfY[iDstX]-poWK->nSrcYOff,
5117 &dfBandDensity,
5118 &dfValueReal );
5119 }
5120 }
5121 else
5122 {
5123 double dfValueImagIgnored = 0.0;
5124 GWKCubicResample4Sample( poWK, iBand,
5125 padfX[iDstX]-poWK->nSrcXOff,
5126 padfY[iDstX]-poWK->nSrcYOff,
5127 &dfBandDensity,
5128 &dfValueReal,
5129 &dfValueImagIgnored);
5130 }
5131 }
5132 else
5133#ifdef DEBUG
5134 // Only useful for clang static analyzer.
5135 if( psWrkStruct != nullptr )
5136#endif
5137 {
5138 double dfValueImagIgnored = 0.0;
5139 psWrkStruct->pfnGWKResample(
5140 poWK, iBand,
5141 padfX[iDstX]-poWK->nSrcXOff,
5142 padfY[iDstX]-poWK->nSrcYOff,
5143 &dfBandDensity,
5144 &dfValueReal, &dfValueImagIgnored, psWrkStruct);
5145 }
5146
5147 // If we didn't find any valid inputs skip to next band.
5148 if( dfBandDensity < BAND_DENSITY_THRESHOLD )
5149 continue;
5150
5151 bHasFoundDensity = true;
5152
5153/* -------------------------------------------------------------------- */
5154/* We have a computed value from the source. Now apply it to */
5155/* the destination pixel. */
5156/* -------------------------------------------------------------------- */
5157 GWKSetPixelValueReal(poWK, iBand, iDstOffset,
5158 dfBandDensity,
5159 dfValueReal);
5160 }
5161
5162 if( !bHasFoundDensity )
5163 continue;
5164
5165/* -------------------------------------------------------------------- */
5166/* Update destination density/validity masks. */
5167/* -------------------------------------------------------------------- */
5168 GWKOverlayDensity( poWK, iDstOffset, dfDensity );
5169
5170 if( poWK->panDstValid != nullptr )
5171 {
5172 poWK->panDstValid[iDstOffset>>5] |=
5173 0x01 << (iDstOffset & 0x1f);
5174 }
5175 } // Next iDstX.
5176
5177/* -------------------------------------------------------------------- */
5178/* Report progress to the user, and optionally cancel out. */
5179/* -------------------------------------------------------------------- */
5180 if( psJob->pfnProgress && psJob->pfnProgress(psJob) )
5181 break;
5182 }
5183
5184/* -------------------------------------------------------------------- */
5185/* Cleanup and return. */
5186/* -------------------------------------------------------------------- */
5187 CPLFree( padfX );
5188 CPLFree( padfY );
5189 CPLFree( padfZ );
5190 CPLFree( pabSuccess );
5191 if( psWrkStruct )
5192 GWKResampleDeleteWrkStruct(psWrkStruct);
5193}
5194
5195static CPLErr GWKRealCase( GDALWarpKernel *poWK )
5196{
5197 return GWKRun( poWK, "GWKRealCase", GWKRealCaseThread );
5198}
5199
5200/************************************************************************/
5201/* GWKResampleNoMasksOrDstDensityOnlyThreadInternal() */
5202/************************************************************************/
5203
5204template<class T, GDALResampleAlg eResample, int bUse4SamplesFormula>
5205static void GWKResampleNoMasksOrDstDensityOnlyThreadInternal( void* pData )
5206
5207{
5208 GWKJobStruct* psJob = static_cast<GWKJobStruct *>(pData);
5209 GDALWarpKernel *poWK = psJob->poWK;
5210 const int iYMin = psJob->iYMin;
5211 const int iYMax = psJob->iYMax;
5212
5213 const int nDstXSize = poWK->nDstXSize;
5214 const int nSrcXSize = poWK->nSrcXSize;
5215 const int nSrcYSize = poWK->nSrcYSize;
5216
5217/* -------------------------------------------------------------------- */
5218/* Allocate x,y,z coordinate arrays for transformation ... one */
5219/* scanlines worth of positions. */
5220/* -------------------------------------------------------------------- */
5221
5222 // For x, 2 *, because we cache the precomputed values at the end.
5223 double *padfX =
5224 static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
5225 double *padfY =
5226 static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5227 double *padfZ =
5228 static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5229 int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
5230
5231 const int nXRadius = poWK->nXRadius;
5232 double *padfWeight =
5233 static_cast<double *>(CPLCalloc(1 + nXRadius * 2, sizeof(double)));
5234 const double dfSrcCoordPrecision = CPLAtof(
5235 CSLFetchNameValueDef(poWK->papszWarpOptions,
5236 "SRC_COORD_PRECISION", "0"));
5237 const double dfErrorThreshold = CPLAtof(
5238 CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
5239
5240 // Precompute values.
5241 for( int iDstX = 0; iDstX < nDstXSize; iDstX++ )
5242 padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
5243
5244/* ==================================================================== */
5245/* Loop over output lines. */
5246/* ==================================================================== */
5247 for( int iDstY = iYMin; iDstY < iYMax; iDstY++ )
5248 {
5249/* -------------------------------------------------------------------- */
5250/* Setup points to transform to source image space. */
5251/* -------------------------------------------------------------------- */
5252 memcpy( padfX, padfX + nDstXSize, sizeof(double) * nDstXSize );
5253 const double dfY = iDstY + 0.5 + poWK->nDstYOff;
5254 for( int iDstX = 0; iDstX < nDstXSize; iDstX++ )
5255 padfY[iDstX] = dfY;
5256 memset( padfZ, 0, sizeof(double) * nDstXSize );
5257
5258/* -------------------------------------------------------------------- */
5259/* Transform the points from destination pixel/line coordinates */
5260/* to source pixel/line coordinates. */
5261/* -------------------------------------------------------------------- */
5262 poWK->pfnTransformer( psJob->pTransformerArg, TRUE, nDstXSize,
5263 padfX, padfY, padfZ, pabSuccess );
5264 if( dfSrcCoordPrecision > 0.0 )
5265 {
5266 GWKRoundSourceCoordinates(nDstXSize, padfX, padfY, padfZ, pabSuccess,
5267 dfSrcCoordPrecision,
5268 dfErrorThreshold,
5269 poWK->pfnTransformer,
5270 psJob->pTransformerArg,
5271 0.5 + poWK->nDstXOff,
5272 iDstY + 0.5 + poWK->nDstYOff);
5273 }
5274
5275/* ==================================================================== */
5276/* Loop over pixels in output scanline. */
5277/* ==================================================================== */
5278 for( int iDstX = 0; iDstX < nDstXSize; iDstX++ )
5279 {
5280 int iSrcOffset = 0;
5281 if( !GWKCheckAndComputeSrcOffsets(pabSuccess, iDstX, padfX, padfY,
5282 poWK, nSrcXSize, nSrcYSize,
5283 iSrcOffset) )
5284 continue;
5285
5286/* ==================================================================== */
5287/* Loop processing each band. */
5288/* ==================================================================== */
5289 const int iDstOffset = iDstX + iDstY * nDstXSize;
5290
5291 for( int iBand = 0; iBand < poWK->nBands; iBand++ )
5292 {
5293 T value = 0;
5294 if( eResample == GRA_NearestNeighbour )
5295 {
5296 value = reinterpret_cast<T *>(
5297 poWK->papabySrcImage[iBand])[iSrcOffset];
5298 }
5299 else if( bUse4SamplesFormula )
5300 {
5301 if( eResample == GRA_Bilinear )
5302 GWKBilinearResampleNoMasks4SampleT( poWK, iBand,
5303 padfX[iDstX]-poWK->nSrcXOff,
5304 padfY[iDstX]-poWK->nSrcYOff,
5305 &value );
5306 else
5307 GWKCubicResampleNoMasks4SampleT( poWK, iBand,
5308 padfX[iDstX]-poWK->nSrcXOff,
5309 padfY[iDstX]-poWK->nSrcYOff,
5310 &value );
5311 }
5312 else
5313 {
5314 GWKResampleNoMasksT( poWK, iBand,
5315 padfX[iDstX]-poWK->nSrcXOff,
5316 padfY[iDstX]-poWK->nSrcYOff,
5317 &value,
5318 padfWeight);
5319 }
5320 reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
5321 value;
5322 }
5323
5324 if( poWK->pafDstDensity )
5325 poWK->pafDstDensity[iDstOffset] = 1.0f;
5326 }
5327
5328/* -------------------------------------------------------------------- */
5329/* Report progress to the user, and optionally cancel out. */
5330/* -------------------------------------------------------------------- */
5331 if( psJob->pfnProgress && psJob->pfnProgress(psJob) )
5332 break;
5333 }
5334
5335/* -------------------------------------------------------------------- */
5336/* Cleanup and return. */
5337/* -------------------------------------------------------------------- */
5338 CPLFree( padfX );
5339 CPLFree( padfY );
5340 CPLFree( padfZ );
5341 CPLFree( pabSuccess );
5342 CPLFree( padfWeight );
5343}
5344
5345template<class T, GDALResampleAlg eResample>
5346static void GWKResampleNoMasksOrDstDensityOnlyThread( void* pData )
5347{
5348 GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample,
5349 FALSE>(pData);
5350}
5351
5352template<class T, GDALResampleAlg eResample>
5353static void GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread( void* pData )
5354
5355{
5356 GWKJobStruct* psJob = static_cast<GWKJobStruct *>(pData);
5357 GDALWarpKernel *poWK = psJob->poWK;
5358 CPLAssert(eResample == GRA_Bilinear || eResample == GRA_Cubic);
5359 const bool bUse4SamplesFormula =
5360 poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95;
5361 if( bUse4SamplesFormula )
5362 GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample,
5363 TRUE>(pData);
5364 else
5365 GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample,
5366 FALSE>(pData);
5367}
5368
5369static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte( GDALWarpKernel *poWK )
5370{
5371 return GWKRun(
5372 poWK, "GWKNearestNoMasksOrDstDensityOnlyByte",
5373 GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_NearestNeighbour>);
5374}
5375
5376static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte( GDALWarpKernel *poWK )
5377{
5378 return GWKRun(
5379 poWK, "GWKBilinearNoMasksOrDstDensityOnlyByte",
5380 GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte,
5381 GRA_Bilinear>);
5382}
5383
5384static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte( GDALWarpKernel *poWK )
5385{
5386 return GWKRun(
5387 poWK, "GWKCubicNoMasksOrDstDensityOnlyByte",
5388 GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte, GRA_Cubic>);
5389}
5390
5391static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat( GDALWarpKernel *poWK )
5392{
5393 return GWKRun(
5394 poWK, "GWKCubicNoMasksOrDstDensityOnlyFloat",
5395 GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float, GRA_Cubic>);
5396}
5397
5398#ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
5399
5400static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble( GDALWarpKernel *poWK )
5401{
5402 return GWKRun(
5403 poWK, "GWKCubicNoMasksOrDstDensityOnlyDouble",
5404 GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double, GRA_Cubic>);
5405}
5406#endif
5407
5408static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte( GDALWarpKernel *poWK )
5409{
5410 return GWKRun(
5411 poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyByte",
5412 GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_CubicSpline>);
5413}
5414
5415/************************************************************************/
5416/* GWKNearestByte() */
5417/* */
5418/* Case for 8bit input data with nearest neighbour resampling */
5419/* using valid flags. Should be as fast as possible for this */
5420/* particular transformation type. */
5421/************************************************************************/
5422
5423template<class T>
5424static void GWKNearestThread( void* pData )
5425
5426{
5427 GWKJobStruct* psJob = static_cast<GWKJobStruct *>(pData);
5428 GDALWarpKernel *poWK = psJob->poWK;
5429 const int iYMin = psJob->iYMin;
5430 const int iYMax = psJob->iYMax;
5431
5432 const int nDstXSize = poWK->nDstXSize;
5433 const int nSrcXSize = poWK->nSrcXSize;
5434 const int nSrcYSize = poWK->nSrcYSize;
5435
5436/* -------------------------------------------------------------------- */
5437/* Allocate x,y,z coordinate arrays for transformation ... one */
5438/* scanlines worth of positions. */
5439/* -------------------------------------------------------------------- */
5440
5441 // For x, 2 *, because we cache the precomputed values at the end.
5442 double *padfX =
5443 static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
5444 double *padfY =
5445 static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5446 double *padfZ =
5447 static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5448 int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
5449
5450 const double dfSrcCoordPrecision = CPLAtof(
5451 CSLFetchNameValueDef(poWK->papszWarpOptions,
5452 "SRC_COORD_PRECISION", "0"));
5453 const double dfErrorThreshold = CPLAtof(
5454 CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
5455
5456 // Precompute values.
5457 for( int iDstX = 0; iDstX < nDstXSize; iDstX++ )
5458 padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
5459
5460/* ==================================================================== */
5461/* Loop over output lines. */
5462/* ==================================================================== */
5463 for( int iDstY = iYMin; iDstY < iYMax; iDstY++ )
5464 {
5465
5466/* -------------------------------------------------------------------- */
5467/* Setup points to transform to source image space. */
5468/* -------------------------------------------------------------------- */
5469 memcpy( padfX, padfX + nDstXSize, sizeof(double) * nDstXSize );
5470 const double dfY = iDstY + 0.5 + poWK->nDstYOff;
5471 for( int iDstX = 0; iDstX < nDstXSize; iDstX++ )
5472 padfY[iDstX] = dfY;
5473 memset( padfZ, 0, sizeof(double) * nDstXSize );
5474
5475/* -------------------------------------------------------------------- */
5476/* Transform the points from destination pixel/line coordinates */
5477/* to source pixel/line coordinates. */
5478/* -------------------------------------------------------------------- */
5479 poWK->pfnTransformer( psJob->pTransformerArg, TRUE, nDstXSize,
5480 padfX, padfY, padfZ, pabSuccess );
5481 if( dfSrcCoordPrecision > 0.0 )
5482 {
5483 GWKRoundSourceCoordinates(nDstXSize, padfX, padfY, padfZ,
5484 pabSuccess,
5485 dfSrcCoordPrecision,
5486 dfErrorThreshold,
5487 poWK->pfnTransformer,
5488 psJob->pTransformerArg,
5489 0.5 + poWK->nDstXOff,
5490 iDstY + 0.5 + poWK->nDstYOff);
5491 }
5492/* ==================================================================== */
5493/* Loop over pixels in output scanline. */
5494/* ==================================================================== */
5495 for( int iDstX = 0; iDstX < nDstXSize; iDstX++ )
5496 {
5497 int iSrcOffset = 0;
5498 if( !GWKCheckAndComputeSrcOffsets(pabSuccess, iDstX, padfX, padfY,
5499 poWK, nSrcXSize, nSrcYSize, iSrcOffset) )
5500 continue;
5501
5502/* -------------------------------------------------------------------- */
5503/* Do not try to apply invalid source pixels to the dest. */
5504/* -------------------------------------------------------------------- */
5505 if( poWK->panUnifiedSrcValid != nullptr
5506 && !(poWK->panUnifiedSrcValid[iSrcOffset>>5]
5507 & (0x01 << (iSrcOffset & 0x1f))) )
5508 continue;
5509
5510/* -------------------------------------------------------------------- */
5511/* Do not try to apply transparent source pixels to the destination.*/
5512/* -------------------------------------------------------------------- */
5513 double dfDensity = 1.0;
5514
5515 if( poWK->pafUnifiedSrcDensity != nullptr )
5516 {
5517 dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
5518 if( dfDensity < SRC_DENSITY_THRESHOLD )
5519 continue;
5520 }
5521
5522/* ==================================================================== */
5523/* Loop processing each band. */
5524/* ==================================================================== */
5525
5526 const int iDstOffset = iDstX + iDstY * nDstXSize;
5527
5528 for( int iBand = 0; iBand < poWK->nBands; iBand++ )
5529 {
5530 T value = 0;
5531 double dfBandDensity = 0.0;
5532
5533/* -------------------------------------------------------------------- */
5534/* Collect the source value. */
5535/* -------------------------------------------------------------------- */
5536 if( GWKGetPixelT(poWK, iBand, iSrcOffset,
5537 &dfBandDensity, &value) )
5538 {
5539 if( dfBandDensity < 1.0 )
5540 {
5541 if( dfBandDensity == 0.0 )
5542 {
5543 // Do nothing.
5544 }
5545 else
5546 {
5547 // Let the general code take care of mixing.
5548 GWKSetPixelValueRealT( poWK, iBand, iDstOffset,
5549 dfBandDensity, value );
5550 }
5551 }
5552 else
5553 {
5554 reinterpret_cast<T *>(
5555 poWK->papabyDstImage[iBand])[iDstOffset] = value;
5556 }
5557 }
5558 }
5559
5560/* -------------------------------------------------------------------- */
5561/* Mark this pixel valid/opaque in the output. */
5562/* -------------------------------------------------------------------- */
5563 GWKOverlayDensity( poWK, iDstOffset, dfDensity );
5564
5565 if( poWK->panDstValid != nullptr )
5566 {
5567 poWK->panDstValid[iDstOffset>>5] |=
5568 0x01 << (iDstOffset & 0x1f);
5569 }
5570 } /* Next iDstX */
5571
5572/* -------------------------------------------------------------------- */
5573/* Report progress to the user, and optionally cancel out. */
5574/* -------------------------------------------------------------------- */
5575 if( psJob->pfnProgress && psJob->pfnProgress(psJob) )
5576 break;
5577 }
5578
5579/* -------------------------------------------------------------------- */
5580/* Cleanup and return. */
5581/* -------------------------------------------------------------------- */
5582 CPLFree( padfX );
5583 CPLFree( padfY );
5584 CPLFree( padfZ );
5585 CPLFree( pabSuccess );
5586}
5587
5588static CPLErr GWKNearestByte( GDALWarpKernel *poWK )
5589{
5590 return GWKRun( poWK, "GWKNearestByte", GWKNearestThread<GByte> );
5591}
5592
5593static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort( GDALWarpKernel *poWK )
5594{
5595 return GWKRun(
5596 poWK, "GWKNearestNoMasksOrDstDensityOnlyShort",
5597 GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_NearestNeighbour>);
5598}
5599
5600static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort( GDALWarpKernel *poWK )
5601{
5602 return GWKRun(
5603 poWK, "GWKBilinearNoMasksOrDstDensityOnlyShort",
5604 GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16,
5605 GRA_Bilinear>);
5606}
5607
5608static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort( GDALWarpKernel *poWK )
5609{
5610 return GWKRun(
5611 poWK, "GWKBilinearNoMasksOrDstDensityOnlyUShort",
5612 GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16,
5613 GRA_Bilinear>);
5614}
5615
5616static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat( GDALWarpKernel *poWK )
5617{
5618 return GWKRun(
5619 poWK, "GWKBilinearNoMasksOrDstDensityOnlyFloat",
5620 GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float,
5621 GRA_Bilinear>);
5622}
5623
5624#ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
5625
5626static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble( GDALWarpKernel *poWK )
5627{
5628 return GWKRun(
5629 poWK, "GWKBilinearNoMasksOrDstDensityOnlyDouble",
5630 GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double,
5631 GRA_Bilinear>);
5632}
5633#endif
5634
5635static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort( GDALWarpKernel *poWK )
5636{
5637 return GWKRun(
5638 poWK, "GWKCubicNoMasksOrDstDensityOnlyShort",
5639 GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16, GRA_Cubic>);
5640}
5641
5642static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort( GDALWarpKernel *poWK )
5643{
5644 return GWKRun(
5645 poWK, "GWKCubicNoMasksOrDstDensityOnlyUShort",
5646 GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16, GRA_Cubic>);
5647}
5648
5649static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort( GDALWarpKernel *poWK )
5650{
5651 return GWKRun(
5652 poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyShort",
5653 GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_CubicSpline>);
5654}
5655
5656static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(
5657 GDALWarpKernel *poWK )
5658{
5659 return GWKRun(
5660 poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyUShort",
5661 GWKResampleNoMasksOrDstDensityOnlyThread<GUInt16, GRA_CubicSpline>);
5662}
5663
5664static CPLErr GWKNearestShort( GDALWarpKernel *poWK )
5665{
5666 return GWKRun(poWK, "GWKNearestShort", GWKNearestThread<GInt16>);
5667}
5668
5669static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat( GDALWarpKernel *poWK )
5670{
5671 return GWKRun(
5672 poWK, "GWKNearestNoMasksOrDstDensityOnlyFloat",
5673 GWKResampleNoMasksOrDstDensityOnlyThread<float, GRA_NearestNeighbour>);
5674}
5675
5676static CPLErr GWKNearestFloat( GDALWarpKernel *poWK )
5677{
5678 return GWKRun(poWK, "GWKNearestFloat", GWKNearestThread<float>);
5679}
5680
5681/************************************************************************/
5682/* GWKAverageOrMode() */
5683/* */
5684/************************************************************************/
5685
5686static void GWKAverageOrModeThread(void* pData);
5687
5688static CPLErr GWKAverageOrMode( GDALWarpKernel *poWK )
5689{
5690 return GWKRun( poWK, "GWKAverageOrMode", GWKAverageOrModeThread );
5691}
5692
5693// Overall logic based on GWKGeneralCaseThread().
5694static void GWKAverageOrModeThread( void* pData)
5695{
5696 GWKJobStruct* psJob = static_cast<GWKJobStruct *>(pData);
5697 GDALWarpKernel *poWK = psJob->poWK;
5698 const int iYMin = psJob->iYMin;
5699 const int iYMax = psJob->iYMax;
5700
5701 const int nDstXSize = poWK->nDstXSize;
5702 const int nSrcXSize = poWK->nSrcXSize;
5703 const int nSrcYSize = poWK->nSrcYSize;
5704
5705/* -------------------------------------------------------------------- */
5706/* Find out which algorithm to use (small optim.) */
5707/* -------------------------------------------------------------------- */
5708 int nAlgo = 0;
5709
5710 // These vars only used with nAlgo == 3.
5711 int *panVals = nullptr;
5712 int nBins = 0;
5713 int nBinsOffset = 0;
5714
5715 // Only used with nAlgo = 2.
5716 float* pafVals = nullptr;
5717 int* panSums = nullptr;
5718
5719 // Only used with nAlgo = 6.
5720 float quant = 0.5;
5721
5722 if( poWK->eResample == GRA_Average )
5723 {
5724 nAlgo = GWKAOM_Average;
5725 }
5726 else if( poWK->eResample == GRA_Mode )
5727 {
5728 // TODO check color table count > 256.
5729 if( poWK->eWorkingDataType == GDT_Byte ||
5730 poWK->eWorkingDataType == GDT_UInt16 ||
5731 poWK->eWorkingDataType == GDT_Int16 )
5732 {
5733 nAlgo = GWKAOM_Imode;
5734
5735 // In the case of a paletted or non-paletted byte band,
5736 // Input values are between 0 and 255.
5737 if( poWK->eWorkingDataType == GDT_Byte )
5738 {
5739 nBins = 256;
5740 }
5741 // In the case of Int16, input values are between -32768 and 32767.
5742 else if( poWK->eWorkingDataType == GDT_Int16 )
5743 {
5744 nBins = 65536;
5745 nBinsOffset = 32768;
5746 }
5747 // In the case of UInt16, input values are between 0 and 65537.
5748 else if( poWK->eWorkingDataType == GDT_UInt16 )
5749 {
5750 nBins = 65536;
5751 }
5752 panVals =
5753 static_cast<int *>(VSI_MALLOC_VERBOSE(nBins * sizeof(int)));
5754 if( panVals == nullptr )
5755 return;
5756 }
5757 else
5758 {
5759 nAlgo = GWKAOM_Fmode;
5760
5761 if( nSrcXSize > 0 && nSrcYSize > 0 )
5762 {
5763 pafVals = static_cast<float *>(
5764 VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(float)));
5765 panSums = static_cast<int *>(
5766 VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(int)));
5767 if( pafVals == nullptr || panSums == nullptr )
5768 {
5769 VSIFree(pafVals);
5770 VSIFree(panSums);
5771 return;
5772 }
5773 }
5774 }
5775 }
5776 else if( poWK->eResample == GRA_Max )
5777 {
5778 nAlgo = GWKAOM_Max;
5779 }
5780 else if( poWK->eResample == GRA_Min )
5781 {
5782 nAlgo = GWKAOM_Min;
5783 }
5784 else if( poWK->eResample == GRA_Med )
5785 {
5786 nAlgo = GWKAOM_Quant;
5787 quant = 0.5;
5788 }
5789 else if( poWK->eResample == GRA_Q1 )
5790 {
5791 nAlgo = GWKAOM_Quant;
5792 quant = 0.25;
5793 }
5794 else if( poWK->eResample == GRA_Q3 )
5795 {
5796 nAlgo = GWKAOM_Quant;
5797 quant = 0.75;
5798 }
5799 else
5800 {
5801 // Other resample algorithms not permitted here.
5802 CPLDebug("GDAL",
5803 "GDALWarpKernel():GWKAverageOrModeThread() ERROR, "
5804 "illegal resample" );
5805 return;
5806 }
5807
5808 CPLDebug("GDAL",
5809 "GDALWarpKernel():GWKAverageOrModeThread() using algo %d", nAlgo);
5810
5811/* -------------------------------------------------------------------- */
5812/* Allocate x,y,z coordinate arrays for transformation ... two */
5813/* scanlines worth of positions. */
5814/* -------------------------------------------------------------------- */
5815
5816 double *padfX =
5817 static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5818 double *padfY =
5819 static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5820 double *padfZ =
5821 static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5822 double *padfX2 =
5823 static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5824 double *padfY2 =
5825 static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5826 double *padfZ2 =
5827 static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5828 int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
5829 int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
5830
5831 const double dfSrcCoordPrecision = CPLAtof(
5832 CSLFetchNameValueDef(poWK->papszWarpOptions,
5833 "SRC_COORD_PRECISION", "0"));
5834 const double dfErrorThreshold = CPLAtof(
5835 CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
5836
5837/* ==================================================================== */
5838/* Loop over output lines. */
5839/* ==================================================================== */
5840 for( int iDstY = iYMin; iDstY < iYMax; iDstY++ )
5841 {
5842
5843/* -------------------------------------------------------------------- */
5844/* Setup points to transform to source image space. */
5845/* -------------------------------------------------------------------- */
5846 for( int iDstX = 0; iDstX < nDstXSize; iDstX++ )
5847 {
5848 padfX[iDstX] = iDstX + poWK->nDstXOff;
5849 padfY[iDstX] = iDstY + poWK->nDstYOff;
5850 padfZ[iDstX] = 0.0;
5851 padfX2[iDstX] = iDstX + 1.0 + poWK->nDstXOff;
5852 padfY2[iDstX] = iDstY + 1.0 + poWK->nDstYOff;
5853 padfZ2[iDstX] = 0.0;
5854 }
5855
5856/* -------------------------------------------------------------------- */
5857/* Transform the points from destination pixel/line coordinates */
5858/* to source pixel/line coordinates. */
5859/* -------------------------------------------------------------------- */
5860 poWK->pfnTransformer( psJob->pTransformerArg, TRUE, nDstXSize,
5861 padfX, padfY, padfZ, pabSuccess );
5862 poWK->pfnTransformer( psJob->pTransformerArg, TRUE, nDstXSize,
5863 padfX2, padfY2, padfZ2, pabSuccess2 );
5864
5865 if( dfSrcCoordPrecision > 0.0 )
5866 {
5867 GWKRoundSourceCoordinates(nDstXSize, padfX, padfY, padfZ,
5868 pabSuccess,
5869 dfSrcCoordPrecision,
5870 dfErrorThreshold,
5871 poWK->pfnTransformer,
5872 psJob->pTransformerArg,
5873 poWK->nDstXOff,
5874 iDstY + poWK->nDstYOff);
5875 GWKRoundSourceCoordinates(nDstXSize, padfX2, padfY2, padfZ2,
5876 pabSuccess2,
5877 dfSrcCoordPrecision,
5878 dfErrorThreshold,
5879 poWK->pfnTransformer,
5880 psJob->pTransformerArg,
5881 1.0 + poWK->nDstXOff,
5882 iDstY + 1.0 + poWK->nDstYOff);
5883 }
5884
5885/* ==================================================================== */
5886/* Loop over pixels in output scanline. */
5887/* ==================================================================== */
5888 for( int iDstX = 0; iDstX < nDstXSize; iDstX++ )
5889 {
5890 int iSrcOffset = 0;
5891 double dfDensity = 1.0;
5892 bool bHasFoundDensity = false;
5893
5894 if( !pabSuccess[iDstX] || !pabSuccess2[iDstX] )
5895 continue;
5896 const int iDstOffset = iDstX + iDstY * nDstXSize;
5897
5898/* ==================================================================== */
5899/* Loop processing each band. */
5900/* ==================================================================== */
5901
5902 for( int iBand = 0; iBand < poWK->nBands; iBand++ )
5903 {
5904 double dfBandDensity = 0.0;
5905 double dfValueReal = 0.0;
5906 double dfValueRealTmp = 0.0;
5907 double dfValueImagTmp = 0.0;
5908
5909/* -------------------------------------------------------------------- */
5910/* Collect the source value. */
5911/* -------------------------------------------------------------------- */
5912
5913 double dfTotal = 0.0;
5914 // Count of pixels used to compute average/mode.
5915 int nCount = 0;
5916 // Count of all pixels sampled, including nodata.
5917 int nCount2 = 0;
5918
5919 // Compute corners in source crs.
5920 int iSrcXMin =
5921 std::max(static_cast<int>(floor(padfX[iDstX] + 1e-10)) -
5922 poWK->nSrcXOff, 0);
5923 int iSrcXMax =
5924 std::min(static_cast<int>(ceil(padfX2[iDstX] - 1e-10)) -
5925 poWK->nSrcXOff, nSrcXSize);
5926 int iSrcYMin =
5927 std::max(static_cast<int>(floor(padfY[iDstX] + 1e-10)) -
5928 poWK->nSrcYOff, 0);
5929 int iSrcYMax =
5930 std::min(static_cast<int>(ceil(padfY2[iDstX] - 1e-10)) -
5931 poWK->nSrcYOff, nSrcYSize);
5932
5933 // The transformation might not have preserved ordering of
5934 // coordinates so do the necessary swapping (#5433).
5935 // NOTE: this is really an approximative fix. To do something
5936 // more precise we would for example need to compute the
5937 // transformation of coordinates in the
5938 // [iDstX,iDstY]x[iDstX+1,iDstY+1] square back to source
5939 // coordinates, and take the bounding box of the got source
5940 // coordinates.
5941 if( iSrcXMax < iSrcXMin )
5942 {
5943 iSrcXMin = std::max(
5944 static_cast<int>(floor((padfX2[iDstX] + 1.0e-10))) -
5945 poWK->nSrcXOff,
5946 0);
5947 iSrcXMax = std::min(
5948 static_cast<int>(ceil((padfX[iDstX] - 1.0e-10))) -
5949 poWK->nSrcXOff,
5950 nSrcXSize);
5951 }
5952 if( iSrcYMax < iSrcYMin )
5953 {
5954 iSrcYMin = std::max(
5955 static_cast<int>(floor((padfY2[iDstX] + 1e-10))) -
5956 poWK->nSrcYOff,
5957 0);
5958 iSrcYMax = std::min(
5959 static_cast<int>(ceil((padfY[iDstX] - 1e-10))) -
5960 poWK->nSrcYOff,
5961 nSrcYSize);
5962 }
5963 if( iSrcXMin == iSrcXMax && iSrcXMax < nSrcXSize )
5964 iSrcXMax++;
5965 if( iSrcYMin == iSrcYMax && iSrcYMax < nSrcYSize )
5966 iSrcYMax++;
5967
5968 // Loop over source lines and pixels - 3 possible algorithms.
5969
5970 // poWK->eResample == GRA_Average.
5971 if( nAlgo == GWKAOM_Average )
5972 {
5973 // This code adapted from GDALDownsampleChunk32R_AverageT()
5974 // in gcore/overview.cpp.
5975 for( int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++ )
5976 {
5977 for( int iSrcX = iSrcXMin; iSrcX < iSrcXMax; iSrcX++ )
5978 {
5979 iSrcOffset = iSrcX + iSrcY * nSrcXSize;
5980
5981 if( poWK->panUnifiedSrcValid != nullptr
5982 && !(poWK->panUnifiedSrcValid[iSrcOffset>>5]
5983 & (0x01 << (iSrcOffset & 0x1f))) )
5984 {
5985 continue;
5986 }
5987
5988 nCount2++;
5989 if( GWKGetPixelValue(
5990 poWK, iBand, iSrcOffset,
5991 &dfBandDensity, &dfValueRealTmp,
5992 &dfValueImagTmp ) &&
5993 dfBandDensity > BAND_DENSITY_THRESHOLD )
5994 {
5995 nCount++;
5996 dfTotal += dfValueRealTmp;
5997 }
5998 }
5999 }
6000
6001 if( nCount > 0 )
6002 {
6003 dfValueReal = dfTotal / nCount;
6004 dfBandDensity = 1;
6005 bHasFoundDensity = true;
6006 }
6007 } // GRA_Average.
6008 else if( nAlgo == GWKAOM_Imode || nAlgo == GWKAOM_Fmode )
6009 // poWK->eResample == GRA_Mode
6010 {
6011 // This code adapted from GDALDownsampleChunk32R_Mode() in
6012 // gcore/overview.cpp.
6013 if( nAlgo == GWKAOM_Fmode ) // int32 or float.
6014 {
6015 // Does it make sense it makes to run a
6016 // majority filter on floating point data? But, here it
6017 // is for the sake of compatibility. It won't look
6018 // right on RGB images by the nature of the filter.
6019 int iMaxInd = 0;
6020 int iMaxVal = -1;
6021 int i = 0;
6022
6023 for( int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++ )
6024 {
6025 for( int iSrcX = iSrcXMin;
6026 iSrcX < iSrcXMax;
6027 iSrcX++ )
6028 {
6029 iSrcOffset = iSrcX + iSrcY * nSrcXSize;
6030
6031 if( poWK->panUnifiedSrcValid != nullptr
6032 && !(poWK->panUnifiedSrcValid[iSrcOffset>>5]
6033 & (0x01 << (iSrcOffset & 0x1f))) )
6034 continue;
6035
6036 nCount2++;
6037 if( GWKGetPixelValue(
6038 poWK, iBand, iSrcOffset,
6039 &dfBandDensity, &dfValueRealTmp,
6040 &dfValueImagTmp ) &&
6041 dfBandDensity > BAND_DENSITY_THRESHOLD )
6042 {
6043 nCount++;
6044
6045 const float fVal =
6046 static_cast<float>(dfValueRealTmp);
6047
6048 // Check array for existing entry.
6049 for( i = 0; i < iMaxInd; ++i )
6050 if( pafVals[i] == fVal
6051 && ++panSums[i] > panSums[iMaxVal] )
6052 {
6053 iMaxVal = i;
6054 break;
6055 }
6056
6057 // Add to arr if entry not already there.
6058 if( i == iMaxInd )
6059 {
6060 pafVals[iMaxInd] = fVal;
6061 panSums[iMaxInd] = 1;
6062
6063 if( iMaxVal < 0 )
6064 iMaxVal = iMaxInd;
6065
6066 ++iMaxInd;
6067 }
6068 }
6069 }
6070 }
6071
6072 if( iMaxVal != -1 )
6073 {
6074 dfValueReal = pafVals[iMaxVal];
6075 dfBandDensity = 1;
6076 bHasFoundDensity = true;
6077 }
6078 }
6079 else // byte or int16.
6080 {
6081 int nMaxVal = 0;
6082 int iMaxInd = -1;
6083
6084 memset(panVals, 0, nBins*sizeof(int));
6085
6086 for( int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++ )
6087 {
6088 for( int iSrcX = iSrcXMin;
6089 iSrcX < iSrcXMax;
6090 iSrcX++ )
6091 {
6092 iSrcOffset = iSrcX + iSrcY * nSrcXSize;
6093
6094 if( poWK->panUnifiedSrcValid != nullptr
6095 && !(poWK->panUnifiedSrcValid[iSrcOffset>>5]
6096 & (0x01 << (iSrcOffset & 0x1f))) )
6097 continue;
6098
6099 nCount2++;
6100 if( GWKGetPixelValue(
6101 poWK, iBand, iSrcOffset,
6102 &dfBandDensity, &dfValueRealTmp,
6103 &dfValueImagTmp ) &&
6104 dfBandDensity > BAND_DENSITY_THRESHOLD )
6105 {
6106 nCount++;
6107
6108 const int nVal =
6109 static_cast<int>(dfValueRealTmp);
6110 if( ++panVals[nVal+nBinsOffset] > nMaxVal )
6111 {
6112 // Sum the density.
6113 // Is it the most common value so far?
6114 iMaxInd = nVal;
6115 nMaxVal = panVals[nVal+nBinsOffset];
6116 }
6117 }
6118 }
6119 }
6120
6121 if( iMaxInd != -1 )
6122 {
6123 dfValueReal = (float)iMaxInd;
6124 dfBandDensity = 1;
6125 bHasFoundDensity = true;
6126 }
6127 }
6128 } // GRA_Mode.
6129 else if( nAlgo == GWKAOM_Max )
6130 // poWK->eResample == GRA_Max.
6131 {
6132 dfTotal = std::numeric_limits<double>::lowest();
6133 // This code adapted from nAlgo 1 method, GRA_Average.
6134 for( int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++ )
6135 {
6136 for( int iSrcX = iSrcXMin; iSrcX < iSrcXMax; iSrcX++ )
6137 {
6138 iSrcOffset = iSrcX + iSrcY * nSrcXSize;
6139
6140 if( poWK->panUnifiedSrcValid != nullptr
6141 && !(poWK->panUnifiedSrcValid[iSrcOffset>>5]
6142 & (0x01 << (iSrcOffset & 0x1f))) )
6143 {
6144 continue;
6145 }
6146
6147 // Returns pixel value if it is not no data.
6148 if( GWKGetPixelValue(
6149 poWK, iBand, iSrcOffset,
6150 &dfBandDensity, &dfValueRealTmp,
6151 &dfValueImagTmp ) &&
6152 dfBandDensity > BAND_DENSITY_THRESHOLD )
6153 {
6154 nCount++;
6155 if( dfTotal < dfValueRealTmp )
6156 {
6157 dfTotal = dfValueRealTmp;
6158 }
6159 }
6160 }
6161 }
6162
6163 if( nCount > 0 )
6164 {
6165 dfValueReal = dfTotal;
6166 dfBandDensity = 1;
6167 bHasFoundDensity = true;
6168 }
6169 } // GRA_Max.
6170 else if( nAlgo == GWKAOM_Min )
6171 // poWK->eResample == GRA_Min.
6172 {
6173 dfTotal = std::numeric_limits<double>::max();
6174 // This code adapted from nAlgo 1 method, GRA_Average.
6175 for( int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++ )
6176 {
6177 for( int iSrcX = iSrcXMin; iSrcX < iSrcXMax; iSrcX++ )
6178 {
6179 iSrcOffset = iSrcX + iSrcY * nSrcXSize;
6180
6181 if( poWK->panUnifiedSrcValid != nullptr
6182 && !(poWK->panUnifiedSrcValid[iSrcOffset>>5]
6183 & (0x01 << (iSrcOffset & 0x1f))) )
6184 {
6185 continue;
6186 }
6187
6188 // Returns pixel value if it is not no data.
6189 if( GWKGetPixelValue(
6190 poWK, iBand, iSrcOffset,
6191 &dfBandDensity, &dfValueRealTmp,
6192 &dfValueImagTmp ) &&
6193 dfBandDensity > BAND_DENSITY_THRESHOLD )
6194 {
6195 nCount++;
6196 if( dfTotal > dfValueRealTmp )
6197 {
6198 dfTotal = dfValueRealTmp;
6199 }
6200 }
6201 }
6202 }
6203
6204 if( nCount > 0 )
6205 {
6206 dfValueReal = dfTotal;
6207 dfBandDensity = 1;
6208 bHasFoundDensity = true;
6209 }
6210 } // GRA_Min.
6211 else if( nAlgo == GWKAOM_Quant )
6212 // poWK->eResample == GRA_Med | GRA_Q1 | GRA_Q3.
6213 {
6214 std::vector<double> dfValuesTmp;
6215
6216 // This code adapted from nAlgo 1 method, GRA_Average.
6217 for( int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++ )
6218 {
6219 for( int iSrcX = iSrcXMin; iSrcX < iSrcXMax; iSrcX++ )
6220 {
6221 iSrcOffset = iSrcX + iSrcY * nSrcXSize;
6222
6223 if( poWK->panUnifiedSrcValid != nullptr
6224 && !(poWK->panUnifiedSrcValid[iSrcOffset>>5]
6225 & (0x01 << (iSrcOffset & 0x1f))) )
6226 {
6227 continue;
6228 }
6229
6230 // Returns pixel value if it is not no data.
6231 if( GWKGetPixelValue(
6232 poWK, iBand, iSrcOffset,
6233 &dfBandDensity, &dfValueRealTmp,
6234 &dfValueImagTmp ) &&
6235 dfBandDensity > BAND_DENSITY_THRESHOLD )
6236 {
6237 nCount++;
6238 dfValuesTmp.push_back(dfValueRealTmp);
6239 }
6240 }
6241 }
6242
6243 if( nCount > 0 )
6244 {
6245 std::sort(dfValuesTmp.begin(), dfValuesTmp.end());
6246 int quantIdx = static_cast<int>(
6247 std::ceil(quant * dfValuesTmp.size() - 1));
6248 dfValueReal = dfValuesTmp[quantIdx];
6249
6250 dfBandDensity = 1;
6251 bHasFoundDensity = true;
6252 dfValuesTmp.clear();
6253 }
6254 } // Quantile.
6255
6256/* -------------------------------------------------------------------- */
6257/* We have a computed value from the source. Now apply it to */
6258/* the destination pixel. */
6259/* -------------------------------------------------------------------- */
6260 if( bHasFoundDensity )
6261 {
6262 // TODO: Should we compute dfBandDensity in fct of
6263 // nCount/nCount2, or use as a threshold to set the dest
6264 // value?
6265 // dfBandDensity = (float) nCount / nCount2;
6266 // if( (float) nCount / nCount2 > 0.1 )
6267 // or fix gdalwarp crop_to_cutline to crop partially
6268 // overlapping pixels.
6269 double dfValueImag = 0.0;
6270 GWKSetPixelValue( poWK, iBand, iDstOffset,
6271 dfBandDensity,
6272 dfValueReal, dfValueImag );
6273 }
6274 }
6275
6276 if( !bHasFoundDensity )
6277 continue;
6278
6279/* -------------------------------------------------------------------- */
6280/* Update destination density/validity masks. */
6281/* -------------------------------------------------------------------- */
6282 GWKOverlayDensity( poWK, iDstOffset, dfDensity );
6283
6284 if( poWK->panDstValid != nullptr )
6285 {
6286 poWK->panDstValid[iDstOffset>>5] |=
6287 0x01 << (iDstOffset & 0x1f);
6288 }
6289 } /* Next iDstX */
6290
6291/* -------------------------------------------------------------------- */
6292/* Report progress to the user, and optionally cancel out. */
6293/* -------------------------------------------------------------------- */
6294 if( psJob->pfnProgress && psJob->pfnProgress(psJob) )
6295 break;
6296 }
6297
6298/* -------------------------------------------------------------------- */
6299/* Cleanup and return. */
6300/* -------------------------------------------------------------------- */
6301 CPLFree( padfX );
6302 CPLFree( padfY );
6303 CPLFree( padfZ );
6304 CPLFree( padfX2 );
6305 CPLFree( padfY2 );
6306 CPLFree( padfZ2 );
6307 CPLFree( pabSuccess );
6308 CPLFree( pabSuccess2 );
6309 VSIFree( panVals );
6310 VSIFree(pafVals);
6311 VSIFree(panSums);
6312}
Note: See TracBrowser for help on using the repository browser.