The function OGRGMLDataSource::Open in ogrgmldatasource.cpp fails if the GML file
has a UTF-8 encoded Unicode BOM (byte order mark) at the start of the file. This is
valid UTF-8 encoding (see RFC 3629, section 6) and should be allowed. Xerces handles this sequence properly. The code below is a modification of this function that allows for it.
It may be better to remove the "Test Open" functionality altogether and just let Xerces decide whether the file is well-formed XML.
int OGRGMLDataSource::Open( const char * pszNewName, int bTestOpen )
{
FILE *fp;
char szHeader[1000];
/* -------------------------------------------------------------------- */
/* Open the source file. */
/* -------------------------------------------------------------------- */
fp = VSIFOpen( pszNewName, "r" );
if( fp == NULL )
{
if( !bTestOpen )
CPLError( CE_Failure, CPLE_OpenFailed,
"Failed to open GML file `%s'.",
pszNewName );
return FALSE;
}
/* -------------------------------------------------------------------- */
/* If we aren't sure it is GML, load a header chunk and check */
/* for signs it is GML */
/* -------------------------------------------------------------------- */
if( bTestOpen )
{
char *szPtr = szHeader;
VSIFRead( szHeader, 1, sizeof(szHeader), fp );
szHeader[sizeof(szHeader)-1] = '\0';
/* -------------------------------------------------------------------- */
/* Check for a UTF-8 BOM and skip if found */
/* -------------------------------------------------------------------- */
if (((unsigned char)szPtr[0] == 0xEF) && ((unsigned char)szPtr[1] == 0xBB) && ((unsigned char)szPtr[2] == 0xBF))
szPtr += 3;
if( szPtr[0] != '<'
|| strstr(szPtr,"opengis.net/gml") == NULL )
{
VSIFClose( fp );
return FALSE;
}
}