#!/usr/bin/env python

"""
MODULE:     v.in.pygbif

AUTHOR(S):  Stefan Blumentrath <stefan.blumentrath AT nina.no>
            Helmut Kudrnovsky <alectoria AT gmx at>

PURPOSE:    Search and import GBIF species distribution data directly from
            the GBIF API using pygbif

COPYRIGHT:  (C) 2016 by the GRASS Development Team

            This program is free software under the GNU General Public
            License (>=v2). Read the file COPYING that comes with GRASS
            for details.
"""

"""
To Dos:
- use a proper cleanup routine, especially if using csv + vrt (copy from other modules)
- handle layers in mask input
- add a progress bar
- make date_from and date_to dependent on each other, or use today as date_to if not specified
"""

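# Example usage (a hedged sketch, assuming the module is installed as a GRASS
# addon; the taxon and output map names below are purely illustrative):
#
#     import grass.script as grass
#     # Only print the matching taxon names, without downloading anything:
#     grass.run_command("v.in.pygbif", flags="p", taxa="Poa annua", rank="species")
#     # Import occurrences for two taxa into a single vector map:
#     grass.run_command("v.in.pygbif", taxa="Poa annua,Poa pratensis",
#                       output="poa_gbif", rank="species")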
#%module
#% description: Search and import GBIF species distribution data
#% keyword: vector
#% keyword: geometry
#%end

#%option G_OPT_V_OUTPUT
#% key: output
#% description: Name of resulting vector map with occurrences
#% required: yes
#%end

#%option
#% key: taxa
#% description: Comma-separated list of taxon names or keys to fetch data for
#% required: yes
#%end

#%option G_OPT_V_INPUT
#% key: mask
#% description: Vector map that delimits the region of interest
#% guisection: Spatial filter
#% required: no
#%end

#%option
#% key: date_from
#% type: string
#% description: Lower bound of acceptable dates (format: yyyy, yyyy-MM, yyyy-MM-dd, or MM-dd)
#% guisection: Temporal filter
#% required: no
#%end

#%option
#% key: date_to
#% type: string
#% description: Upper bound of acceptable dates (format: yyyy, yyyy-MM, yyyy-MM-dd, or MM-dd)
#% guisection: Temporal filter
#% required: no
#%end

# Import will always be limited to the current region except for lat/lon locations
#%flag
#% key: b
#% description: Do not build topology
#%end

#%flag
#% key: r
#% description: Do not limit import to the current region (works only in lat/lon locations)
#% guisection: Spatial filter
#%end

#%flag
#% key: p
#% description: Print result of matching taxon names and exit
#% guisection: Print
#% suppress_required: yes
#%end

#%flag
#% key: i
#% description: Produce an individual map for each taxon
#%end

#%flag
#% key: g
#% description: Print result of matching taxon names in shell script style and exit
#% guisection: Print
#% suppress_required: yes
#%end

#%flag
#% key: o
#% description: Print number of matching occurrences per taxon and exit
#% guisection: Print
#% suppress_required: yes
#%end

#%flag
#% key: t
#% description: Print result of taxon matching in table format and exit
#% guisection: Print
#% suppress_required: yes
#%end

#%option
#% key: basisofrecord
#% type: string
#% description: Accepted basis of records
#% guisection: Context filter
#% required: no
#% multiple: no
#% options: ALL,FOSSIL_SPECIMEN,HUMAN_OBSERVATION,LITERATURE,LIVING_SPECIMEN,MACHINE_OBSERVATION,OBSERVATION,PRESERVED_SPECIMEN,UNKNOWN
#% answer: ALL
#%end

#%option
#% key: rank
#% type: string
#% description: Rank of the taxon to search for
#% guisection: Context filter
#% required: yes
#% multiple: no
#% options: class,cultivar,cultivar_group,domain,family,form,genus,informal,infrageneric_name,infraorder,infraspecific_name,infrasubspecific_name,kingdom,order,phylum,section,series,species,strain,subclass,subfamily,subform,subgenus,subkingdom,suborder,subphylum,subsection,subseries,subspecies,subtribe,subvariety,superclass,superfamily,superorder,superphylum,suprageneric_name,tribe,unranked,variety
#% answer: species
#%end

#%option
#% key: recordedby
#% type: string
#% description: The person who recorded the occurrence
#% guisection: Context filter
#%end

#%option
#% key: institutioncode
#% type: string
#% description: An identifier of any form assigned by the source to identify the institution the record belongs to
#% guisection: Context filter
#%end

#%option
#% key: country
#% type: string
#% description: The 2-letter country code (ISO 3166-1) of the country in which the occurrence was recorded
#% guisection: Spatial filter
#%end

#%option
#% key: continent
#% type: string
#% description: The continent on which the occurrence was recorded
#% guisection: Spatial filter
#% options: africa,antarctica,asia,europe,north_america,oceania,south_america
#%end

#%flag
#% key: n
#% description: Do not limit search to records with coordinates
#% guisection: Spatial filter
#%end

#%flag
#% key: s
#% description: Also import occurrences with spatial issues
#% guisection: Spatial filter
#%end

import sys
import os
import math

from osgeo import ogr
from osgeo import osr
from dateutil.parser import parse

import grass.script as grass
from grass.pygrass.vector import Vector
from grass.pygrass.vector import VectorTopo
from grass.pygrass.vector.geometry import Point


def set_output_encoding(encoding='utf-8'):
    """When piping to the terminal, Python knows the encoding needed and
    sets it automatically. But when piping to another program (for example,
    | less), Python cannot check the output encoding. In that case, it
    is None. Here this situation is caught for both stdout and stderr
    and the encoding is forced."""
    import sys
    import codecs
    current = sys.stdout.encoding
    if current is None:
        sys.stdout = codecs.getwriter(encoding)(sys.stdout)
    current = sys.stderr.encoding
    if current is None:
        sys.stderr = codecs.getwriter(encoding)(sys.stderr)


def main():

    try:
        from pygbif import occurrences
        from pygbif import species
    except ImportError:
        grass.fatal(_("Cannot import pygbif (https://github.com/sckott/pygbif)"
                      " library."
                      " Please install it (pip install pygbif)"
                      " or ensure that it is on path"
                      " (use PYTHONPATH variable)."))

    # Parse input options
    output = options['output']
    mask = options['mask']
    species_maps = flags['i']
    no_region_limit = flags['r']
    no_topo = flags['b']
    print_species = flags['p']
    print_species_table = flags['t']
    print_species_shell = flags['g']
    print_occ_number = flags['o']
    allow_no_geom = flags['n']
    hasGeoIssue = flags['s']
    taxa_list = options['taxa'].split(',')
    institutionCode = options['institutioncode']
    basisofrecord = options['basisofrecord']
    recordedby = options['recordedby'].split(',')
    date_from = options['date_from']
    date_to = options['date_to']
    country = options['country']
    continent = options['continent']
    rank = options['rank']

    # Define static variables
    # Initialize cat
    cat = 0
    # Number of occurrences to fetch in one request
    chunk_size = 300
    # lat/lon proj strings
    latlon_crs = ['+proj=longlat +no_defs +a=6378137 +rf=298.257223563 +towgs84=0.000,0.000,0.000',
                  '+proj=longlat +no_defs +a=6378137 +rf=298.257223563 +towgs84=0,0,0,0,0,0,0']
    # List of attributes available in Darwin Core.
    # Not all attributes are returned in each request, so, to avoid key
    # errors when accessing the dictionary returned by pygbif, the presence
    # of the DWC keys in the returned dictionary is checked using this list.
    # The number of keys in this list has to be equal to the number of columns
    # in the attribute table and the attributes written for each occurrence.
    dwc_keys = ['key', 'taxonRank', 'taxonKey', 'taxonID', 'scientificName',
                'species', 'speciesKey', 'genericName', 'genus', 'genusKey',
                'family', 'familyKey', 'order', 'orderKey', 'class',
                'classKey', 'phylum', 'phylumKey', 'kingdom', 'kingdomKey',
                'eventDate', 'verbatimEventDate', 'startDayOfYear',
                'endDayOfYear', 'year', 'month', 'day', 'occurrenceID',
                'occurrenceStatus', 'occurrenceRemarks', 'Habitat',
                'basisOfRecord', 'preparations', 'sex', 'type', 'locality',
                'verbatimLocality', 'decimalLongitude', 'decimalLatitude',
                'geodeticDatum', 'higherGeography', 'continent', 'country',
                'countryCode', 'stateProvince', 'gbifID', 'protocol',
                'identifier', 'recordedBy', 'identificationID', 'identifiers',
                'dateIdentified', 'modified', 'institutionCode',
                'lastInterpreted', 'lastParsed', 'references', 'relations',
                'catalogNumber', 'occurrenceDetails', 'datasetKey',
                'datasetName', 'collectionCode', 'rights', 'rightsHolder',
                'license', 'publishingOrgKey', 'publishingCountry',
                'lastCrawled', 'specificEpithet', 'facts', 'issues',
                'extensions', 'language']
    # Define columns for the attribute table
    cols = [('cat', 'INTEGER PRIMARY KEY'),
            ('g_search', 'varchar(100)'),
            ('g_key', 'integer'),
            ('g_taxonrank', 'varchar(50)'),
            ('g_taxonkey', 'integer'),
            ('g_taxonid', 'varchar(50)'),
            ('g_scientificname', 'varchar(255)'),
            ('g_species', 'varchar(255)'),
            ('g_specieskey', 'integer'),
            ('g_genericname', 'varchar(255)'),
            ('g_genus', 'varchar(50)'),
            ('g_genuskey', 'integer'),
            ('g_family', 'varchar(50)'),
            ('g_familykey', 'integer'),
            ('g_order', 'varchar(50)'),
            ('g_orderkey', 'integer'),
            ('g_class', 'varchar(50)'),
            ('g_classkey', 'integer'),
            ('g_phylum', 'varchar(50)'),
            ('g_phylumkey', 'integer'),
            ('g_kingdom', 'varchar(50)'),
            ('g_kingdomkey', 'integer'),
            ('g_eventdate', 'text'),
            ('g_verbatimeventdate', 'varchar(50)'),
            ('g_startDayOfYear', 'integer'),
            ('g_endDayOfYear', 'integer'),
            ('g_year', 'integer'),
            ('g_month', 'integer'),
            ('g_day', 'integer'),
            ('g_occurrenceid', 'varchar(255)'),
            ('g_occurrenceStatus', 'varchar(50)'),
            ('g_occurrenceRemarks', 'varchar(50)'),
            ('g_Habitat', 'varchar(50)'),
            ('g_basisofrecord', 'varchar(50)'),
            ('g_preparations', 'varchar(50)'),
            ('g_sex', 'varchar(50)'),
            ('g_type', 'varchar(50)'),
            ('g_locality', 'varchar(255)'),
            ('g_verbatimlocality', 'varchar(255)'),
            ('g_decimallongitude', 'double precision'),
            ('g_decimallatitude', 'double precision'),
            ('g_geodeticdatum', 'varchar(50)'),
            ('g_higherGeography', 'varchar(255)'),
            ('g_continent', 'varchar(50)'),
            ('g_country', 'varchar(50)'),
            ('g_countryCode', 'varchar(50)'),
            ('g_stateProvince', 'varchar(50)'),
            ('g_gbifid', 'varchar(255)'),
            ('g_protocol', 'varchar(255)'),
            ('g_identifier', 'varchar(50)'),
            ('g_recordedby', 'varchar(255)'),
            ('g_identificationid', 'varchar(255)'),
            ('g_identifiers', 'text'),
            ('g_dateidentified', 'text'),
            ('g_modified', 'text'),
            ('g_institutioncode', 'varchar(50)'),
            ('g_lastinterpreted', 'text'),
            ('g_lastparsed', 'text'),
            ('g_references', 'varchar(255)'),
            ('g_relations', 'text'),
            ('g_catalognumber', 'varchar(50)'),
            ('g_occurrencedetails', 'text'),
            ('g_datasetkey', 'varchar(50)'),
            ('g_datasetname', 'varchar(255)'),
            ('g_collectioncode', 'varchar(50)'),
            ('g_rights', 'varchar(255)'),
            ('g_rightsholder', 'varchar(255)'),
            ('g_license', 'varchar(50)'),
            ('g_publishingorgkey', 'varchar(50)'),
            ('g_publishingcountry', 'varchar(50)'),
            ('g_lastcrawled', 'text'),
            ('g_specificepithet', 'varchar(50)'),
            ('g_facts', 'text'),
            ('g_issues', 'text'),
            ('g_extensions', 'text'),
            ('g_language', 'varchar(50)')]

    set_output_encoding()
    # Set the temporal filter if requested by the user
    # Initialize the eventDate filter
    eventDate = None
    # Check if date_from is compatible (ISO compliant)
    if date_from:
        try:
            parse(date_from)
        except:
            grass.fatal("Invalid start date provided")

    if date_from and not date_to:
        eventDate = '{}'.format(date_from)
    # Check if date_to is compatible (ISO compliant)
    if date_to:
        # An end date without a start date cannot form a valid date range
        if not date_from:
            grass.fatal("Please provide a start date (date_from) together with the end date (date_to)")
        try:
            parse(date_to)
        except:
            grass.fatal("Invalid end date provided")
        # Check if date_to is after date_from
        if parse(date_from) < parse(date_to):
            eventDate = '{},{}'.format(date_from, date_to)
        else:
            grass.fatal("Invalid date range: End date has to be after start date!")
    # Set the filter on basisOfRecord if requested by the user
    if basisofrecord == 'ALL':
        basisOfRecord = None
    else:
        basisOfRecord = basisofrecord
    # Also allow occurrences with spatial issues if requested by the user
    hasGeospatialIssue = False
    if hasGeoIssue:
        hasGeospatialIssue = True
    # Also allow occurrences without coordinates if requested by the user
    hasCoordinate = True
    if allow_no_geom:
        hasCoordinate = False

    # Set reprojection parameters
    # Set the target projection of the current LOCATION
    target_crs = grass.read_command('g.proj', flags='fj').rstrip(os.linesep)
    if target_crs == 'XY location (unprojected)':
        grass.fatal("Sorry, XY locations are not supported!")
    target = osr.SpatialReference()
    target.ImportFromProj4(target_crs)

    # Set the source projection from GBIF (WGS84)
    source = osr.SpatialReference()
    source.ImportFromEPSG(4326)
    if target_crs not in latlon_crs:
        transform = osr.CoordinateTransformation(source, target)
        reverse_transform = osr.CoordinateTransformation(target, source)

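    # Note on the two transformations above: "transform" converts GBIF
    # coordinates (WGS84 lat/lon) into the LOCATION's CRS when writing points,
    # while "reverse_transform" is used below to express the current region or
    # mask polygon in lat/lon, which is what the GBIF search API expects for
    # its geometry parameter.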
    # Generate a WKT polygon to use for spatial filtering if requested
    if mask:
        if len(mask.split('@')) == 2:
            m = VectorTopo(mask.split('@')[0], mapset=mask.split('@')[1])
        else:
            m = VectorTopo(mask)
        if not m.exist():
            grass.fatal('Could not find vector map <{}>'.format(mask))
        m.open('r')
        if not m.is_open():
            grass.fatal('Could not open vector map <{}>'.format(mask))

        # Use the map bounding box as spatial filter if the map does not
        # contain exactly one area
        if m.number_of('areas') == 1:
            region_pol = [area.to_wkt() for area in m.viter("areas")][0]
        else:
            bbox = str(m.bbox()).replace('Bbox(', '').replace(' ', '').rstrip(')').split(',')
            region_pol = 'POLYGON(({0} {1}, {0} {3}, {2} {3}, {2} {1}, {0} {1}))'.format(bbox[2], bbox[0],
                                                                                         bbox[3], bbox[1])
        m.close()
    else:
        # Do not limit the import spatially if the LOCATION is able to take global data
        if no_region_limit:
            if target_crs not in latlon_crs:
                grass.fatal('Import of data from outside the current region is '
                            'only supported in a WGS84 location!')
            region_pol = None
        else:
            # Limit the import spatially to the current region if the LOCATION
            # is NOT able to take global data, to avoid projection errors
            region = grass.parse_command('g.region', flags='g')
            region_pol = 'POLYGON(({0} {1}, {0} {3}, {2} {3}, {2} {1}, {0} {1}))'.format(region['e'], region['n'],
                                                                                         region['w'], region['s'])

    # Do not reproject in lat/lon LOCATIONS
    if target_crs not in latlon_crs:
        pol = ogr.CreateGeometryFromWkt(region_pol)
        pol.Transform(reverse_transform)
        pol = pol.ExportToWkt()
    else:
        pol = region_pol

    # Create the output map unless individual maps per species are requested
    if not species_maps and not print_species and not print_species_shell and not print_occ_number and not print_species_table:
        mapname = output
        new = Vector(mapname)
        new.open('w', tab_name=mapname, tab_cols=cols)
        cat = 1

    # Import data for each species
    for s in taxa_list:
        # Get the taxon key unless the taxon key itself is provided as input
        try:
            key = int(s)
        except:
            try:
                species_match = species.name_backbone(s, rank=rank,
                                                      strict=False,
                                                      verbose=True)
                key = species_match['usageKey']
            except:
                grass.error('Data request for taxon {} failed. Are you online?'.format(s))
                continue

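        # species_match (only defined when the taxon was given by name rather
        # than by key) is the GBIF backbone match returned by pygbif; only the
        # keys 'usageKey', 'scientificName', 'status' and, if present,
        # 'alternatives' are relied upon below.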
        # Return the matching taxon and alternatives and exit
        if print_species:
            print('Matching taxon for {} is:'.format(s))
            print('{} {}'.format(species_match['scientificName'], species_match['status']))
            if 'alternatives' in list(species_match.keys()):
                print('Alternative matches might be:')
                for m in species_match['alternatives']:
                    print('{} {}'.format(m['scientificName'], m['status']))
            else:
                print('No alternatives found for the given taxon')
            continue
        if print_species_shell:
            print('match={}'.format(species_match['scientificName']))
            if 'alternatives' in list(species_match.keys()):
                alternatives = []
                for m in species_match['alternatives']:
                    alternatives.append(m['scientificName'])
                print('alternatives={}'.format(','.join(alternatives)))
            continue
        if print_species_table:
            if 'alternatives' in list(species_match.keys()):
                if len(species_match['alternatives']) == 0:
                    print('{0}|{1}|{2}|'.format(s, key, species_match['scientificName']))
                else:
                    alternatives = []
                    for m in species_match['alternatives']:
                        alternatives.append(m['scientificName'])
                    print('{0}|{1}|{2}|{3}'.format(s, key, species_match['scientificName'],
                                                   ','.join(alternatives)))
            continue
        try:
            returns_n = occurrences.search(taxonKey=key,
                                           hasGeospatialIssue=hasGeospatialIssue,
                                           hasCoordinate=hasCoordinate,
                                           institutionCode=institutionCode,
                                           basisOfRecord=basisOfRecord,
                                           recordedBy=recordedby,
                                           eventDate=eventDate,
                                           continent=continent,
                                           country=country,
                                           geometry=pol,
                                           limit=1)['count']
        except:
            grass.error('Data request for taxon {} failed. Are you online?'.format(s))
            returns_n = 0

        # Skip the taxon if the search does not give a return
        # Print only the number of returns for the given search and exit
        if print_occ_number:
            grass.message('Found {0} occurrences for taxon {1}...'.format(returns_n, s))
            continue
        elif returns_n <= 0:
            grass.warning('No occurrences for current search for taxon {0}...'.format(s))
            continue
        elif returns_n >= 200000:
            grass.warning('Your search for {1} returns {0} records.\n'
                          'Unfortunately, the GBIF search API is limited to 200,000 records per request.\n'
                          'The download will be incomplete. Please consider splitting up your search.'.format(returns_n, s))

        # Get the number of chunks to download
        chunks = int(math.ceil(returns_n / float(chunk_size)))
        grass.verbose('Downloading {0} occurrences for taxon {1}...'.format(returns_n, s))
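        # Worked example of the paging arithmetic: returns_n = 750 with
        # chunk_size = 300 gives chunks = ceil(750 / 300) = 3 requests,
        # fetched further below with offsets 0, 300 and 600; the last request
        # simply returns fewer than chunk_size records.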

        # Create a map for each species if requested, using the map name as suffix
        if species_maps:
            mapname = '{}_{}'.format(s.replace(' ', '_'), output)

            new = Vector(mapname)
            new.open('w', tab_name=mapname, tab_cols=cols)
            cat = 0

        # Download the data from GBIF
        for c in range(chunks):
            # Define the offset
            offset = c * chunk_size
            # Adjust chunk_size to the hard limit of 200,000 records in the
            # GBIF API if necessary
            if offset + chunk_size >= 200000:
                chunk_size = 200000 - offset
            # Get the returns for the next chunk
            returns = occurrences.search(taxonKey=key,
                                         hasGeospatialIssue=hasGeospatialIssue,
                                         hasCoordinate=hasCoordinate,
                                         institutionCode=institutionCode,
                                         basisOfRecord=basisOfRecord,
                                         recordedBy=recordedby,
                                         eventDate=eventDate,
                                         continent=continent,
                                         country=country,
                                         geometry=pol,
                                         limit=chunk_size,
                                         offset=offset)

            # Write the returned data to the map and attribute table
            for res in returns['results']:
                if target_crs not in latlon_crs:
                    point = ogr.CreateGeometryFromWkt('POINT ({} {})'.format(res['decimalLongitude'], res['decimalLatitude']))
                    point.Transform(transform)
                    x = point.GetX()
                    y = point.GetY()
                else:
                    x = res['decimalLongitude']
                    y = res['decimalLatitude']

                point = Point(x, y)

                # Fill in missing DWC keys with None to avoid KeyErrors below
                for k in dwc_keys:
                    if k not in list(res.keys()):
                        res.update({k: None})

                cat = cat + 1
                new.write(point, cat=cat, attrs=(
                    '{}'.format(s),
                    res['key'],
                    res['taxonRank'],
                    res['taxonKey'],
                    res['taxonID'],
                    res['scientificName'],
                    res['species'],
                    res['speciesKey'],
                    res['genericName'],
                    res['genus'],
                    res['genusKey'],
                    res['family'],
                    res['familyKey'],
                    res['order'],
                    res['orderKey'],
                    res['class'],
                    res['classKey'],
                    res['phylum'],
                    res['phylumKey'],
                    res['kingdom'],
                    res['kingdomKey'],
                    '{}'.format(res['eventDate']) if res['eventDate'] else None,
                    '{}'.format(res['verbatimEventDate']) if res['verbatimEventDate'] else None,
                    res['startDayOfYear'],
                    res['endDayOfYear'],
                    res['year'],
                    res['month'],
                    res['day'],
                    res['occurrenceID'],
                    res['occurrenceStatus'],
                    res['occurrenceRemarks'],
                    res['Habitat'],
                    res['basisOfRecord'],
                    res['preparations'],
                    res['sex'],
                    res['type'],
                    res['locality'],
                    res['verbatimLocality'],
                    res['decimalLongitude'],
                    res['decimalLatitude'],
                    res['geodeticDatum'],
                    res['higherGeography'],
                    res['continent'],
                    res['country'],
                    res['countryCode'],
                    res['stateProvince'],
                    res['gbifID'],
                    res['protocol'],
                    res['identifier'],
                    res['recordedBy'],
                    res['identificationID'],
                    ','.join(res['identifiers']),
                    '{}'.format(res['dateIdentified']) if res['dateIdentified'] else None,
                    '{}'.format(res['modified']) if res['modified'] else None,
                    res['institutionCode'],
                    '{}'.format(res['lastInterpreted']) if res['lastInterpreted'] else None,
                    '{}'.format(res['lastParsed']) if res['lastParsed'] else None,
                    res['references'],
                    ','.join(res['relations']),
                    res['catalogNumber'],
                    '{}'.format(res['occurrenceDetails']) if res['occurrenceDetails'] else None,
                    res['datasetKey'],
                    res['datasetName'],
                    res['collectionCode'],
                    res['rights'],
                    res['rightsHolder'],
                    res['license'],
                    res['publishingOrgKey'],
                    res['publishingCountry'],
                    '{}'.format(res['lastCrawled']) if res['lastCrawled'] else None,
                    res['specificEpithet'],
                    ','.join(res['facts']),
                    ','.join(res['issues']),
                    ','.join(res['extensions']),
                    res['language'],))

        cat = cat + 1

        # Close the current map if a map for each species is requested
        if species_maps:
            new.table.conn.commit()
            new.close()
            if not no_topo:
                grass.run_command('v.build', map=mapname, option='build')

    # Close the output map unless a map for each species is requested
    if not species_maps and not print_species and not print_species_shell and not print_occ_number and not print_species_table:
        new.table.conn.commit()
        new.close()
        if not no_topo:
            grass.run_command('v.build', map=mapname, option='build')


# Run the module
# ToDo: Add an atexit procedure which closes and removes the current map
if __name__ == "__main__":
    options, flags = grass.parser()
    sys.exit(main())