| 1 | #!/usr/bin/env python
|
|---|
| 2 |
|
|---|
| 3 | ############################################################################
|
|---|
| 4 | #
|
|---|
| 5 | # MODULE: v.class.mlpy
|
|---|
| 6 | # AUTHOR(S): Vaclav Petras
|
|---|
| 7 | # PURPOSE: Classifies features in a vector map.
|
|---|
| 8 | # COPYRIGHT: (C) 2012 by Vaclav Petras, and the GRASS Development Team
|
|---|
| 9 | #
|
|---|
| 10 | # This program is free software; you can redistribute it and/or modify
|
|---|
| 11 | # it under the terms of the GNU General Public License as published by
|
|---|
| 12 | # the Free Software Foundation; either version 2 of the License, or
|
|---|
| 13 | # (at your option) any later version.
|
|---|
| 14 | #
|
|---|
| 15 | # This program is distributed in the hope that it will be useful,
|
|---|
| 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|---|
| 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|---|
| 18 | # GNU General Public License for more details.
|
|---|
| 19 | #
|
|---|
| 20 | ############################################################################
|
|---|
| 21 |
|
|---|
| 22 | #%module
|
|---|
| 23 | #% description: Vector supervised classification tool which uses attributes as classification parametres (order of columns matters, names not), cat column identifies feature, class_column is excluded from classification parametres.
|
|---|
| 24 | #% keyword: vector
|
|---|
| 25 | #% keyword: classification
|
|---|
| 26 | #% keyword: supervised
|
|---|
| 27 | #% keyword: machine learning
|
|---|
| 28 | #%end
|
|---|
| 29 | #%option G_OPT_V_MAP
|
|---|
| 30 | #% key: input
|
|---|
| 31 | #% description: Input vector map (attribut table required)
|
|---|
| 32 | #% required: yes
|
|---|
| 33 | #% multiple: no
|
|---|
| 34 | #%end
|
|---|
| 35 | #%option G_OPT_V_MAP
|
|---|
| 36 | #% key: training
|
|---|
| 37 | #% description: Training vector map (attribut table required)
|
|---|
| 38 | #% required: yes
|
|---|
| 39 | #% multiple: no
|
|---|
| 40 | #%end
|
|---|
| 41 | #%option G_OPT_V_FIELD
|
|---|
| 42 | #% key: class_column
|
|---|
| 43 | #% type: string
|
|---|
| 44 | #% label: Name of column containing class
|
|---|
| 45 | #% description: Used for both input/output and training dataset. If column does not exists in input map attribute table, it will be created.
|
|---|
| 46 | #% required: no
|
|---|
| 47 | #% multiple: no
|
|---|
| 48 | #% answer: class
|
|---|
| 49 | #%end
|
|---|
| 50 | #%option
|
|---|
| 51 | #% key: columns
|
|---|
| 52 | #% type: string
|
|---|
| 53 | #% label: Columns to be used in classification
|
|---|
| 54 | #% description: Columns to be used in classification. If left empty, all columns will be used for classification except for class_column and cat column.
|
|---|
| 55 | #% required: no
|
|---|
| 56 | #% multiple: yes
|
|---|
| 57 | #%end
|
|---|
| 58 |
|
|---|
| 59 |
|
|---|
| 60 | # TODO: add other classifiers
|
|---|
| 61 | # TODO: improve doc
|
|---|
| 62 | # TODO: input/training could be multiple
|
|---|
| 63 | # TODO: handle layers
|
|---|
| 64 | # TODO: output to a new map (all classes/one class), depends on what is faster
|
|---|
| 65 |
|
|---|
| 66 |
|
|---|
| 67 | import grass.script as grass
|
|---|
| 68 |
|
|---|
| 69 | import numpy as np
|
|---|
| 70 |
|
|---|
| 71 |
|
|---|
def addColumn(mapName, columnName, columnType):
    """Creates a new column in the attribute table of a vector map.

    The column definition handed over to v.db.addcolumn is the column
    name and the column type separated by one space.

    @param mapName name of the vector map whose table gets the column
    @param columnName name of the column to be created
    @param columnType type of the column, used verbatim in the definition
    """
    definition = ' '.join((columnName, columnType))
    grass.run_command('v.db.addcolumn', map=mapName, columns=definition)
|
|---|
| 77 |
|
|---|
| 78 |
|
|---|
def hasColumn(tableDescription, column):
    """Tells whether the table description lists a column of the given name.

    @param tableDescription dictionary whose 'cols' item is a sequence of
    column records with the column name as the first element
    @param column name of the column to look for

    @return True if some record starts with the name, False otherwise

    @todo This should be part of some object in the lib.
    """
    names = (record[0] for record in tableDescription['cols'])
    return any(name == column for name in names)
|
|---|
| 88 |
|
|---|
| 89 |
|
|---|
def updateColumn(mapName, column, cats, values=None):
    r"""!Updates column values for rows with given categories.

    One UPDATE statement is generated for every category and all the
    statements are passed to db.execute through its standard input
    in a single call. Nothing is executed when there is nothing to update.

    \param mapName name of the table to be updated
    \param column name of the column to be set
    \param cats categories to be updated
           or a list of tuples (cat, value) if \p values is None
    \param values to be set for column (same length as cats) or \c None

    \throws ValueError if \p values is given and its length differs
            from the length of \p cats
    """
    if values is None:
        pairs = [(item[0], item[1]) for item in cats]
    else:
        if len(values) != len(cats):
            raise ValueError("cats and values must have the same length"
                             " (got %d and %d)" % (len(cats), len(values)))
        pairs = list(zip(cats, values))

    if not pairs:
        # no statement would be generated, so db.execute is not started
        return

    prefix = 'UPDATE ' + mapName + ' SET ' + column + ' = '
    # join is used instead of repeated += which can be quadratic
    statements = ''.join(
        prefix + str(val) + ' WHERE cat = ' + str(cat) + ';\n'
        for cat, val in pairs)

    grass.write_command('db.execute', input='-', stdin=statements)
|
|---|
| 111 |
|
|---|
| 112 |
|
|---|
class Classifier:
    """!Thin adapter between the mlpy classifier and the rest of the module

    The methods accept whatever numpy.array() accepts and do the conversion
    to numpy arrays themselves, so callers need not use numpy
    (this design decision may be wrong).
    """
    def __init__(self):
        """Imports mlpy and creates the underlying DLDA classifier.

        A fatal GRASS error is reported when mlpy cannot be imported.
        """
        try:
            import mlpy
        except ImportError:
            grass.fatal(_("Cannot import mlpy (http://mlpy.sourceforge.net)"
                          " library."
                          " Please install it or ensure that it is on path"
                          " (use PYTHONPATH variable)."))
        # Pylint has a problem with mlpy and v.class.mlpy.py,
        # thus warnings for objects from mlpy have to be disabled
        self.mlclassifier = mlpy.DLDA(delta=0.01)  # pylint: disable=E1101

    def learn(self, values, classes):
        """Trains the classifier.

        @param values training samples, converted by numpy.array()
        @param classes class of each sample, converted by numpy.array()
        """
        samples = np.array(values)
        labels = np.array(classes)
        self.mlclassifier.learn(samples, labels)

    def pred(self, values):
        """Classifies the given samples.

        @param values samples to classify, converted by numpy.array()

        @return whatever pred() of the underlying mlpy classifier returns
        """
        samples = np.array(values)
        return self.mlclassifier.pred(samples)
|
|---|
| 135 |
|
|---|
| 136 |
|
|---|
| 137 | # TODO: raise exception when str can not be float
|
|---|
| 138 | # TODO: repair those functions, probably create a class
|
|---|
| 139 | # TODO: use numpy or array
|
|---|
def fromDbTableToSimpleTable(dbTable, columnsDescription, columnWithClass):
    """Converts database rows to a table of floats without cat and class.

    The name of the i-th value of a row is taken from
    columnsDescription[i][0]; values of the column named 'cat' and of
    the class column are left out, all the other values are converted
    by float().

    @param dbTable iterable of rows (sequences of values)
    @param columnsDescription sequence of column records, name first
    @param columnWithClass name of the column containing class

    @return list of lists of floats, one inner list per row
    """
    skipped = (columnWithClass, 'cat')
    return [
        [float(cell)
         for position, cell in enumerate(record)
         if columnsDescription[position][0] not in skipped]
        for record in dbTable
    ]
|
|---|
| 151 |
|
|---|
| 152 |
|
|---|
def extractColumnWithClass(dbTable, columnsDescription, columnWithClass):
    """Collects values of the class column from database rows.

    The name of the i-th value of a row is taken from
    columnsDescription[i][0].

    @param dbTable iterable of rows (sequences of values)
    @param columnsDescription sequence of column records, name first
    @param columnWithClass name of the column containing class

    @return list of class values converted by float()
    """
    return [float(cell)
            for record in dbTable
            for position, cell in enumerate(record)
            if columnsDescription[position][0] == columnWithClass]
|
|---|
| 162 |
|
|---|
| 163 |
|
|---|
def extractNthColumn(dbTable, columnNumber):
    """Collects values found at the given position of each database row.

    Rows which do not have a value at that position contribute nothing.

    @param dbTable iterable of rows (sequences of values)
    @param columnNumber zero-based position of the value in a row

    @return list of the values converted by float()
    """
    return [float(cell)
            for record in dbTable
            for position, cell in enumerate(record)
            if position == columnNumber]
|
|---|
| 172 |
|
|---|
| 173 |
|
|---|
def extractColumnWithCats(dbTable, columnsDescription):
    """Collects values of the column named 'cat' from database rows.

    The name of the i-th value of a row is taken from
    columnsDescription[i][0].

    @param dbTable iterable of rows (sequences of values)
    @param columnsDescription sequence of column records, name first

    @return list of the category values converted by float()
    """
    return [float(cell)
            for record in dbTable
            for position, cell in enumerate(record)
            if columnsDescription[position][0] == 'cat']
|
|---|
| 183 |
|
|---|
| 184 |
|
|---|
| 185 | # unused
|
|---|
def fatal_noAttributeTable(mapName):
    """Reports a fatal error about a missing or empty attribute table.

    @param mapName name of the vector map used in the message
    """
    message = _("Vector map <%s> has no or empty attribute table")
    grass.fatal(message % mapName)
|
|---|
| 189 |
|
|---|
| 190 |
|
|---|
def fatal_noEnoughColumns(mapName, ncols, required):
    """Reports a fatal error about too few columns in an attribute table.

    @param mapName name of the vector map used in the message
    @param ncols number of columns found in the table
    @param required the lowest acceptable number of columns
    """
    # the closing parenthesis was missing in the original message
    grass.fatal(_("Not enough columns in vector map <%(map)s>"
                  " (found %(ncols)s, expected at least %(r)s)")
                % {'map': mapName, 'ncols': ncols, 'r': required})
|
|---|
| 195 |
|
|---|
| 196 |
|
|---|
def fatal_noClassColumn(mapName, columnName):
    """Reports a fatal error about a missing class column.

    @param mapName name of the vector map used in the message
    @param columnName name of the column which was not found
    """
    substitutions = {'map': mapName, 'col': columnName}
    message = _("Vector map <%(map)s> does not have"
                " the column <%(col)s> cointaining class")
    grass.fatal(message % substitutions)
|
|---|
| 201 |
|
|---|
| 202 |
|
|---|
def fatal_noRows(mapName):
    """Reports a fatal error about an attribute table without rows.

    @param mapName name of the vector map used in the message
    """
    substitutions = {'map': mapName}
    message = _("Empty attribute table for map vector <%(map)s>")
    grass.fatal(message % substitutions)
|
|---|
| 206 |
|
|---|
| 207 |
|
|---|
def checkNcols(mapName, tableDescription, requiredNcols):
    """Ends with a fatal error if the table has fewer columns than required.

    @param mapName name of the vector map used in the error message
    @param tableDescription dictionary with number of columns under 'ncols'
    @param requiredNcols the lowest acceptable number of columns
    """
    found = tableDescription['ncols']
    if not found < requiredNcols:
        return
    fatal_noEnoughColumns(mapName, found, requiredNcols)
|
|---|
| 212 |
|
|---|
| 213 |
|
|---|
def checkNrows(mapName, tableDescription):
    """Ends with a fatal error if the table does not have any rows.

    @param mapName name of the vector map used in the error message
    @param tableDescription dictionary with number of rows under 'nrows'
    """
    if tableDescription['nrows'] > 0:
        return
    fatal_noRows(mapName)
|
|---|
| 217 |
|
|---|
| 218 |
|
|---|
def checkDbConnection(mapName):
    """!Ends with a fatal error if the vector map has no attribute table.

    The map is considered to have no table when grass.vector_db()
    returns an empty (false) result for it.

    @param mapName name of the vector map to be checked

    @todo check layer
    """
    connections = grass.vector_db(mapName)
    if connections:
        return
    grass.fatal(_("Vector map <%s> has no attribute table") % mapName)
|
|---|
| 227 |
|
|---|
| 228 |
|
|---|
def _loadTable(mapName, tableDescription, leadingColumn, columns):
    """Loads attribute rows together with a description matching them.

    @param mapName name of the table to read
    @param tableDescription result of grass.db_describe() for the table
    @param leadingColumn column selected first when columns are given
    @param columns comma separated column names
    or None to load the whole table

    @return tuple (rows, columnsDescription); columnsDescription[i][0]
    is meant to be the name of the i-th value of every row
    """
    if columns is None:
        # assumes that a whole-table select returns columns in the order
        # of the table description (as the original code did)
        return grass.db_select(table=mapName), tableDescription['cols']

    # assuming that columns are concatenated by comma
    sql = 'SELECT %s,%s FROM %s' % (leadingColumn, columns, mapName)
    names = [leadingColumn] + [name.strip() for name in columns.split(',')]
    # the description of the whole table does not match the selected
    # columns, so a description following the SELECT order is built
    return grass.db_select(sql=sql), [(name,) for name in names]


def main():
    """Classifies features of the input map using the training map.

    Attribute values of the training map are used to train the classifier,
    attribute values of the input map are then classified and the resulting
    classes are stored in the class column of the input map (the column
    is created if it does not exist).
    """
    options, unusedFlags = grass.parser()

    mapName = options['input']
    trainingMapName = options['training']

    columnWithClass = options['class_column']

    # columns as string; None (also for a blank value) means all columns
    columns = options['columns'].strip() or None
    useAllColumns = columns is None

    # TODO: allow same input and output map only if --overwrite was specified
    # TODO: is adding column overwriting or overwriting is only updating of existing?

    # variable names connected to training dataset have training prefix
    # variable names connected to classified dataset have no prefix

    # checking database connection (if map has a table)
    # TODO: layer
    checkDbConnection(trainingMapName)
    checkDbConnection(mapName)

    # loading descriptions first to check them

    trainingTableDescription = grass.db_describe(table=trainingMapName)

    if useAllColumns:
        # cat, class and at least one parameter
        trainingMinNcols = 3
        checkNcols(trainingMapName, trainingTableDescription, trainingMinNcols)

    checkNrows(trainingMapName, trainingTableDescription)

    if not hasColumn(trainingTableDescription, columnWithClass):
        fatal_noClassColumn(trainingMapName, columnWithClass)

    tableDescription = grass.db_describe(table=mapName)

    if useAllColumns:
        # cat and at least one parameter
        minNcols = 2
        checkNcols(mapName, tableDescription, minNcols)

    checkNrows(mapName, tableDescription)

    # TODO: check same (+-1) number of columns

    # loading data

    dbTable, columnsDescription = _loadTable(trainingMapName,
                                             trainingTableDescription,
                                             columnWithClass, columns)

    trainingParameters = fromDbTableToSimpleTable(dbTable,
        columnsDescription=columnsDescription,
        columnWithClass=columnWithClass)

    if useAllColumns:
        trainingClasses = extractColumnWithClass(dbTable,
            columnsDescription=columnsDescription,
            columnWithClass=columnWithClass)
    else:
        # the class column is always the first one in the SELECT
        trainingClasses = extractNthColumn(dbTable, 0)

    # TODO: hard coded 'cat'?
    dbTable, columnsDescription = _loadTable(mapName, tableDescription,
                                             'cat', columns)

    parameters = fromDbTableToSimpleTable(dbTable,
        columnsDescription=columnsDescription,
        columnWithClass=columnWithClass)

    if useAllColumns:
        cats = extractColumnWithCats(dbTable,
                                     columnsDescription=columnsDescription)
    else:
        # the cat column is always the first one in the SELECT
        cats = extractNthColumn(dbTable, 0)

    # the extract functions return floats; converting back to integers
    # avoids conditions like 'cat = 1.0' in the generated SQL
    cats = [int(cat) for cat in cats]

    # since dbTable can be big it is better to avoid to have it in memory twice
    del dbTable
    del trainingTableDescription

    classifier = Classifier()
    classifier.learn(trainingParameters, trainingClasses)
    classes = classifier.pred(parameters)

    # add column only if not exists and the classification was successful
    if not hasColumn(tableDescription, columnWithClass):
        addColumn(mapName, columnWithClass, 'int')

    updateColumn(mapName, columnWithClass, cats, classes)

    # TODO: output as a new map (use INSERT, can be faster)
    # TODO: output as a new layer?
|
|---|
| 335 |
|
|---|
| 336 |
|
|---|
# run the module only when the file is executed as a script
if __name__ == "__main__":
    main()
|
|---|