source: grass-addons/grass7/vector/v.class.mlpy/v.class.mlpy.py

Last change on this file was 68588, checked in by neteler, 8 years ago

v.class.mlpy addon: keyword added

  • Property svn:eol-style set to native
  • Property svn:executable set to *
  • Property svn:mime-type set to text/x-python
File size: 10.6 KB
Line 
1#!/usr/bin/env python
2
3############################################################################
4#
5# MODULE: v.class.mlpy
6# AUTHOR(S): Vaclav Petras
7# PURPOSE: Classifies features in vector map.
8# COPYRIGHT: (C) 2012 by Vaclav Petras, and the GRASS Development Team
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20############################################################################
21
22#%module
23#% description: Vector supervised classification tool which uses attributes as classification parameters (order of columns matters, names not), cat column identifies feature, class_column is excluded from classification parameters.
24#% keyword: vector
25#% keyword: classification
26#% keyword: supervised
27#% keyword: machine learning
28#%end
29#%option G_OPT_V_MAP
30#% key: input
31#% description: Input vector map (attribute table required)
32#% required: yes
33#% multiple: no
34#%end
35#%option G_OPT_V_MAP
36#% key: training
37#% description: Training vector map (attribute table required)
38#% required: yes
39#% multiple: no
40#%end
41#%option G_OPT_V_FIELD
42#% key: class_column
43#% type: string
44#% label: Name of column containing class
45#% description: Used for both input/output and training dataset. If column does not exist in input map attribute table, it will be created.
46#% required: no
47#% multiple: no
48#% answer: class
49#%end
50#%option
51#% key: columns
52#% type: string
53#% label: Columns to be used in classification
54#% description: Columns to be used in classification. If left empty, all columns will be used for classification except for class_column and cat column.
55#% required: no
56#% multiple: yes
57#%end
58
59
60# TODO: add other classifiers
61# TODO: improve doc
62# TODO: input/training could be multiple
63# TODO: handle layers
64# TODO: output to new map (all classes/one class), depends on what is faster
65
66
67import grass.script as grass
68
69import numpy as np
70
71
def addColumn(mapName, columnName, columnType):
    """Adds a new column to the attribute table of the map.

    The column definition handed over to v.db.addcolumn consists of
    the column name and the column type separated by one space.
    """
    definition = ' '.join((columnName, columnType))
    grass.run_command('v.db.addcolumn', map=mapName, columns=definition)
77
78
def hasColumn(tableDescription, column):
    """Tells whether the table description contains the given column.

    Every item of tableDescription['cols'] is expected to have
    the column name as its first element.

    @todo This should be part of some object in the lib.
    """
    return any(item[0] == column for item in tableDescription['cols'])
88
89
def updateColumn(mapName, column, cats, values=None):
    r"""!Sets values in one column for rows identified by their category.

    One UPDATE statement is generated for each item of \p cats and all
    statements are passed to db.execute through standard input at once.

    \param mapName name used as the table name in the UPDATE statements
    \param column name of the column to be set
    \param cats categories to be updated
    or a sequence of pairs (cat, value) if \p values is None
    \param values values to be set (indexed in the same way as \p cats)
    or \c None
    """
    prefix = 'UPDATE ' + mapName + ' SET ' + column + ' = '
    lines = []
    for index, item in enumerate(cats):
        if values is None:
            # item is a pair (cat, value)
            cat, val = str(item[0]), str(item[1])
        else:
            cat, val = str(item), str(values[index])
        lines.append(prefix + val + ' WHERE cat = ' + cat + ';\n')

    grass.write_command('db.execute', input='-', stdin=''.join(lines))
111
112
class Classifier:
    """!Thin layer between the mlpy library and the rest of the module

    Methods accept plain Python sequences; the conversion to numpy arrays
    happens here, just before the mlpy object is called.
    """
    def __init__(self):
        # mlpy is imported here so that its absence can be reported
        # as a GRASS fatal error
        try:
            import mlpy
        except ImportError:
            grass.fatal(_("Cannot import mlpy (http://mlpy.sourceforge.net)"
                          " library."
                          " Please install it or ensure that it is on path"
                          " (use PYTHONPATH variable)."))
        # Pylint has a problem with mlpy and v.class.mlpy.py,
        # thus warnings for objects from mlpy have to be disabled
        self.mlclassifier = mlpy.DLDA(delta=0.01)  # pylint: disable=E1101

    def learn(self, values, classes):
        """Passes training values and their classes to the mlpy object."""
        valuesArray = np.array(values)
        classesArray = np.array(classes)
        self.mlclassifier.learn(valuesArray, classesArray)

    def pred(self, values):
        """Returns what the mlpy object predicts for the given values."""
        valuesArray = np.array(values)
        return self.mlclassifier.pred(valuesArray)
135
136
137# TODO: raise exception when str can not be float
138# TODO: repair those functions, probably create a class
139# TODO: use numpy or array
def fromDbTableToSimpleTable(dbTable, columnsDescription, columnWithClass):
    """Converts rows of a database table to rows of floats.

    Values whose column is named 'cat' or columnWithClass are left out.
    The name of the i-th value in a row is taken from
    columnsDescription[i][0].
    """
    excluded = (columnWithClass, 'cat')
    return [
        [float(value) for index, value in enumerate(record)
         if columnsDescription[index][0] not in excluded]
        for record in dbTable
    ]
151
152
def extractColumnWithClass(dbTable, columnsDescription, columnWithClass):
    """Returns values of the class column converted to floats.

    The name of the i-th value in a row is taken from
    columnsDescription[i][0]; every value whose column name equals
    columnWithClass is collected.
    """
    return [
        float(value)
        for record in dbTable
        for index, value in enumerate(record)
        if columnsDescription[index][0] == columnWithClass
    ]
162
163
def extractNthColumn(dbTable, columnNumber):
    """Returns values of the column with the given index as floats.

    The value is read directly by its index instead of scanning all
    values of each row. Rows which do not have a value at the index
    contribute nothing; a negative index never matches any value.
    """
    return [
        float(record[columnNumber])
        for record in dbTable
        if 0 <= columnNumber < len(record)
    ]
172
173
def extractColumnWithCats(dbTable, columnsDescription):
    """Returns values of the column named 'cat' converted to floats.

    The name of the i-th value in a row is taken from
    columnsDescription[i][0].
    """
    return [
        float(value)
        for record in dbTable
        for index, value in enumerate(record)
        if columnsDescription[index][0] == 'cat'
    ]
183
184
185# unused
def fatal_noAttributeTable(mapName):
    """Calls grass.fatal with a message about a missing or empty table."""
    message = _("Vector map <%s> has no or empty attribute table") % mapName
    grass.fatal(message)
189
190
def fatal_noEnoughColumns(mapName, ncols, required):
    """Calls grass.fatal with a message about too few columns.

    The message contains the map name, the number of columns found
    (ncols) and the minimal number of columns expected (required).
    """
    # the closing parenthesis was missing at the end of the message
    message = _("Not enough columns in vector map <%(map)s>"
                " (found %(ncols)s, expected at least %(r)s)")
    grass.fatal(message % {'map': mapName, 'ncols': ncols, 'r': required})
195
196
def fatal_noClassColumn(mapName, columnName):
    """Calls grass.fatal with a message about a missing class column.

    The message contains the map name and the name of the column.
    """
    message = _("Vector map <%(map)s> does not have"
                " the column <%(col)s> containing class")
    grass.fatal(message % {'map': mapName, 'col': columnName})
201
202
def fatal_noRows(mapName):
    """Calls grass.fatal with a message about an empty attribute table."""
    substitutions = {'map': mapName}
    message = _("Empty attribute table for map vector <%(map)s>")
    grass.fatal(message % substitutions)
206
207
def checkNcols(mapName, tableDescription, requiredNcols):
    """Reports a fatal error when the table has too few columns.

    The number of columns is read from tableDescription['ncols'] and
    fatal_noEnoughColumns is called if it is lower than requiredNcols.
    """
    found = tableDescription['ncols']
    if found >= requiredNcols:
        return
    fatal_noEnoughColumns(mapName, found, requiredNcols)
212
213
def checkNrows(mapName, tableDescription):
    """Reports a fatal error when the table has no rows.

    The number of rows is read from tableDescription['nrows'] and
    fatal_noRows is called unless it is greater than zero.
    """
    if tableDescription['nrows'] <= 0:
        fatal_noRows(mapName)
217
218
def checkDbConnection(mapName):
    r"""!Reports a fatal error if the vector map has no attribute table.

    grass.fatal is called when grass.vector_db returns nothing
    for the map.

    \todo check layer
    """
    connections = grass.vector_db(mapName)
    if connections:
        return
    grass.fatal(_("Vector map <%s> has no attribute table") % mapName)
227
228
def _loadTable(mapName, tableDescription, leadColumn, columns):
    r"""!Loads attribute rows together with a matching columns description.

    \param mapName name of the vector map (used as the table name)
    \param tableDescription result of grass.db_describe for the table
    \param leadColumn column selected first when \p columns is not empty
    \param columns list of column names to be selected
    or an empty list to load the whole table

    \return tuple (rows, columnsDescription) where columnsDescription[i][0]
    is the name of the i-th value in each row
    """
    if not columns:
        return grass.db_select(table=mapName), tableDescription['cols']

    selected = [leadColumn] + columns
    sql = 'SELECT %s FROM %s' % (','.join(selected), mapName)
    # The description has to follow the order of the SELECT statement,
    # not the order of columns in the table, otherwise the values would
    # be paired with wrong column names.
    return grass.db_select(sql=sql), [(name,) for name in selected]


def main():
    """!Classifies features of the input map using the training map.

    Attribute values of the training map and its class column are used
    to train the classifier. The predicted classes of the input map are
    written to the class column, which is added when it does not exist.
    When the columns option is empty, all columns except for the cat
    and the class column are used.
    """
    options, unused = grass.parser()

    mapName = options['input']
    trainingMapName = options['training']

    columnWithClass = options['class_column']

    # list of requested columns, empty list means all columns
    columns = [name.strip() for name in options['columns'].split(',')
               if name.strip()]
    useAllColumns = not columns

    # TODO: allow same input and output map only if --overwrite was specified
    # TODO: is adding column overwriting or overwriting is only updating of existing?

    # variable names connected to training dataset have training prefix
    # variable names connected to classified dataset have no prefix

    # checking database connection (if map has a table)
    # TODO: layer
    checkDbConnection(trainingMapName)
    checkDbConnection(mapName)

    # loading descriptions first to check them

    trainingTableDescription = grass.db_describe(table=trainingMapName)

    if useAllColumns:
        # presumably cat, class and at least one parameter column
        trainingMinNcols = 3
        checkNcols(trainingMapName, trainingTableDescription, trainingMinNcols)

    checkNrows(trainingMapName, trainingTableDescription)

    if not hasColumn(trainingTableDescription, columnWithClass):
        fatal_noClassColumn(trainingMapName, columnWithClass)

    tableDescription = grass.db_describe(table=mapName)

    if useAllColumns:
        # presumably cat and at least one parameter column
        minNcols = 2
        checkNcols(mapName, tableDescription, minNcols)

    checkNrows(mapName, tableDescription)

    # TODO: check same (+-1) number of columns

    # loading data

    dbTable, columnsDescription = _loadTable(trainingMapName,
                                             trainingTableDescription,
                                             columnWithClass, columns)

    trainingParameters = fromDbTableToSimpleTable(dbTable,
        columnsDescription=columnsDescription,
        columnWithClass=columnWithClass)

    if useAllColumns:
        trainingClasses = extractColumnWithClass(dbTable,
            columnsDescription=columnsDescription,
            columnWithClass=columnWithClass)
    else:
        # the class column is the first one in the SELECT statement
        trainingClasses = extractNthColumn(dbTable, 0)

    # TODO: hard coded 'cat'?
    dbTable, columnsDescription = _loadTable(mapName, tableDescription,
                                             'cat', columns)

    parameters = fromDbTableToSimpleTable(dbTable,
        columnsDescription=columnsDescription,
        columnWithClass=columnWithClass)
    if useAllColumns:
        cats = extractColumnWithCats(dbTable,
                                     columnsDescription=columnsDescription)
    else:
        # the cat column is the first one in the SELECT statement
        cats = extractNthColumn(dbTable, 0)

    # since dbTable can be big it is better to avoid to have it in memory twice
    del dbTable
    del trainingTableDescription

    classifier = Classifier()
    classifier.learn(trainingParameters, trainingClasses)
    classes = classifier.pred(parameters)

    # add column only if not exists and the classification was successful
    if not hasColumn(tableDescription, columnWithClass):
        addColumn(mapName, columnWithClass, 'int')

    updateColumn(mapName, columnWithClass, cats, classes)

    # TODO: output as a new map (use INSERT, can be faster)
    # TODO: output as a new layer?
335
336
337if __name__ == "__main__":
338 main()
Note: See TracBrowser for help on using the repository browser.