001// License: GPL. For details, see LICENSE file. 002package org.openstreetmap.josm.data.validation.tests; 003 004import static org.openstreetmap.josm.tools.I18n.marktr; 005import static org.openstreetmap.josm.tools.I18n.tr; 006 007import java.awt.GridBagConstraints; 008import java.awt.event.ActionListener; 009import java.io.BufferedReader; 010import java.io.IOException; 011import java.lang.Character.UnicodeBlock; 012import java.util.ArrayList; 013import java.util.Arrays; 014import java.util.Collection; 015import java.util.Collections; 016import java.util.HashMap; 017import java.util.HashSet; 018import java.util.List; 019import java.util.Locale; 020import java.util.Map; 021import java.util.Map.Entry; 022import java.util.Set; 023import java.util.regex.Pattern; 024 025import javax.swing.JCheckBox; 026import javax.swing.JLabel; 027import javax.swing.JPanel; 028 029import org.openstreetmap.josm.command.ChangePropertyCommand; 030import org.openstreetmap.josm.command.ChangePropertyKeyCommand; 031import org.openstreetmap.josm.command.Command; 032import org.openstreetmap.josm.command.SequenceCommand; 033import org.openstreetmap.josm.data.osm.AbstractPrimitive; 034import org.openstreetmap.josm.data.osm.OsmPrimitive; 035import org.openstreetmap.josm.data.osm.Tag; 036import org.openstreetmap.josm.data.osm.Tagged; 037import org.openstreetmap.josm.data.preferences.sources.ValidatorPrefHelper; 038import org.openstreetmap.josm.data.validation.Severity; 039import org.openstreetmap.josm.data.validation.Test.TagTest; 040import org.openstreetmap.josm.data.validation.TestError; 041import org.openstreetmap.josm.data.validation.util.Entities; 042import org.openstreetmap.josm.gui.progress.ProgressMonitor; 043import org.openstreetmap.josm.gui.tagging.presets.TaggingPreset; 044import org.openstreetmap.josm.gui.tagging.presets.TaggingPresetItem; 045import org.openstreetmap.josm.gui.tagging.presets.TaggingPresets; 046import org.openstreetmap.josm.gui.tagging.presets.items.Check; 047import org.openstreetmap.josm.gui.tagging.presets.items.CheckGroup; 048import org.openstreetmap.josm.gui.tagging.presets.items.KeyedItem; 049import org.openstreetmap.josm.gui.widgets.EditableList; 050import org.openstreetmap.josm.io.CachedFile; 051import org.openstreetmap.josm.spi.preferences.Config; 052import org.openstreetmap.josm.tools.GBC; 053import org.openstreetmap.josm.tools.Logging; 054import org.openstreetmap.josm.tools.MultiMap; 055import org.openstreetmap.josm.tools.Utils; 056 057/** 058 * Check for misspelled or wrong tags 059 * 060 * @author frsantos 061 * @since 3669 062 */ 063public class TagChecker extends TagTest { 064 065 /** The config file of ignored tags */ 066 public static final String IGNORE_FILE = "resource://data/validator/ignoretags.cfg"; 067 /** The config file of dictionary words */ 068 public static final String SPELL_FILE = "resource://data/validator/words.cfg"; 069 070 /** Normalized keys: the key should be substituted by the value if the key was not found in presets */ 071 private static final Map<String, String> harmonizedKeys = new HashMap<>(); 072 /** The spell check preset values which are not stored in TaggingPresets */ 073 private static volatile HashSet<String> additionalPresetsValueData; 074 /** often used tags which are not in presets */ 075 private static volatile MultiMap<String, String> oftenUsedTags = new MultiMap<>(); 076 077 private static final Pattern UNWANTED_NON_PRINTING_CONTROL_CHARACTERS = Pattern.compile( 078 "[\\x00-\\x09\\x0B\\x0C\\x0E-\\x1F\\x7F\\u200e-\\u200f\\u202a-\\u202e]"); 079 080 /** The TagChecker data */ 081 private static final List<String> ignoreDataStartsWith = new ArrayList<>(); 082 private static final Set<String> ignoreDataEquals = new HashSet<>(); 083 private static final List<String> ignoreDataEndsWith = new ArrayList<>(); 084 private static final List<Tag> ignoreDataTag = new ArrayList<>(); 085 /** tag keys that have only numerical values in the presets */ 086 private static final Set<String> ignoreForLevenshtein = new HashSet<>(); 087 088 /** The preferences prefix */ 089 protected static final String PREFIX = ValidatorPrefHelper.PREFIX + "." + TagChecker.class.getSimpleName(); 090 091 /** 092 * The preference key to check values 093 */ 094 public static final String PREF_CHECK_VALUES = PREFIX + ".checkValues"; 095 /** 096 * The preference key to check keys 097 */ 098 public static final String PREF_CHECK_KEYS = PREFIX + ".checkKeys"; 099 /** 100 * The preference key to enable complex checks 101 */ 102 public static final String PREF_CHECK_COMPLEX = PREFIX + ".checkComplex"; 103 /** 104 * The preference key to search for fixme tags 105 */ 106 public static final String PREF_CHECK_FIXMES = PREFIX + ".checkFixmes"; 107 108 /** 109 * The preference key for source files 110 * @see #DEFAULT_SOURCES 111 */ 112 public static final String PREF_SOURCES = PREFIX + ".source"; 113 114 private static final String BEFORE_UPLOAD = "BeforeUpload"; 115 /** 116 * The preference key to check keys - used before upload 117 */ 118 public static final String PREF_CHECK_KEYS_BEFORE_UPLOAD = PREF_CHECK_KEYS + BEFORE_UPLOAD; 119 /** 120 * The preference key to check values - used before upload 121 */ 122 public static final String PREF_CHECK_VALUES_BEFORE_UPLOAD = PREF_CHECK_VALUES + BEFORE_UPLOAD; 123 /** 124 * The preference key to run complex tests - used before upload 125 */ 126 public static final String PREF_CHECK_COMPLEX_BEFORE_UPLOAD = PREF_CHECK_COMPLEX + BEFORE_UPLOAD; 127 /** 128 * The preference key to search for fixmes - used before upload 129 */ 130 public static final String PREF_CHECK_FIXMES_BEFORE_UPLOAD = PREF_CHECK_FIXMES + BEFORE_UPLOAD; 131 132 private static final int MAX_LEVENSHTEIN_DISTANCE = 2; 133 134 protected boolean checkKeys; 135 protected boolean checkValues; 136 /** Was used for special configuration file, might be used to disable value spell checker. */ 137 protected boolean checkComplex; 138 protected boolean checkFixmes; 139 140 protected JCheckBox prefCheckKeys; 141 protected JCheckBox prefCheckValues; 142 protected JCheckBox prefCheckComplex; 143 protected JCheckBox prefCheckFixmes; 144 protected JCheckBox prefCheckPaint; 145 146 protected JCheckBox prefCheckKeysBeforeUpload; 147 protected JCheckBox prefCheckValuesBeforeUpload; 148 protected JCheckBox prefCheckComplexBeforeUpload; 149 protected JCheckBox prefCheckFixmesBeforeUpload; 150 protected JCheckBox prefCheckPaintBeforeUpload; 151 152 // CHECKSTYLE.OFF: SingleSpaceSeparator 153 protected static final int EMPTY_VALUES = 1200; 154 protected static final int INVALID_KEY = 1201; 155 protected static final int INVALID_VALUE = 1202; 156 protected static final int FIXME = 1203; 157 protected static final int INVALID_SPACE = 1204; 158 protected static final int INVALID_KEY_SPACE = 1205; 159 protected static final int INVALID_HTML = 1206; /* 1207 was PAINT */ 160 protected static final int LONG_VALUE = 1208; 161 protected static final int LONG_KEY = 1209; 162 protected static final int LOW_CHAR_VALUE = 1210; 163 protected static final int LOW_CHAR_KEY = 1211; 164 protected static final int MISSPELLED_VALUE = 1212; 165 protected static final int MISSPELLED_KEY = 1213; 166 protected static final int MULTIPLE_SPACES = 1214; 167 protected static final int MISSPELLED_VALUE_NO_FIX = 1215; 168 protected static final int UNUSUAL_UNICODE_CHAR_VALUE = 1216; 169 // CHECKSTYLE.ON: SingleSpaceSeparator 170 171 protected EditableList sourcesList; 172 173 private static final List<String> DEFAULT_SOURCES = Arrays.asList(IGNORE_FILE, SPELL_FILE); 174 175 /** 176 * Constructor 177 */ 178 public TagChecker() { 179 super(tr("Tag checker"), tr("This test checks for errors in tag keys and values.")); 180 } 181 182 @Override 183 public void initialize() throws IOException { 184 initializeData(); 185 initializePresets(); 186 analysePresets(); 187 } 188 189 /** 190 * Add presets that contain only numerical values to the ignore list 191 */ 192 private static void analysePresets() { 193 for (String key : TaggingPresets.getPresetKeys()) { 194 if (isKeyIgnored(key)) 195 continue; 196 boolean allNumerical = true; 197 Set<String> values = TaggingPresets.getPresetValues(key); 198 if (values.isEmpty()) 199 allNumerical = false; 200 for (String val : values) { 201 if (!isNum(val)) { 202 allNumerical = false; 203 break; 204 } 205 } 206 if (allNumerical) { 207 ignoreForLevenshtein.add(key); 208 } 209 } 210 } 211 212 /** 213 * Reads the spell-check file into a HashMap. 214 * The data file is a list of words, beginning with +/-. If it starts with +, 215 * the word is valid, but if it starts with -, the word should be replaced 216 * by the nearest + word before this. 217 * 218 * @throws IOException if any I/O error occurs 219 */ 220 private static void initializeData() throws IOException { 221 ignoreDataStartsWith.clear(); 222 ignoreDataEquals.clear(); 223 ignoreDataEndsWith.clear(); 224 ignoreDataTag.clear(); 225 harmonizedKeys.clear(); 226 ignoreForLevenshtein.clear(); 227 oftenUsedTags.clear(); 228 229 StringBuilder errorSources = new StringBuilder(); 230 for (String source : Config.getPref().getList(PREF_SOURCES, DEFAULT_SOURCES)) { 231 try ( 232 CachedFile cf = new CachedFile(source); 233 BufferedReader reader = cf.getContentReader() 234 ) { 235 String okValue = null; 236 boolean tagcheckerfile = false; 237 boolean ignorefile = false; 238 boolean isFirstLine = true; 239 String line; 240 while ((line = reader.readLine()) != null) { 241 if (line.isEmpty()) { 242 // ignore 243 } else if (line.startsWith("#")) { 244 if (line.startsWith("# JOSM TagChecker")) { 245 tagcheckerfile = true; 246 Logging.error(tr("Ignoring {0}. Support was dropped", source)); 247 } else 248 if (line.startsWith("# JOSM IgnoreTags")) { 249 ignorefile = true; 250 if (!DEFAULT_SOURCES.contains(source)) { 251 Logging.info(tr("Adding {0} to ignore tags", source)); 252 } 253 } 254 } else if (ignorefile) { 255 parseIgnoreFileLine(source, line); 256 } else if (tagcheckerfile) { 257 // ignore 258 } else if (line.charAt(0) == '+') { 259 okValue = line.substring(1); 260 } else if (line.charAt(0) == '-' && okValue != null) { 261 String hk = harmonizeKey(line.substring(1)); 262 if (!okValue.equals(hk) && harmonizedKeys.put(hk, okValue) != null) { 263 Logging.debug(tr("Line was ignored: {0}", line)); 264 } 265 } else { 266 Logging.error(tr("Invalid spellcheck line: {0}", line)); 267 } 268 if (isFirstLine) { 269 isFirstLine = false; 270 if (!(tagcheckerfile || ignorefile) && !DEFAULT_SOURCES.contains(source)) { 271 Logging.info(tr("Adding {0} to spellchecker", source)); 272 } 273 } 274 } 275 } catch (IOException e) { 276 Logging.error(e); 277 errorSources.append(source).append('\n'); 278 } 279 } 280 281 if (errorSources.length() > 0) 282 throw new IOException(tr("Could not access data file(s):\n{0}", errorSources)); 283 } 284 285 /** 286 * Parse a line found in a configuration file 287 * @param source name of configuration file 288 * @param line the line to parse 289 */ 290 private static void parseIgnoreFileLine(String source, String line) { 291 line = line.trim(); 292 if (line.length() < 4) { 293 return; 294 } 295 try { 296 String key = line.substring(0, 2); 297 line = line.substring(2); 298 299 switch (key) { 300 case "S:": 301 ignoreDataStartsWith.add(line); 302 break; 303 case "E:": 304 ignoreDataEquals.add(line); 305 addToKeyDictionary(line); 306 break; 307 case "F:": 308 ignoreDataEndsWith.add(line); 309 break; 310 case "K:": 311 Tag tag = Tag.ofString(line); 312 ignoreDataTag.add(tag); 313 oftenUsedTags.put(tag.getKey(), tag.getValue()); 314 addToKeyDictionary(tag.getKey()); 315 break; 316 default: 317 if (!key.startsWith(";")) { 318 Logging.warn("Unsupported TagChecker key: " + key); 319 } 320 } 321 } catch (IllegalArgumentException e) { 322 Logging.error("Invalid line in {0} : {1}", source, e.getMessage()); 323 Logging.trace(e); 324 } 325 } 326 327 private static void addToKeyDictionary(String key) { 328 if (key != null) { 329 String hk = harmonizeKey(key); 330 if (!key.equals(hk)) { 331 harmonizedKeys.put(hk, key); 332 } 333 } 334 } 335 336 /** 337 * Reads the presets data. 338 * 339 */ 340 public static void initializePresets() { 341 342 if (!Config.getPref().getBoolean(PREF_CHECK_VALUES, true)) 343 return; 344 345 Collection<TaggingPreset> presets = TaggingPresets.getTaggingPresets(); 346 if (!presets.isEmpty()) { 347 initAdditionalPresetsValueData(); 348 for (TaggingPreset p : presets) { 349 for (TaggingPresetItem i : p.data) { 350 if (i instanceof KeyedItem) { 351 addPresetValue((KeyedItem) i); 352 } else if (i instanceof CheckGroup) { 353 for (Check c : ((CheckGroup) i).checks) { 354 addPresetValue(c); 355 } 356 } 357 } 358 } 359 } 360 } 361 362 private static void initAdditionalPresetsValueData() { 363 additionalPresetsValueData = new HashSet<>(); 364 for (String a : AbstractPrimitive.getUninterestingKeys()) { 365 additionalPresetsValueData.add(a); 366 } 367 for (String a : Config.getPref().getList(ValidatorPrefHelper.PREFIX + ".knownkeys", 368 Arrays.asList("is_in", "int_ref", "fixme", "population"))) { 369 additionalPresetsValueData.add(a); 370 } 371 } 372 373 private static void addPresetValue(KeyedItem ky) { 374 if (ky.key != null && ky.getValues() != null) { 375 addToKeyDictionary(ky.key); 376 } 377 } 378 379 /** 380 * Checks given string (key or value) if it contains unwanted non-printing control characters (either ASCII or Unicode bidi characters) 381 * @param s string to check 382 * @return {@code true} if {@code s} contains non-printing control characters 383 */ 384 static boolean containsUnwantedNonPrintingControlCharacter(String s) { 385 return s != null && !s.isEmpty() && ( 386 isJoiningChar(s.charAt(0)) || 387 isJoiningChar(s.charAt(s.length() - 1)) || 388 s.chars().anyMatch(c -> (isAsciiControlChar(c) && !isNewLineChar(c)) || isBidiControlChar(c)) 389 ); 390 } 391 392 private static boolean isAsciiControlChar(int c) { 393 return c < 0x20 || c == 0x7F; 394 } 395 396 private static boolean isNewLineChar(int c) { 397 return c == 0x0a || c == 0x0d; 398 } 399 400 private static boolean isJoiningChar(int c) { 401 return c == 0x200c || c == 0x200d; // ZWNJ, ZWJ 402 } 403 404 private static boolean isBidiControlChar(int c) { 405 /* check for range 0x200e to 0x200f (LRM, RLM) or 406 0x202a to 0x202e (LRE, RLE, PDF, LRO, RLO) */ 407 return (c >= 0x200e && c <= 0x200f) || (c >= 0x202a && c <= 0x202e); 408 } 409 410 static String removeUnwantedNonPrintingControlCharacters(String s) { 411 // Remove all unwanted characters 412 String result = UNWANTED_NON_PRINTING_CONTROL_CHARACTERS.matcher(s).replaceAll(""); 413 // Remove joining characters located at the beginning of the string 414 while (!result.isEmpty() && isJoiningChar(result.charAt(0))) { 415 result = result.substring(1); 416 } 417 // Remove joining characters located at the end of the string 418 while (!result.isEmpty() && isJoiningChar(result.charAt(result.length() - 1))) { 419 result = result.substring(0, result.length() - 1); 420 } 421 return result; 422 } 423 424 private static boolean containsUnusualUnicodeCharacter(String key, String value) { 425 return value != null && value.chars().anyMatch(c -> isUnusualUnicodeBlock(key, UnicodeBlock.of(c))); 426 } 427 428 /** 429 * Detects highly suspicious Unicode characters that have been seen in OSM database. 430 * @param key tag key 431 * @param b Unicode block of the current character 432 * @return {@code true} if the current unicode block is very unusual for the given key 433 */ 434 private static boolean isUnusualUnicodeBlock(String key, UnicodeBlock b) { 435 return isUnusualPhoneticUse(key, b) || isUnusualBmpUse(b) || isUnusualSmpUse(b); 436 } 437 438 private static boolean isUnusualPhoneticUse(String key, UnicodeBlock b) { 439 return (b == UnicodeBlock.IPA_EXTENSIONS // U+0250..U+02AF 440 || b == UnicodeBlock.PHONETIC_EXTENSIONS // U+1D00..U+1D7F 441 || b == UnicodeBlock.PHONETIC_EXTENSIONS_SUPPLEMENT) // U+1D80..U+1DBF 442 && !key.endsWith(":pronunciation"); 443 } 444 445 private static boolean isUnusualBmpUse(UnicodeBlock b) { 446 // CHECKSTYLE.OFF: BooleanExpressionComplexity 447 return b == UnicodeBlock.COMBINING_MARKS_FOR_SYMBOLS // U+20D0..U+20FF 448 || b == UnicodeBlock.ARROWS // U+2190..U+21FF 449 || b == UnicodeBlock.MATHEMATICAL_OPERATORS // U+2200..U+22FF 450 || b == UnicodeBlock.ENCLOSED_ALPHANUMERICS // U+2460..U+24FF 451 || b == UnicodeBlock.BOX_DRAWING // U+2500..U+257F 452 || b == UnicodeBlock.GEOMETRIC_SHAPES // U+25A0..U+25FF 453 || b == UnicodeBlock.DINGBATS // U+2700..U+27BF 454 || b == UnicodeBlock.MISCELLANEOUS_SYMBOLS_AND_ARROWS // U+2B00..U+2BFF 455 || b == UnicodeBlock.GLAGOLITIC // U+2C00..U+2C5F 456 || b == UnicodeBlock.HANGUL_COMPATIBILITY_JAMO // U+3130..U+318F 457 || b == UnicodeBlock.ENCLOSED_CJK_LETTERS_AND_MONTHS // U+3200..U+32FF 458 || b == UnicodeBlock.LATIN_EXTENDED_D // U+A720..U+A7FF 459 || b == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS // U+F900..U+FAFF 460 || b == UnicodeBlock.ALPHABETIC_PRESENTATION_FORMS // U+FB00..U+FB4F 461 || b == UnicodeBlock.VARIATION_SELECTORS // U+FE00..U+FE0F 462 || b == UnicodeBlock.SPECIALS; // U+FFF0..U+FFFF 463 // CHECKSTYLE.ON: BooleanExpressionComplexity 464 } 465 466 private static boolean isUnusualSmpUse(UnicodeBlock b) { 467 // UnicodeBlock.SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS is only defined in Java 9+ 468 return b == UnicodeBlock.MUSICAL_SYMBOLS // U+1D100..U+1D1FF 469 || b == UnicodeBlock.ENCLOSED_ALPHANUMERIC_SUPPLEMENT // U+1F100..U+1F1FF 470 || b == UnicodeBlock.EMOTICONS // U+1F600..U+1F64F 471 || b == UnicodeBlock.TRANSPORT_AND_MAP_SYMBOLS; // U+1F680..U+1F6FF 472 } 473 474 /** 475 * Get set of preset values for the given key. 476 * @param key the key 477 * @return null if key is not in presets or in additionalPresetsValueData, 478 * else a set which might be empty. 479 */ 480 private static Set<String> getPresetValues(String key) { 481 Set<String> res = TaggingPresets.getPresetValues(key); 482 if (res != null) 483 return res; 484 if (additionalPresetsValueData.contains(key)) 485 return Collections.emptySet(); 486 // null means key is not known 487 return null; 488 } 489 490 /** 491 * Determines if the given key is in internal presets. 492 * @param key key 493 * @return {@code true} if the given key is in internal presets 494 * @since 9023 495 */ 496 public static boolean isKeyInPresets(String key) { 497 return TaggingPresets.getPresetValues(key) != null; 498 } 499 500 /** 501 * Determines if the given tag is in internal presets. 502 * @param key key 503 * @param value value 504 * @return {@code true} if the given tag is in internal presets 505 * @since 9023 506 */ 507 public static boolean isTagInPresets(String key, String value) { 508 final Set<String> values = getPresetValues(key); 509 return values != null && values.contains(value); 510 } 511 512 /** 513 * Returns the list of ignored tags. 514 * @return the list of ignored tags 515 * @since 9023 516 */ 517 public static List<Tag> getIgnoredTags() { 518 return new ArrayList<>(ignoreDataTag); 519 } 520 521 /** 522 * Determines if the given tag key is ignored for checks "key/tag not in presets". 523 * @param key key 524 * @return true if the given key is ignored 525 */ 526 private static boolean isKeyIgnored(String key) { 527 if (ignoreDataEquals.contains(key)) { 528 return true; 529 } 530 for (String a : ignoreDataStartsWith) { 531 if (key.startsWith(a)) { 532 return true; 533 } 534 } 535 for (String a : ignoreDataEndsWith) { 536 if (key.endsWith(a)) { 537 return true; 538 } 539 } 540 return false; 541 } 542 543 /** 544 * Determines if the given tag is ignored for checks "key/tag not in presets". 545 * @param key key 546 * @param value value 547 * @return {@code true} if the given tag is ignored 548 * @since 9023 549 */ 550 public static boolean isTagIgnored(String key, String value) { 551 if (isKeyIgnored(key)) 552 return true; 553 final Set<String> values = getPresetValues(key); 554 if (values != null && values.isEmpty()) 555 return true; 556 if (!isTagInPresets(key, value)) { 557 for (Tag a : ignoreDataTag) { 558 if (key.equals(a.getKey()) && value.equals(a.getValue())) { 559 return true; 560 } 561 } 562 } 563 return false; 564 } 565 566 /** 567 * Checks the primitive tags 568 * @param p The primitive to check 569 */ 570 @Override 571 public void check(OsmPrimitive p) { 572 if (!p.isTagged()) 573 return; 574 575 // Just a collection to know if a primitive has been already marked with error 576 MultiMap<OsmPrimitive, String> withErrors = new MultiMap<>(); 577 578 for (Entry<String, String> prop : p.getKeys().entrySet()) { 579 String s = marktr("Tag ''{0}'' invalid."); 580 String key = prop.getKey(); 581 String value = prop.getValue(); 582 583 if (checkKeys) { 584 checkSingleTagKeySimple(withErrors, p, s, key); 585 } 586 if (checkValues) { 587 checkSingleTagValueSimple(withErrors, p, s, key, value); 588 checkSingleTagComplex(withErrors, p, key, value); 589 } 590 if (checkFixmes && key != null && value != null && !value.isEmpty() && isFixme(key, value) && !withErrors.contains(p, "FIXME")) { 591 errors.add(TestError.builder(this, Severity.OTHER, FIXME) 592 .message(tr("FIXMES")) 593 .primitives(p) 594 .build()); 595 withErrors.put(p, "FIXME"); 596 } 597 } 598 } 599 600 private void checkSingleTagValueSimple(MultiMap<OsmPrimitive, String> withErrors, OsmPrimitive p, String s, String key, String value) { 601 if (!checkValues || value == null) 602 return; 603 if ((containsUnwantedNonPrintingControlCharacter(value)) && !withErrors.contains(p, "ICV")) { 604 errors.add(TestError.builder(this, Severity.WARNING, LOW_CHAR_VALUE) 605 .message(tr("Tag value contains non-printing character"), s, key) 606 .primitives(p) 607 .fix(() -> new ChangePropertyCommand(p, key, removeUnwantedNonPrintingControlCharacters(value))) 608 .build()); 609 withErrors.put(p, "ICV"); 610 } 611 if ((containsUnusualUnicodeCharacter(key, value)) && !withErrors.contains(p, "UUCV")) { 612 errors.add(TestError.builder(this, Severity.WARNING, UNUSUAL_UNICODE_CHAR_VALUE) 613 .message(tr("Tag value contains unusual Unicode character"), s, key) 614 .primitives(p) 615 .build()); 616 withErrors.put(p, "UUCV"); 617 } 618 if ((value.length() > Tagged.MAX_TAG_LENGTH) && !withErrors.contains(p, "LV")) { 619 errors.add(TestError.builder(this, Severity.ERROR, LONG_VALUE) 620 .message(tr("Tag value longer than {0} characters ({1} characters)", Tagged.MAX_TAG_LENGTH, value.length()), s, key) 621 .primitives(p) 622 .build()); 623 withErrors.put(p, "LV"); 624 } 625 if ((value.trim().isEmpty()) && !withErrors.contains(p, "EV")) { 626 errors.add(TestError.builder(this, Severity.WARNING, EMPTY_VALUES) 627 .message(tr("Tags with empty values"), s, key) 628 .primitives(p) 629 .build()); 630 withErrors.put(p, "EV"); 631 } 632 final String errTypeSpace = "SPACE"; 633 if ((value.startsWith(" ") || value.endsWith(" ")) && !withErrors.contains(p, errTypeSpace)) { 634 errors.add(TestError.builder(this, Severity.WARNING, INVALID_SPACE) 635 .message(tr("Property values start or end with white space"), s, key) 636 .primitives(p) 637 .build()); 638 withErrors.put(p, errTypeSpace); 639 } 640 if (value.contains(" ") && !withErrors.contains(p, errTypeSpace)) { 641 errors.add(TestError.builder(this, Severity.WARNING, MULTIPLE_SPACES) 642 .message(tr("Property values contain multiple white spaces"), s, key) 643 .primitives(p) 644 .build()); 645 withErrors.put(p, errTypeSpace); 646 } 647 if (!value.equals(Entities.unescape(value)) && !withErrors.contains(p, "HTML")) { 648 errors.add(TestError.builder(this, Severity.OTHER, INVALID_HTML) 649 .message(tr("Property values contain HTML entity"), s, key) 650 .primitives(p) 651 .build()); 652 withErrors.put(p, "HTML"); 653 } 654 } 655 656 private void checkSingleTagKeySimple(MultiMap<OsmPrimitive, String> withErrors, OsmPrimitive p, String s, String key) { 657 if (!checkKeys || key == null) 658 return; 659 if ((containsUnwantedNonPrintingControlCharacter(key)) && !withErrors.contains(p, "ICK")) { 660 errors.add(TestError.builder(this, Severity.WARNING, LOW_CHAR_KEY) 661 .message(tr("Tag key contains non-printing character"), s, key) 662 .primitives(p) 663 .fix(() -> new ChangePropertyCommand(p, key, removeUnwantedNonPrintingControlCharacters(key))) 664 .build()); 665 withErrors.put(p, "ICK"); 666 } 667 if (key.length() > Tagged.MAX_TAG_LENGTH && !withErrors.contains(p, "LK")) { 668 errors.add(TestError.builder(this, Severity.ERROR, LONG_KEY) 669 .message(tr("Tag key longer than {0} characters ({1} characters)", Tagged.MAX_TAG_LENGTH, key.length()), s, key) 670 .primitives(p) 671 .build()); 672 withErrors.put(p, "LK"); 673 } 674 if (key.indexOf(' ') >= 0 && !withErrors.contains(p, "IPK")) { 675 errors.add(TestError.builder(this, Severity.WARNING, INVALID_KEY_SPACE) 676 .message(tr("Invalid white space in property key"), s, key) 677 .primitives(p) 678 .build()); 679 withErrors.put(p, "IPK"); 680 } 681 } 682 683 private void checkSingleTagComplex(MultiMap<OsmPrimitive, String> withErrors, OsmPrimitive p, String key, String value) { 684 if (!checkValues || key == null || value == null || value.isEmpty()) 685 return; 686 if (additionalPresetsValueData != null && !isTagIgnored(key, value)) { 687 if (!isKeyInPresets(key)) { 688 spellCheckKey(withErrors, p, key); 689 } else if (!isTagInPresets(key, value)) { 690 if (oftenUsedTags.contains(key, value)) { 691 // tag is quite often used but not in presets 692 errors.add(TestError.builder(this, Severity.OTHER, INVALID_VALUE) 693 .message(tr("Presets do not contain property value"), 694 marktr("Value ''{0}'' for key ''{1}'' not in presets, but is known."), value, key) 695 .primitives(p) 696 .build()); 697 withErrors.put(p, "UPV"); 698 } else { 699 tryGuess(p, key, value, withErrors); 700 } 701 } 702 } 703 } 704 705 private void spellCheckKey(MultiMap<OsmPrimitive, String> withErrors, OsmPrimitive p, String key) { 706 String prettifiedKey = harmonizeKey(key); 707 String fixedKey; 708 if (ignoreDataEquals.contains(prettifiedKey)) { 709 fixedKey = prettifiedKey; 710 } else { 711 fixedKey = isKeyInPresets(prettifiedKey) ? prettifiedKey : harmonizedKeys.get(prettifiedKey); 712 } 713 if (fixedKey == null) { 714 for (Tag a : ignoreDataTag) { 715 if (a.getKey().equals(prettifiedKey)) { 716 fixedKey = prettifiedKey; 717 break; 718 } 719 } 720 } 721 722 if (fixedKey != null && !"".equals(fixedKey) && !fixedKey.equals(key)) { 723 final String proposedKey = fixedKey; 724 // misspelled preset key 725 final TestError.Builder error = TestError.builder(this, Severity.WARNING, MISSPELLED_KEY) 726 .message(tr("Misspelled property key"), marktr("Key ''{0}'' looks like ''{1}''."), key, proposedKey) 727 .primitives(p); 728 if (p.hasKey(fixedKey)) { 729 errors.add(error.build()); 730 } else { 731 errors.add(error.fix(() -> new ChangePropertyKeyCommand(p, key, proposedKey)).build()); 732 } 733 withErrors.put(p, "WPK"); 734 } else { 735 errors.add(TestError.builder(this, Severity.OTHER, INVALID_KEY) 736 .message(tr("Presets do not contain property key"), marktr("Key ''{0}'' not in presets."), key) 737 .primitives(p) 738 .build()); 739 withErrors.put(p, "UPK"); 740 } 741 } 742 743 private void tryGuess(OsmPrimitive p, String key, String value, MultiMap<OsmPrimitive, String> withErrors) { 744 // try to fix common typos and check again if value is still unknown 745 final String harmonizedValue = harmonizeValue(value); 746 if (harmonizedValue == null || harmonizedValue.isEmpty()) 747 return; 748 String fixedValue = null; 749 List<Set<String>> sets = new ArrayList<>(); 750 Set<String> presetValues = getPresetValues(key); 751 if (presetValues != null) 752 sets.add(presetValues); 753 Set<String> usedValues = oftenUsedTags.get(key); 754 if (usedValues != null) 755 sets.add(usedValues); 756 for (Set<String> possibleValues: sets) { 757 if (possibleValues.contains(harmonizedValue)) { 758 fixedValue = harmonizedValue; 759 break; 760 } 761 } 762 if (fixedValue == null && !ignoreForLevenshtein.contains(key)) { 763 int maxPresetValueLen = 0; 764 List<String> fixVals = new ArrayList<>(); 765 // use Levenshtein distance to find typical typos 766 int minDist = MAX_LEVENSHTEIN_DISTANCE + 1; 767 String closest = null; 768 for (Set<String> possibleValues: sets) { 769 for (String possibleVal : possibleValues) { 770 if (possibleVal.isEmpty()) 771 continue; 772 maxPresetValueLen = Math.max(maxPresetValueLen, possibleVal.length()); 773 if (harmonizedValue.length() < 3 && possibleVal.length() >= harmonizedValue.length() + MAX_LEVENSHTEIN_DISTANCE) { 774 // don't suggest fix value when given value is short and lengths are too different 775 // for example surface=u would result in surface=mud 776 continue; 777 } 778 int dist = Utils.getLevenshteinDistance(possibleVal, harmonizedValue); 779 if (dist >= harmonizedValue.length()) { 780 // short value, all characters are different. Don't warn, might say Value '10' for key 'fee' looks like 'no'. 781 continue; 782 } 783 if (dist < minDist) { 784 closest = possibleVal; 785 minDist = dist; 786 fixVals.clear(); 787 fixVals.add(possibleVal); 788 } else if (dist == minDist) { 789 fixVals.add(possibleVal); 790 } 791 } 792 } 793 794 if (minDist <= MAX_LEVENSHTEIN_DISTANCE && maxPresetValueLen > MAX_LEVENSHTEIN_DISTANCE 795 && (harmonizedValue.length() > 3 || minDist < MAX_LEVENSHTEIN_DISTANCE)) { 796 if (fixVals.size() < 2) { 797 fixedValue = closest; 798 } else { 799 Collections.sort(fixVals); 800 // misspelled preset value with multiple good alternatives 801 errors.add(TestError.builder(this, Severity.WARNING, MISSPELLED_VALUE_NO_FIX) 802 .message(tr("Unknown property value"), 803 marktr("Value ''{0}'' for key ''{1}'' is unknown, maybe one of {2} is meant?"), 804 value, key, fixVals) 805 .primitives(p).build()); 806 withErrors.put(p, "WPV"); 807 return; 808 } 809 } 810 } 811 if (fixedValue != null && !fixedValue.equals(value)) { 812 final String newValue = fixedValue; 813 // misspelled preset value 814 errors.add(TestError.builder(this, Severity.WARNING, MISSPELLED_VALUE) 815 .message(tr("Unknown property value"), 816 marktr("Value ''{0}'' for key ''{1}'' is unknown, maybe ''{2}'' is meant?"), value, key, newValue) 817 .primitives(p) 818 .build()); 819 withErrors.put(p, "WPV"); 820 } else { 821 // unknown preset value 822 errors.add(TestError.builder(this, Severity.OTHER, INVALID_VALUE) 823 .message(tr("Presets do not contain property value"), 824 marktr("Value ''{0}'' for key ''{1}'' not in presets."), value, key) 825 .primitives(p) 826 .build()); 827 withErrors.put(p, "UPV"); 828 } 829 } 830 831 private static boolean isNum(String harmonizedValue) { 832 try { 833 Double.parseDouble(harmonizedValue); 834 return true; 835 } catch (NumberFormatException e) { 836 return false; 837 } 838 } 839 840 private static boolean isFixme(String key, String value) { 841 return key.toLowerCase(Locale.ENGLISH).contains("fixme") || key.contains("todo") 842 || value.toLowerCase(Locale.ENGLISH).contains("fixme") || value.contains("check and delete"); 843 } 844 845 private static String harmonizeKey(String key) { 846 return Utils.strip(key.toLowerCase(Locale.ENGLISH).replace('-', '_').replace(':', '_').replace(' ', '_'), "-_;:,"); 847 } 848 849 private static String harmonizeValue(String value) { 850 return Utils.strip(value.toLowerCase(Locale.ENGLISH).replace('-', '_').replace(' ', '_'), "-_;:,"); 851 } 852 853 @Override 854 public void startTest(ProgressMonitor monitor) { 855 super.startTest(monitor); 856 checkKeys = Config.getPref().getBoolean(PREF_CHECK_KEYS, true); 857 if (isBeforeUpload) { 858 checkKeys = checkKeys && Config.getPref().getBoolean(PREF_CHECK_KEYS_BEFORE_UPLOAD, true); 859 } 860 861 checkValues = Config.getPref().getBoolean(PREF_CHECK_VALUES, true); 862 if (isBeforeUpload) { 863 checkValues = checkValues && Config.getPref().getBoolean(PREF_CHECK_VALUES_BEFORE_UPLOAD, true); 864 } 865 866 checkComplex = Config.getPref().getBoolean(PREF_CHECK_COMPLEX, true); 867 if (isBeforeUpload) { 868 checkComplex = checkComplex && Config.getPref().getBoolean(PREF_CHECK_COMPLEX_BEFORE_UPLOAD, true); 869 } 870 871 checkFixmes = Config.getPref().getBoolean(PREF_CHECK_FIXMES, true); 872 if (isBeforeUpload) { 873 checkFixmes = checkFixmes && Config.getPref().getBoolean(PREF_CHECK_FIXMES_BEFORE_UPLOAD, true); 874 } 875 } 876 877 @Override 878 public void visit(Collection<OsmPrimitive> selection) { 879 if (checkKeys || checkValues || checkComplex || checkFixmes) { 880 super.visit(selection); 881 } 882 } 883 884 @Override 885 public void addGui(JPanel testPanel) { 886 GBC a = GBC.eol(); 887 a.anchor = GridBagConstraints.EAST; 888 889 testPanel.add(new JLabel(name+" :"), GBC.eol().insets(3, 0, 0, 0)); 890 891 prefCheckKeys = new JCheckBox(tr("Check property keys."), Config.getPref().getBoolean(PREF_CHECK_KEYS, true)); 892 prefCheckKeys.setToolTipText(tr("Validate that property keys are valid checking against list of words.")); 893 testPanel.add(prefCheckKeys, GBC.std().insets(20, 0, 0, 0)); 894 895 prefCheckKeysBeforeUpload = new JCheckBox(); 896 prefCheckKeysBeforeUpload.setSelected(Config.getPref().getBoolean(PREF_CHECK_KEYS_BEFORE_UPLOAD, true)); 897 testPanel.add(prefCheckKeysBeforeUpload, a); 898 899 prefCheckComplex = new JCheckBox(tr("Use complex property checker."), Config.getPref().getBoolean(PREF_CHECK_COMPLEX, true)); 900 prefCheckComplex.setToolTipText(tr("Validate property values and tags using complex rules.")); 901 testPanel.add(prefCheckComplex, GBC.std().insets(20, 0, 0, 0)); 902 903 prefCheckComplexBeforeUpload = new JCheckBox(); 904 prefCheckComplexBeforeUpload.setSelected(Config.getPref().getBoolean(PREF_CHECK_COMPLEX_BEFORE_UPLOAD, true)); 905 testPanel.add(prefCheckComplexBeforeUpload, a); 906 907 final Collection<String> sources = Config.getPref().getList(PREF_SOURCES, DEFAULT_SOURCES); 908 sourcesList = new EditableList(tr("TagChecker source")); 909 sourcesList.setItems(sources); 910 testPanel.add(new JLabel(tr("Data sources ({0})", "*.cfg")), GBC.eol().insets(23, 0, 0, 0)); 911 testPanel.add(sourcesList, GBC.eol().fill(GridBagConstraints.HORIZONTAL).insets(23, 0, 0, 0)); 912 913 ActionListener disableCheckActionListener = e -> handlePrefEnable(); 914 prefCheckKeys.addActionListener(disableCheckActionListener); 915 prefCheckKeysBeforeUpload.addActionListener(disableCheckActionListener); 916 prefCheckComplex.addActionListener(disableCheckActionListener); 917 prefCheckComplexBeforeUpload.addActionListener(disableCheckActionListener); 918 919 handlePrefEnable(); 920 921 prefCheckValues = new JCheckBox(tr("Check property values."), Config.getPref().getBoolean(PREF_CHECK_VALUES, true)); 922 prefCheckValues.setToolTipText(tr("Validate that property values are valid checking against presets.")); 923 testPanel.add(prefCheckValues, GBC.std().insets(20, 0, 0, 0)); 924 925 prefCheckValuesBeforeUpload = new JCheckBox(); 926 prefCheckValuesBeforeUpload.setSelected(Config.getPref().getBoolean(PREF_CHECK_VALUES_BEFORE_UPLOAD, true)); 927 testPanel.add(prefCheckValuesBeforeUpload, a); 928 929 prefCheckFixmes = new JCheckBox(tr("Check for FIXMES."), Config.getPref().getBoolean(PREF_CHECK_FIXMES, true)); 930 prefCheckFixmes.setToolTipText(tr("Looks for nodes or ways with FIXME in any property value.")); 931 testPanel.add(prefCheckFixmes, GBC.std().insets(20, 0, 0, 0)); 932 933 prefCheckFixmesBeforeUpload = new JCheckBox(); 934 prefCheckFixmesBeforeUpload.setSelected(Config.getPref().getBoolean(PREF_CHECK_FIXMES_BEFORE_UPLOAD, true)); 935 testPanel.add(prefCheckFixmesBeforeUpload, a); 936 } 937 938 /** 939 * Enables/disables the source list field 940 */ 941 public void handlePrefEnable() { 942 boolean selected = prefCheckKeys.isSelected() || prefCheckKeysBeforeUpload.isSelected() 943 || prefCheckComplex.isSelected() || prefCheckComplexBeforeUpload.isSelected(); 944 sourcesList.setEnabled(selected); 945 } 946 947 @Override 948 public boolean ok() { 949 enabled = prefCheckKeys.isSelected() || prefCheckValues.isSelected() || prefCheckComplex.isSelected() || prefCheckFixmes.isSelected(); 950 testBeforeUpload = prefCheckKeysBeforeUpload.isSelected() || prefCheckValuesBeforeUpload.isSelected() 951 || prefCheckFixmesBeforeUpload.isSelected() || prefCheckComplexBeforeUpload.isSelected(); 952 953 Config.getPref().putBoolean(PREF_CHECK_VALUES, prefCheckValues.isSelected()); 954 Config.getPref().putBoolean(PREF_CHECK_COMPLEX, prefCheckComplex.isSelected()); 955 Config.getPref().putBoolean(PREF_CHECK_KEYS, prefCheckKeys.isSelected()); 956 Config.getPref().putBoolean(PREF_CHECK_FIXMES, prefCheckFixmes.isSelected()); 957 Config.getPref().putBoolean(PREF_CHECK_VALUES_BEFORE_UPLOAD, prefCheckValuesBeforeUpload.isSelected()); 958 Config.getPref().putBoolean(PREF_CHECK_COMPLEX_BEFORE_UPLOAD, prefCheckComplexBeforeUpload.isSelected()); 959 Config.getPref().putBoolean(PREF_CHECK_KEYS_BEFORE_UPLOAD, prefCheckKeysBeforeUpload.isSelected()); 960 Config.getPref().putBoolean(PREF_CHECK_FIXMES_BEFORE_UPLOAD, prefCheckFixmesBeforeUpload.isSelected()); 961 return Config.getPref().putList(PREF_SOURCES, sourcesList.getItems()); 962 } 963 964 @Override 965 public Command fixError(TestError testError) { 966 List<Command> commands = new ArrayList<>(50); 967 968 Collection<? extends OsmPrimitive> primitives = testError.getPrimitives(); 969 for (OsmPrimitive p : primitives) { 970 Map<String, String> tags = p.getKeys(); 971 if (tags.isEmpty()) { 972 continue; 973 } 974 975 for (Entry<String, String> prop: tags.entrySet()) { 976 String key = prop.getKey(); 977 String value = prop.getValue(); 978 if (value == null || value.trim().isEmpty()) { 979 commands.add(new ChangePropertyCommand(p, key, null)); 980 } else if (value.startsWith(" ") || value.endsWith(" ") || value.contains(" ")) { 981 commands.add(new ChangePropertyCommand(p, key, Utils.removeWhiteSpaces(value))); 982 } else if (key.startsWith(" ") || key.endsWith(" ") || key.contains(" ")) { 983 commands.add(new ChangePropertyKeyCommand(p, key, Utils.removeWhiteSpaces(key))); 984 } else { 985 String evalue = Entities.unescape(value); 986 if (!evalue.equals(value)) { 987 commands.add(new ChangePropertyCommand(p, key, evalue)); 988 } 989 } 990 } 991 } 992 993 if (commands.isEmpty()) 994 return null; 995 if (commands.size() == 1) 996 return commands.get(0); 997 998 return new SequenceCommand(tr("Fix tags"), commands); 999 } 1000 1001 @Override 1002 public boolean isFixable(TestError testError) { 1003 if (testError.getTester() instanceof TagChecker) { 1004 int code = testError.getCode(); 1005 return code == EMPTY_VALUES || code == INVALID_SPACE || 1006 code == INVALID_KEY_SPACE || code == INVALID_HTML || 1007 code == MULTIPLE_SPACES; 1008 } 1009 1010 return false; 1011 } 1012}