001// License: GPL. For details, see LICENSE file.
002package org.openstreetmap.josm.data.validation.tests;
003
004import static org.openstreetmap.josm.tools.I18n.marktr;
005import static org.openstreetmap.josm.tools.I18n.tr;
006
007import java.awt.GridBagConstraints;
008import java.awt.event.ActionListener;
009import java.io.BufferedReader;
010import java.io.IOException;
011import java.lang.Character.UnicodeBlock;
012import java.util.ArrayList;
013import java.util.Arrays;
014import java.util.Collection;
015import java.util.Collections;
016import java.util.HashMap;
017import java.util.HashSet;
018import java.util.List;
019import java.util.Locale;
020import java.util.Map;
021import java.util.Map.Entry;
022import java.util.Set;
023import java.util.regex.Pattern;
024
025import javax.swing.JCheckBox;
026import javax.swing.JLabel;
027import javax.swing.JPanel;
028
029import org.openstreetmap.josm.command.ChangePropertyCommand;
030import org.openstreetmap.josm.command.ChangePropertyKeyCommand;
031import org.openstreetmap.josm.command.Command;
032import org.openstreetmap.josm.command.SequenceCommand;
033import org.openstreetmap.josm.data.osm.AbstractPrimitive;
034import org.openstreetmap.josm.data.osm.OsmPrimitive;
035import org.openstreetmap.josm.data.osm.Tag;
036import org.openstreetmap.josm.data.osm.Tagged;
037import org.openstreetmap.josm.data.preferences.sources.ValidatorPrefHelper;
038import org.openstreetmap.josm.data.validation.Severity;
039import org.openstreetmap.josm.data.validation.Test.TagTest;
040import org.openstreetmap.josm.data.validation.TestError;
041import org.openstreetmap.josm.data.validation.util.Entities;
042import org.openstreetmap.josm.gui.progress.ProgressMonitor;
043import org.openstreetmap.josm.gui.tagging.presets.TaggingPreset;
044import org.openstreetmap.josm.gui.tagging.presets.TaggingPresetItem;
045import org.openstreetmap.josm.gui.tagging.presets.TaggingPresets;
046import org.openstreetmap.josm.gui.tagging.presets.items.Check;
047import org.openstreetmap.josm.gui.tagging.presets.items.CheckGroup;
048import org.openstreetmap.josm.gui.tagging.presets.items.KeyedItem;
049import org.openstreetmap.josm.gui.widgets.EditableList;
050import org.openstreetmap.josm.io.CachedFile;
051import org.openstreetmap.josm.spi.preferences.Config;
052import org.openstreetmap.josm.tools.GBC;
053import org.openstreetmap.josm.tools.Logging;
054import org.openstreetmap.josm.tools.MultiMap;
055import org.openstreetmap.josm.tools.Utils;
056
057/**
058 * Check for misspelled or wrong tags
059 *
060 * @author frsantos
061 * @since 3669
062 */
063public class TagChecker extends TagTest {
064
065    /** The config file of ignored tags */
066    public static final String IGNORE_FILE = "resource://data/validator/ignoretags.cfg";
067    /** The config file of dictionary words */
068    public static final String SPELL_FILE = "resource://data/validator/words.cfg";
069
070    /** Normalized keys: the key should be substituted by the value if the key was not found in presets */
071    private static final Map<String, String> harmonizedKeys = new HashMap<>();
072    /** The spell check preset values which are not stored in TaggingPresets */
073    private static volatile HashSet<String> additionalPresetsValueData;
074    /** often used tags which are not in presets */
075    private static volatile MultiMap<String, String> oftenUsedTags = new MultiMap<>();
076
077    private static final Pattern UNWANTED_NON_PRINTING_CONTROL_CHARACTERS = Pattern.compile(
078            "[\\x00-\\x09\\x0B\\x0C\\x0E-\\x1F\\x7F\\u200e-\\u200f\\u202a-\\u202e]");
079
080    /** The TagChecker data */
081    private static final List<String> ignoreDataStartsWith = new ArrayList<>();
082    private static final Set<String> ignoreDataEquals = new HashSet<>();
083    private static final List<String> ignoreDataEndsWith = new ArrayList<>();
084    private static final List<Tag> ignoreDataTag = new ArrayList<>();
085    /** tag keys that have only numerical values in the presets */
086    private static final Set<String> ignoreForLevenshtein = new HashSet<>();
087
088    /** The preferences prefix */
089    protected static final String PREFIX = ValidatorPrefHelper.PREFIX + "." + TagChecker.class.getSimpleName();
090
091    /**
092     * The preference key to check values
093     */
094    public static final String PREF_CHECK_VALUES = PREFIX + ".checkValues";
095    /**
096     * The preference key to check keys
097     */
098    public static final String PREF_CHECK_KEYS = PREFIX + ".checkKeys";
099    /**
100     * The preference key to enable complex checks
101     */
102    public static final String PREF_CHECK_COMPLEX = PREFIX + ".checkComplex";
103    /**
104     * The preference key to search for fixme tags
105     */
106    public static final String PREF_CHECK_FIXMES = PREFIX + ".checkFixmes";
107
108    /**
109     * The preference key for source files
110     * @see #DEFAULT_SOURCES
111     */
112    public static final String PREF_SOURCES = PREFIX + ".source";
113
114    private static final String BEFORE_UPLOAD = "BeforeUpload";
115    /**
116     * The preference key to check keys - used before upload
117     */
118    public static final String PREF_CHECK_KEYS_BEFORE_UPLOAD = PREF_CHECK_KEYS + BEFORE_UPLOAD;
119    /**
120     * The preference key to check values - used before upload
121     */
122    public static final String PREF_CHECK_VALUES_BEFORE_UPLOAD = PREF_CHECK_VALUES + BEFORE_UPLOAD;
123    /**
124     * The preference key to run complex tests - used before upload
125     */
126    public static final String PREF_CHECK_COMPLEX_BEFORE_UPLOAD = PREF_CHECK_COMPLEX + BEFORE_UPLOAD;
127    /**
128     * The preference key to search for fixmes - used before upload
129     */
130    public static final String PREF_CHECK_FIXMES_BEFORE_UPLOAD = PREF_CHECK_FIXMES + BEFORE_UPLOAD;
131
132    private static final int MAX_LEVENSHTEIN_DISTANCE = 2;
133
134    protected boolean checkKeys;
135    protected boolean checkValues;
136    /** Was used for special configuration file, might be used to disable value spell checker. */
137    protected boolean checkComplex;
138    protected boolean checkFixmes;
139
140    protected JCheckBox prefCheckKeys;
141    protected JCheckBox prefCheckValues;
142    protected JCheckBox prefCheckComplex;
143    protected JCheckBox prefCheckFixmes;
144    protected JCheckBox prefCheckPaint;
145
146    protected JCheckBox prefCheckKeysBeforeUpload;
147    protected JCheckBox prefCheckValuesBeforeUpload;
148    protected JCheckBox prefCheckComplexBeforeUpload;
149    protected JCheckBox prefCheckFixmesBeforeUpload;
150    protected JCheckBox prefCheckPaintBeforeUpload;
151
152    // CHECKSTYLE.OFF: SingleSpaceSeparator
153    protected static final int EMPTY_VALUES             = 1200;
154    protected static final int INVALID_KEY              = 1201;
155    protected static final int INVALID_VALUE            = 1202;
156    protected static final int FIXME                    = 1203;
157    protected static final int INVALID_SPACE            = 1204;
158    protected static final int INVALID_KEY_SPACE        = 1205;
159    protected static final int INVALID_HTML             = 1206; /* 1207 was PAINT */
160    protected static final int LONG_VALUE               = 1208;
161    protected static final int LONG_KEY                 = 1209;
162    protected static final int LOW_CHAR_VALUE           = 1210;
163    protected static final int LOW_CHAR_KEY             = 1211;
164    protected static final int MISSPELLED_VALUE         = 1212;
165    protected static final int MISSPELLED_KEY           = 1213;
166    protected static final int MULTIPLE_SPACES          = 1214;
167    protected static final int MISSPELLED_VALUE_NO_FIX  = 1215;
168    protected static final int UNUSUAL_UNICODE_CHAR_VALUE = 1216;
169    // CHECKSTYLE.ON: SingleSpaceSeparator
170
171    protected EditableList sourcesList;
172
173    private static final List<String> DEFAULT_SOURCES = Arrays.asList(IGNORE_FILE, SPELL_FILE);
174
175    /**
176     * Constructor
177     */
178    public TagChecker() {
179        super(tr("Tag checker"), tr("This test checks for errors in tag keys and values."));
180    }
181
182    @Override
183    public void initialize() throws IOException {
184        initializeData();
185        initializePresets();
186        analysePresets();
187    }
188
189    /**
190     * Add presets that contain only numerical values to the ignore list
191     */
192    private static void analysePresets() {
193        for (String key : TaggingPresets.getPresetKeys()) {
194            if (isKeyIgnored(key))
195                continue;
196            boolean allNumerical = true;
197            Set<String> values = TaggingPresets.getPresetValues(key);
198            if (values.isEmpty())
199                allNumerical = false;
200            for (String val : values) {
201                if (!isNum(val)) {
202                    allNumerical = false;
203                    break;
204                }
205            }
206            if (allNumerical) {
207                ignoreForLevenshtein.add(key);
208            }
209        }
210    }
211
212    /**
213     * Reads the spell-check file into a HashMap.
214     * The data file is a list of words, beginning with +/-. If it starts with +,
215     * the word is valid, but if it starts with -, the word should be replaced
216     * by the nearest + word before this.
217     *
218     * @throws IOException if any I/O error occurs
219     */
220    private static void initializeData() throws IOException {
221        ignoreDataStartsWith.clear();
222        ignoreDataEquals.clear();
223        ignoreDataEndsWith.clear();
224        ignoreDataTag.clear();
225        harmonizedKeys.clear();
226        ignoreForLevenshtein.clear();
227        oftenUsedTags.clear();
228
229        StringBuilder errorSources = new StringBuilder();
230        for (String source : Config.getPref().getList(PREF_SOURCES, DEFAULT_SOURCES)) {
231            try (
232                CachedFile cf = new CachedFile(source);
233                BufferedReader reader = cf.getContentReader()
234            ) {
235                String okValue = null;
236                boolean tagcheckerfile = false;
237                boolean ignorefile = false;
238                boolean isFirstLine = true;
239                String line;
240                while ((line = reader.readLine()) != null) {
241                    if (line.isEmpty()) {
242                        // ignore
243                    } else if (line.startsWith("#")) {
244                        if (line.startsWith("# JOSM TagChecker")) {
245                            tagcheckerfile = true;
246                            Logging.error(tr("Ignoring {0}. Support was dropped", source));
247                        } else
248                        if (line.startsWith("# JOSM IgnoreTags")) {
249                            ignorefile = true;
250                            if (!DEFAULT_SOURCES.contains(source)) {
251                                Logging.info(tr("Adding {0} to ignore tags", source));
252                            }
253                        }
254                    } else if (ignorefile) {
255                        parseIgnoreFileLine(source, line);
256                    } else if (tagcheckerfile) {
257                        // ignore
258                    } else if (line.charAt(0) == '+') {
259                        okValue = line.substring(1);
260                    } else if (line.charAt(0) == '-' && okValue != null) {
261                        String hk = harmonizeKey(line.substring(1));
262                        if (!okValue.equals(hk) && harmonizedKeys.put(hk, okValue) != null) {
263                            Logging.debug(tr("Line was ignored: {0}", line));
264                        }
265                    } else {
266                        Logging.error(tr("Invalid spellcheck line: {0}", line));
267                    }
268                    if (isFirstLine) {
269                        isFirstLine = false;
270                        if (!(tagcheckerfile || ignorefile) && !DEFAULT_SOURCES.contains(source)) {
271                            Logging.info(tr("Adding {0} to spellchecker", source));
272                        }
273                    }
274                }
275            } catch (IOException e) {
276                Logging.error(e);
277                errorSources.append(source).append('\n');
278            }
279        }
280
281        if (errorSources.length() > 0)
282            throw new IOException(tr("Could not access data file(s):\n{0}", errorSources));
283    }
284
285    /**
286     * Parse a line found in a configuration file
287     * @param source name of configuration file
288     * @param line the line to parse
289     */
290    private static void parseIgnoreFileLine(String source, String line) {
291        line = line.trim();
292        if (line.length() < 4) {
293            return;
294        }
295        try {
296            String key = line.substring(0, 2);
297            line = line.substring(2);
298
299            switch (key) {
300            case "S:":
301                ignoreDataStartsWith.add(line);
302                break;
303            case "E:":
304                ignoreDataEquals.add(line);
305                addToKeyDictionary(line);
306                break;
307            case "F:":
308                ignoreDataEndsWith.add(line);
309                break;
310            case "K:":
311                Tag tag = Tag.ofString(line);
312                ignoreDataTag.add(tag);
313                oftenUsedTags.put(tag.getKey(), tag.getValue());
314                addToKeyDictionary(tag.getKey());
315                break;
316            default:
317                if (!key.startsWith(";")) {
318                    Logging.warn("Unsupported TagChecker key: " + key);
319                }
320            }
321        } catch (IllegalArgumentException e) {
322            Logging.error("Invalid line in {0} : {1}", source, e.getMessage());
323            Logging.trace(e);
324        }
325    }
326
327    private static void addToKeyDictionary(String key) {
328        if (key != null) {
329            String hk = harmonizeKey(key);
330            if (!key.equals(hk)) {
331                harmonizedKeys.put(hk, key);
332            }
333        }
334    }
335
336    /**
337     * Reads the presets data.
338     *
339     */
340    public static void initializePresets() {
341
342        if (!Config.getPref().getBoolean(PREF_CHECK_VALUES, true))
343            return;
344
345        Collection<TaggingPreset> presets = TaggingPresets.getTaggingPresets();
346        if (!presets.isEmpty()) {
347            initAdditionalPresetsValueData();
348            for (TaggingPreset p : presets) {
349                for (TaggingPresetItem i : p.data) {
350                    if (i instanceof KeyedItem) {
351                        addPresetValue((KeyedItem) i);
352                    } else if (i instanceof CheckGroup) {
353                        for (Check c : ((CheckGroup) i).checks) {
354                            addPresetValue(c);
355                        }
356                    }
357                }
358            }
359        }
360    }
361
362    private static void initAdditionalPresetsValueData() {
363        additionalPresetsValueData = new HashSet<>();
364        for (String a : AbstractPrimitive.getUninterestingKeys()) {
365            additionalPresetsValueData.add(a);
366        }
367        for (String a : Config.getPref().getList(ValidatorPrefHelper.PREFIX + ".knownkeys",
368                Arrays.asList("is_in", "int_ref", "fixme", "population"))) {
369            additionalPresetsValueData.add(a);
370        }
371    }
372
373    private static void addPresetValue(KeyedItem ky) {
374        if (ky.key != null && ky.getValues() != null) {
375            addToKeyDictionary(ky.key);
376        }
377    }
378
379    /**
380     * Checks given string (key or value) if it contains unwanted non-printing control characters (either ASCII or Unicode bidi characters)
381     * @param s string to check
382     * @return {@code true} if {@code s} contains non-printing control characters
383     */
384    static boolean containsUnwantedNonPrintingControlCharacter(String s) {
385        return s != null && !s.isEmpty() && (
386                isJoiningChar(s.charAt(0)) ||
387                isJoiningChar(s.charAt(s.length() - 1)) ||
388                s.chars().anyMatch(c -> (isAsciiControlChar(c) && !isNewLineChar(c)) || isBidiControlChar(c))
389                );
390    }
391
392    private static boolean isAsciiControlChar(int c) {
393        return c < 0x20 || c == 0x7F;
394    }
395
396    private static boolean isNewLineChar(int c) {
397        return c == 0x0a || c == 0x0d;
398    }
399
400    private static boolean isJoiningChar(int c) {
401        return c == 0x200c || c == 0x200d; // ZWNJ, ZWJ
402    }
403
404    private static boolean isBidiControlChar(int c) {
405        /* check for range 0x200e to 0x200f (LRM, RLM) or
406                           0x202a to 0x202e (LRE, RLE, PDF, LRO, RLO) */
407        return (c >= 0x200e && c <= 0x200f) || (c >= 0x202a && c <= 0x202e);
408    }
409
410    static String removeUnwantedNonPrintingControlCharacters(String s) {
411        // Remove all unwanted characters
412        String result = UNWANTED_NON_PRINTING_CONTROL_CHARACTERS.matcher(s).replaceAll("");
413        // Remove joining characters located at the beginning of the string
414        while (!result.isEmpty() && isJoiningChar(result.charAt(0))) {
415            result = result.substring(1);
416        }
417        // Remove joining characters located at the end of the string
418        while (!result.isEmpty() && isJoiningChar(result.charAt(result.length() - 1))) {
419            result = result.substring(0, result.length() - 1);
420        }
421        return result;
422    }
423
424    private static boolean containsUnusualUnicodeCharacter(String key, String value) {
425        return value != null && value.chars().anyMatch(c -> isUnusualUnicodeBlock(key, UnicodeBlock.of(c)));
426    }
427
428    /**
429     * Detects highly suspicious Unicode characters that have been seen in OSM database.
430     * @param key tag key
431     * @param b Unicode block of the current character
432     * @return {@code true} if the current unicode block is very unusual for the given key
433     */
434    private static boolean isUnusualUnicodeBlock(String key, UnicodeBlock b) {
435        return isUnusualPhoneticUse(key, b) || isUnusualBmpUse(b) || isUnusualSmpUse(b);
436    }
437
438    private static boolean isUnusualPhoneticUse(String key, UnicodeBlock b) {
439        return (b == UnicodeBlock.IPA_EXTENSIONS                        // U+0250..U+02AF
440             || b == UnicodeBlock.PHONETIC_EXTENSIONS                   // U+1D00..U+1D7F
441             || b == UnicodeBlock.PHONETIC_EXTENSIONS_SUPPLEMENT)       // U+1D80..U+1DBF
442                && !key.endsWith(":pronunciation");
443    }
444
445    private static boolean isUnusualBmpUse(UnicodeBlock b) {
446        // CHECKSTYLE.OFF: BooleanExpressionComplexity
447        return b == UnicodeBlock.COMBINING_MARKS_FOR_SYMBOLS            // U+20D0..U+20FF
448            || b == UnicodeBlock.ARROWS                                 // U+2190..U+21FF
449            || b == UnicodeBlock.MATHEMATICAL_OPERATORS                 // U+2200..U+22FF
450            || b == UnicodeBlock.ENCLOSED_ALPHANUMERICS                 // U+2460..U+24FF
451            || b == UnicodeBlock.BOX_DRAWING                            // U+2500..U+257F
452            || b == UnicodeBlock.GEOMETRIC_SHAPES                       // U+25A0..U+25FF
453            || b == UnicodeBlock.DINGBATS                               // U+2700..U+27BF
454            || b == UnicodeBlock.MISCELLANEOUS_SYMBOLS_AND_ARROWS       // U+2B00..U+2BFF
455            || b == UnicodeBlock.GLAGOLITIC                             // U+2C00..U+2C5F
456            || b == UnicodeBlock.HANGUL_COMPATIBILITY_JAMO              // U+3130..U+318F
457            || b == UnicodeBlock.ENCLOSED_CJK_LETTERS_AND_MONTHS        // U+3200..U+32FF
458            || b == UnicodeBlock.LATIN_EXTENDED_D                       // U+A720..U+A7FF
459            || b == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS           // U+F900..U+FAFF
460            || b == UnicodeBlock.ALPHABETIC_PRESENTATION_FORMS          // U+FB00..U+FB4F
461            || b == UnicodeBlock.VARIATION_SELECTORS                    // U+FE00..U+FE0F
462            || b == UnicodeBlock.SPECIALS;                              // U+FFF0..U+FFFF
463            // CHECKSTYLE.ON: BooleanExpressionComplexity
464    }
465
466    private static boolean isUnusualSmpUse(UnicodeBlock b) {
467        // UnicodeBlock.SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS is only defined in Java 9+
468        return b == UnicodeBlock.MUSICAL_SYMBOLS                        // U+1D100..U+1D1FF
469            || b == UnicodeBlock.ENCLOSED_ALPHANUMERIC_SUPPLEMENT       // U+1F100..U+1F1FF
470            || b == UnicodeBlock.EMOTICONS                              // U+1F600..U+1F64F
471            || b == UnicodeBlock.TRANSPORT_AND_MAP_SYMBOLS;             // U+1F680..U+1F6FF
472    }
473
474    /**
475     * Get set of preset values for the given key.
476     * @param key the key
477     * @return null if key is not in presets or in additionalPresetsValueData,
478     *  else a set which might be empty.
479     */
480    private static Set<String> getPresetValues(String key) {
481        Set<String> res = TaggingPresets.getPresetValues(key);
482        if (res != null)
483            return res;
484        if (additionalPresetsValueData.contains(key))
485            return Collections.emptySet();
486        // null means key is not known
487        return null;
488    }
489
490    /**
491     * Determines if the given key is in internal presets.
492     * @param key key
493     * @return {@code true} if the given key is in internal presets
494     * @since 9023
495     */
496    public static boolean isKeyInPresets(String key) {
497        return TaggingPresets.getPresetValues(key) != null;
498    }
499
500    /**
501     * Determines if the given tag is in internal presets.
502     * @param key key
503     * @param value value
504     * @return {@code true} if the given tag is in internal presets
505     * @since 9023
506     */
507    public static boolean isTagInPresets(String key, String value) {
508        final Set<String> values = getPresetValues(key);
509        return values != null && values.contains(value);
510    }
511
512    /**
513     * Returns the list of ignored tags.
514     * @return the list of ignored tags
515     * @since 9023
516     */
517    public static List<Tag> getIgnoredTags() {
518        return new ArrayList<>(ignoreDataTag);
519    }
520
521    /**
522     * Determines if the given tag key is ignored for checks "key/tag not in presets".
523     * @param key key
524     * @return true if the given key is ignored
525     */
526    private static boolean isKeyIgnored(String key) {
527        if (ignoreDataEquals.contains(key)) {
528            return true;
529        }
530        for (String a : ignoreDataStartsWith) {
531            if (key.startsWith(a)) {
532                return true;
533            }
534        }
535        for (String a : ignoreDataEndsWith) {
536            if (key.endsWith(a)) {
537                return true;
538            }
539        }
540        return false;
541    }
542
543    /**
544     * Determines if the given tag is ignored for checks "key/tag not in presets".
545     * @param key key
546     * @param value value
547     * @return {@code true} if the given tag is ignored
548     * @since 9023
549     */
550    public static boolean isTagIgnored(String key, String value) {
551        if (isKeyIgnored(key))
552            return true;
553        final Set<String> values = getPresetValues(key);
554        if (values != null && values.isEmpty())
555            return true;
556        if (!isTagInPresets(key, value)) {
557            for (Tag a : ignoreDataTag) {
558                if (key.equals(a.getKey()) && value.equals(a.getValue())) {
559                    return true;
560                }
561            }
562        }
563        return false;
564    }
565
566    /**
567     * Checks the primitive tags
568     * @param p The primitive to check
569     */
570    @Override
571    public void check(OsmPrimitive p) {
572        if (!p.isTagged())
573            return;
574
575        // Just a collection to know if a primitive has been already marked with error
576        MultiMap<OsmPrimitive, String> withErrors = new MultiMap<>();
577
578        for (Entry<String, String> prop : p.getKeys().entrySet()) {
579            String s = marktr("Tag ''{0}'' invalid.");
580            String key = prop.getKey();
581            String value = prop.getValue();
582
583            if (checkKeys) {
584                checkSingleTagKeySimple(withErrors, p, s, key);
585            }
586            if (checkValues) {
587                checkSingleTagValueSimple(withErrors, p, s, key, value);
588                checkSingleTagComplex(withErrors, p, key, value);
589            }
590            if (checkFixmes && key != null && value != null && !value.isEmpty() && isFixme(key, value) && !withErrors.contains(p, "FIXME")) {
591                errors.add(TestError.builder(this, Severity.OTHER, FIXME)
592                        .message(tr("FIXMES"))
593                        .primitives(p)
594                        .build());
595                withErrors.put(p, "FIXME");
596            }
597        }
598    }
599
600    private void checkSingleTagValueSimple(MultiMap<OsmPrimitive, String> withErrors, OsmPrimitive p, String s, String key, String value) {
601        if (!checkValues || value == null)
602            return;
603        if ((containsUnwantedNonPrintingControlCharacter(value)) && !withErrors.contains(p, "ICV")) {
604            errors.add(TestError.builder(this, Severity.WARNING, LOW_CHAR_VALUE)
605                    .message(tr("Tag value contains non-printing character"), s, key)
606                    .primitives(p)
607                    .fix(() -> new ChangePropertyCommand(p, key, removeUnwantedNonPrintingControlCharacters(value)))
608                    .build());
609            withErrors.put(p, "ICV");
610        }
611        if ((containsUnusualUnicodeCharacter(key, value)) && !withErrors.contains(p, "UUCV")) {
612            errors.add(TestError.builder(this, Severity.WARNING, UNUSUAL_UNICODE_CHAR_VALUE)
613                    .message(tr("Tag value contains unusual Unicode character"), s, key)
614                    .primitives(p)
615                    .build());
616            withErrors.put(p, "UUCV");
617        }
618        if ((value.length() > Tagged.MAX_TAG_LENGTH) && !withErrors.contains(p, "LV")) {
619            errors.add(TestError.builder(this, Severity.ERROR, LONG_VALUE)
620                    .message(tr("Tag value longer than {0} characters ({1} characters)", Tagged.MAX_TAG_LENGTH, value.length()), s, key)
621                    .primitives(p)
622                    .build());
623            withErrors.put(p, "LV");
624        }
625        if ((value.trim().isEmpty()) && !withErrors.contains(p, "EV")) {
626            errors.add(TestError.builder(this, Severity.WARNING, EMPTY_VALUES)
627                    .message(tr("Tags with empty values"), s, key)
628                    .primitives(p)
629                    .build());
630            withErrors.put(p, "EV");
631        }
632        final String errTypeSpace = "SPACE";
633        if ((value.startsWith(" ") || value.endsWith(" ")) && !withErrors.contains(p, errTypeSpace)) {
634            errors.add(TestError.builder(this, Severity.WARNING, INVALID_SPACE)
635                    .message(tr("Property values start or end with white space"), s, key)
636                    .primitives(p)
637                    .build());
638            withErrors.put(p, errTypeSpace);
639        }
640        if (value.contains("  ") && !withErrors.contains(p, errTypeSpace)) {
641            errors.add(TestError.builder(this, Severity.WARNING, MULTIPLE_SPACES)
642                    .message(tr("Property values contain multiple white spaces"), s, key)
643                    .primitives(p)
644                    .build());
645            withErrors.put(p, errTypeSpace);
646        }
647        if (!value.equals(Entities.unescape(value)) && !withErrors.contains(p, "HTML")) {
648            errors.add(TestError.builder(this, Severity.OTHER, INVALID_HTML)
649                    .message(tr("Property values contain HTML entity"), s, key)
650                    .primitives(p)
651                    .build());
652            withErrors.put(p, "HTML");
653        }
654    }
655
656    private void checkSingleTagKeySimple(MultiMap<OsmPrimitive, String> withErrors, OsmPrimitive p, String s, String key) {
657        if (!checkKeys || key == null)
658            return;
659        if ((containsUnwantedNonPrintingControlCharacter(key)) && !withErrors.contains(p, "ICK")) {
660            errors.add(TestError.builder(this, Severity.WARNING, LOW_CHAR_KEY)
661                    .message(tr("Tag key contains non-printing character"), s, key)
662                    .primitives(p)
663                    .fix(() -> new ChangePropertyCommand(p, key, removeUnwantedNonPrintingControlCharacters(key)))
664                    .build());
665            withErrors.put(p, "ICK");
666        }
667        if (key.length() > Tagged.MAX_TAG_LENGTH && !withErrors.contains(p, "LK")) {
668            errors.add(TestError.builder(this, Severity.ERROR, LONG_KEY)
669                    .message(tr("Tag key longer than {0} characters ({1} characters)", Tagged.MAX_TAG_LENGTH, key.length()), s, key)
670                    .primitives(p)
671                    .build());
672            withErrors.put(p, "LK");
673        }
674        if (key.indexOf(' ') >= 0 && !withErrors.contains(p, "IPK")) {
675            errors.add(TestError.builder(this, Severity.WARNING, INVALID_KEY_SPACE)
676                    .message(tr("Invalid white space in property key"), s, key)
677                    .primitives(p)
678                    .build());
679            withErrors.put(p, "IPK");
680        }
681    }
682
683    private void checkSingleTagComplex(MultiMap<OsmPrimitive, String> withErrors, OsmPrimitive p, String key, String value) {
684        if (!checkValues || key == null || value == null || value.isEmpty())
685            return;
686        if (additionalPresetsValueData != null && !isTagIgnored(key, value)) {
687            if (!isKeyInPresets(key)) {
688                spellCheckKey(withErrors, p, key);
689            } else if (!isTagInPresets(key, value)) {
690                if (oftenUsedTags.contains(key, value)) {
691                    // tag is quite often used but not in presets
692                    errors.add(TestError.builder(this, Severity.OTHER, INVALID_VALUE)
693                            .message(tr("Presets do not contain property value"),
694                                    marktr("Value ''{0}'' for key ''{1}'' not in presets, but is known."), value, key)
695                            .primitives(p)
696                            .build());
697                    withErrors.put(p, "UPV");
698                } else {
699                    tryGuess(p, key, value, withErrors);
700                }
701            }
702        }
703    }
704
705    private void spellCheckKey(MultiMap<OsmPrimitive, String> withErrors, OsmPrimitive p, String key) {
706        String prettifiedKey = harmonizeKey(key);
707        String fixedKey;
708        if (ignoreDataEquals.contains(prettifiedKey)) {
709            fixedKey = prettifiedKey;
710        } else {
711            fixedKey = isKeyInPresets(prettifiedKey) ? prettifiedKey : harmonizedKeys.get(prettifiedKey);
712        }
713        if (fixedKey == null) {
714            for (Tag a : ignoreDataTag) {
715                if (a.getKey().equals(prettifiedKey)) {
716                    fixedKey = prettifiedKey;
717                    break;
718                }
719            }
720        }
721
722        if (fixedKey != null && !"".equals(fixedKey) && !fixedKey.equals(key)) {
723            final String proposedKey = fixedKey;
724            // misspelled preset key
725            final TestError.Builder error = TestError.builder(this, Severity.WARNING, MISSPELLED_KEY)
726                    .message(tr("Misspelled property key"), marktr("Key ''{0}'' looks like ''{1}''."), key, proposedKey)
727                    .primitives(p);
728            if (p.hasKey(fixedKey)) {
729                errors.add(error.build());
730            } else {
731                errors.add(error.fix(() -> new ChangePropertyKeyCommand(p, key, proposedKey)).build());
732            }
733            withErrors.put(p, "WPK");
734        } else {
735            errors.add(TestError.builder(this, Severity.OTHER, INVALID_KEY)
736                    .message(tr("Presets do not contain property key"), marktr("Key ''{0}'' not in presets."), key)
737                    .primitives(p)
738                    .build());
739            withErrors.put(p, "UPK");
740        }
741    }
742
743    private void tryGuess(OsmPrimitive p, String key, String value, MultiMap<OsmPrimitive, String> withErrors) {
744        // try to fix common typos and check again if value is still unknown
745        final String harmonizedValue = harmonizeValue(value);
746        if (harmonizedValue == null || harmonizedValue.isEmpty())
747            return;
748        String fixedValue = null;
749        List<Set<String>> sets = new ArrayList<>();
750        Set<String> presetValues = getPresetValues(key);
751        if (presetValues != null)
752            sets.add(presetValues);
753        Set<String> usedValues = oftenUsedTags.get(key);
754        if (usedValues != null)
755            sets.add(usedValues);
756        for (Set<String> possibleValues: sets) {
757            if (possibleValues.contains(harmonizedValue)) {
758                fixedValue = harmonizedValue;
759                break;
760            }
761        }
762        if (fixedValue == null && !ignoreForLevenshtein.contains(key)) {
763            int maxPresetValueLen = 0;
764            List<String> fixVals = new ArrayList<>();
765            // use Levenshtein distance to find typical typos
766            int minDist = MAX_LEVENSHTEIN_DISTANCE + 1;
767            String closest = null;
768            for (Set<String> possibleValues: sets) {
769                for (String possibleVal : possibleValues) {
770                    if (possibleVal.isEmpty())
771                        continue;
772                    maxPresetValueLen = Math.max(maxPresetValueLen, possibleVal.length());
773                    if (harmonizedValue.length() < 3 && possibleVal.length() >= harmonizedValue.length() + MAX_LEVENSHTEIN_DISTANCE) {
774                        // don't suggest fix value when given value is short and lengths are too different
775                        // for example surface=u would result in surface=mud
776                        continue;
777                    }
778                    int dist = Utils.getLevenshteinDistance(possibleVal, harmonizedValue);
779                    if (dist >= harmonizedValue.length()) {
780                        // short value, all characters are different. Don't warn, might say Value '10' for key 'fee' looks like 'no'.
781                        continue;
782                    }
783                    if (dist < minDist) {
784                        closest = possibleVal;
785                        minDist = dist;
786                        fixVals.clear();
787                        fixVals.add(possibleVal);
788                    } else if (dist == minDist) {
789                        fixVals.add(possibleVal);
790                    }
791                }
792            }
793
794            if (minDist <= MAX_LEVENSHTEIN_DISTANCE && maxPresetValueLen > MAX_LEVENSHTEIN_DISTANCE
795                    && (harmonizedValue.length() > 3 || minDist < MAX_LEVENSHTEIN_DISTANCE)) {
796                if (fixVals.size() < 2) {
797                    fixedValue = closest;
798                } else {
799                    Collections.sort(fixVals);
800                    // misspelled preset value with multiple good alternatives
801                    errors.add(TestError.builder(this, Severity.WARNING, MISSPELLED_VALUE_NO_FIX)
802                            .message(tr("Unknown property value"),
803                                    marktr("Value ''{0}'' for key ''{1}'' is unknown, maybe one of {2} is meant?"),
804                                    value, key, fixVals)
805                            .primitives(p).build());
806                    withErrors.put(p, "WPV");
807                    return;
808                }
809            }
810        }
811        if (fixedValue != null && !fixedValue.equals(value)) {
812            final String newValue = fixedValue;
813            // misspelled preset value
814            errors.add(TestError.builder(this, Severity.WARNING, MISSPELLED_VALUE)
815                    .message(tr("Unknown property value"),
816                            marktr("Value ''{0}'' for key ''{1}'' is unknown, maybe ''{2}'' is meant?"), value, key, newValue)
817                    .primitives(p)
818                    .build());
819            withErrors.put(p, "WPV");
820        } else {
821            // unknown preset value
822            errors.add(TestError.builder(this, Severity.OTHER, INVALID_VALUE)
823                    .message(tr("Presets do not contain property value"),
824                            marktr("Value ''{0}'' for key ''{1}'' not in presets."), value, key)
825                    .primitives(p)
826                    .build());
827            withErrors.put(p, "UPV");
828        }
829    }
830
831    private static boolean isNum(String harmonizedValue) {
832        try {
833            Double.parseDouble(harmonizedValue);
834            return true;
835        } catch (NumberFormatException e) {
836            return false;
837        }
838    }
839
840    private static boolean isFixme(String key, String value) {
841        return key.toLowerCase(Locale.ENGLISH).contains("fixme") || key.contains("todo")
842          || value.toLowerCase(Locale.ENGLISH).contains("fixme") || value.contains("check and delete");
843    }
844
845    private static String harmonizeKey(String key) {
846        return Utils.strip(key.toLowerCase(Locale.ENGLISH).replace('-', '_').replace(':', '_').replace(' ', '_'), "-_;:,");
847    }
848
849    private static String harmonizeValue(String value) {
850        return Utils.strip(value.toLowerCase(Locale.ENGLISH).replace('-', '_').replace(' ', '_'), "-_;:,");
851    }
852
853    @Override
854    public void startTest(ProgressMonitor monitor) {
855        super.startTest(monitor);
856        checkKeys = Config.getPref().getBoolean(PREF_CHECK_KEYS, true);
857        if (isBeforeUpload) {
858            checkKeys = checkKeys && Config.getPref().getBoolean(PREF_CHECK_KEYS_BEFORE_UPLOAD, true);
859        }
860
861        checkValues = Config.getPref().getBoolean(PREF_CHECK_VALUES, true);
862        if (isBeforeUpload) {
863            checkValues = checkValues && Config.getPref().getBoolean(PREF_CHECK_VALUES_BEFORE_UPLOAD, true);
864        }
865
866        checkComplex = Config.getPref().getBoolean(PREF_CHECK_COMPLEX, true);
867        if (isBeforeUpload) {
868            checkComplex = checkComplex && Config.getPref().getBoolean(PREF_CHECK_COMPLEX_BEFORE_UPLOAD, true);
869        }
870
871        checkFixmes = Config.getPref().getBoolean(PREF_CHECK_FIXMES, true);
872        if (isBeforeUpload) {
873            checkFixmes = checkFixmes && Config.getPref().getBoolean(PREF_CHECK_FIXMES_BEFORE_UPLOAD, true);
874        }
875    }
876
877    @Override
878    public void visit(Collection<OsmPrimitive> selection) {
879        if (checkKeys || checkValues || checkComplex || checkFixmes) {
880            super.visit(selection);
881        }
882    }
883
884    @Override
885    public void addGui(JPanel testPanel) {
886        GBC a = GBC.eol();
887        a.anchor = GridBagConstraints.EAST;
888
889        testPanel.add(new JLabel(name+" :"), GBC.eol().insets(3, 0, 0, 0));
890
891        prefCheckKeys = new JCheckBox(tr("Check property keys."), Config.getPref().getBoolean(PREF_CHECK_KEYS, true));
892        prefCheckKeys.setToolTipText(tr("Validate that property keys are valid checking against list of words."));
893        testPanel.add(prefCheckKeys, GBC.std().insets(20, 0, 0, 0));
894
895        prefCheckKeysBeforeUpload = new JCheckBox();
896        prefCheckKeysBeforeUpload.setSelected(Config.getPref().getBoolean(PREF_CHECK_KEYS_BEFORE_UPLOAD, true));
897        testPanel.add(prefCheckKeysBeforeUpload, a);
898
899        prefCheckComplex = new JCheckBox(tr("Use complex property checker."), Config.getPref().getBoolean(PREF_CHECK_COMPLEX, true));
900        prefCheckComplex.setToolTipText(tr("Validate property values and tags using complex rules."));
901        testPanel.add(prefCheckComplex, GBC.std().insets(20, 0, 0, 0));
902
903        prefCheckComplexBeforeUpload = new JCheckBox();
904        prefCheckComplexBeforeUpload.setSelected(Config.getPref().getBoolean(PREF_CHECK_COMPLEX_BEFORE_UPLOAD, true));
905        testPanel.add(prefCheckComplexBeforeUpload, a);
906
907        final Collection<String> sources = Config.getPref().getList(PREF_SOURCES, DEFAULT_SOURCES);
908        sourcesList = new EditableList(tr("TagChecker source"));
909        sourcesList.setItems(sources);
910        testPanel.add(new JLabel(tr("Data sources ({0})", "*.cfg")), GBC.eol().insets(23, 0, 0, 0));
911        testPanel.add(sourcesList, GBC.eol().fill(GridBagConstraints.HORIZONTAL).insets(23, 0, 0, 0));
912
913        ActionListener disableCheckActionListener = e -> handlePrefEnable();
914        prefCheckKeys.addActionListener(disableCheckActionListener);
915        prefCheckKeysBeforeUpload.addActionListener(disableCheckActionListener);
916        prefCheckComplex.addActionListener(disableCheckActionListener);
917        prefCheckComplexBeforeUpload.addActionListener(disableCheckActionListener);
918
919        handlePrefEnable();
920
921        prefCheckValues = new JCheckBox(tr("Check property values."), Config.getPref().getBoolean(PREF_CHECK_VALUES, true));
922        prefCheckValues.setToolTipText(tr("Validate that property values are valid checking against presets."));
923        testPanel.add(prefCheckValues, GBC.std().insets(20, 0, 0, 0));
924
925        prefCheckValuesBeforeUpload = new JCheckBox();
926        prefCheckValuesBeforeUpload.setSelected(Config.getPref().getBoolean(PREF_CHECK_VALUES_BEFORE_UPLOAD, true));
927        testPanel.add(prefCheckValuesBeforeUpload, a);
928
929        prefCheckFixmes = new JCheckBox(tr("Check for FIXMES."), Config.getPref().getBoolean(PREF_CHECK_FIXMES, true));
930        prefCheckFixmes.setToolTipText(tr("Looks for nodes or ways with FIXME in any property value."));
931        testPanel.add(prefCheckFixmes, GBC.std().insets(20, 0, 0, 0));
932
933        prefCheckFixmesBeforeUpload = new JCheckBox();
934        prefCheckFixmesBeforeUpload.setSelected(Config.getPref().getBoolean(PREF_CHECK_FIXMES_BEFORE_UPLOAD, true));
935        testPanel.add(prefCheckFixmesBeforeUpload, a);
936    }
937
938    /**
939     * Enables/disables the source list field
940     */
941    public void handlePrefEnable() {
942        boolean selected = prefCheckKeys.isSelected() || prefCheckKeysBeforeUpload.isSelected()
943                || prefCheckComplex.isSelected() || prefCheckComplexBeforeUpload.isSelected();
944        sourcesList.setEnabled(selected);
945    }
946
947    @Override
948    public boolean ok() {
949        enabled = prefCheckKeys.isSelected() || prefCheckValues.isSelected() || prefCheckComplex.isSelected() || prefCheckFixmes.isSelected();
950        testBeforeUpload = prefCheckKeysBeforeUpload.isSelected() || prefCheckValuesBeforeUpload.isSelected()
951                || prefCheckFixmesBeforeUpload.isSelected() || prefCheckComplexBeforeUpload.isSelected();
952
953        Config.getPref().putBoolean(PREF_CHECK_VALUES, prefCheckValues.isSelected());
954        Config.getPref().putBoolean(PREF_CHECK_COMPLEX, prefCheckComplex.isSelected());
955        Config.getPref().putBoolean(PREF_CHECK_KEYS, prefCheckKeys.isSelected());
956        Config.getPref().putBoolean(PREF_CHECK_FIXMES, prefCheckFixmes.isSelected());
957        Config.getPref().putBoolean(PREF_CHECK_VALUES_BEFORE_UPLOAD, prefCheckValuesBeforeUpload.isSelected());
958        Config.getPref().putBoolean(PREF_CHECK_COMPLEX_BEFORE_UPLOAD, prefCheckComplexBeforeUpload.isSelected());
959        Config.getPref().putBoolean(PREF_CHECK_KEYS_BEFORE_UPLOAD, prefCheckKeysBeforeUpload.isSelected());
960        Config.getPref().putBoolean(PREF_CHECK_FIXMES_BEFORE_UPLOAD, prefCheckFixmesBeforeUpload.isSelected());
961        return Config.getPref().putList(PREF_SOURCES, sourcesList.getItems());
962    }
963
964    @Override
965    public Command fixError(TestError testError) {
966        List<Command> commands = new ArrayList<>(50);
967
968        Collection<? extends OsmPrimitive> primitives = testError.getPrimitives();
969        for (OsmPrimitive p : primitives) {
970            Map<String, String> tags = p.getKeys();
971            if (tags.isEmpty()) {
972                continue;
973            }
974
975            for (Entry<String, String> prop: tags.entrySet()) {
976                String key = prop.getKey();
977                String value = prop.getValue();
978                if (value == null || value.trim().isEmpty()) {
979                    commands.add(new ChangePropertyCommand(p, key, null));
980                } else if (value.startsWith(" ") || value.endsWith(" ") || value.contains("  ")) {
981                    commands.add(new ChangePropertyCommand(p, key, Utils.removeWhiteSpaces(value)));
982                } else if (key.startsWith(" ") || key.endsWith(" ") || key.contains("  ")) {
983                    commands.add(new ChangePropertyKeyCommand(p, key, Utils.removeWhiteSpaces(key)));
984                } else {
985                    String evalue = Entities.unescape(value);
986                    if (!evalue.equals(value)) {
987                        commands.add(new ChangePropertyCommand(p, key, evalue));
988                    }
989                }
990            }
991        }
992
993        if (commands.isEmpty())
994            return null;
995        if (commands.size() == 1)
996            return commands.get(0);
997
998        return new SequenceCommand(tr("Fix tags"), commands);
999    }
1000
1001    @Override
1002    public boolean isFixable(TestError testError) {
1003        if (testError.getTester() instanceof TagChecker) {
1004            int code = testError.getCode();
1005            return code == EMPTY_VALUES || code == INVALID_SPACE ||
1006                   code == INVALID_KEY_SPACE || code == INVALID_HTML ||
1007                   code == MULTIPLE_SPACES;
1008        }
1009
1010        return false;
1011    }
1012}