001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.openstreetmap.josm.data.validation.routines;
018
019import java.util.Arrays;
020
021/**
022 * <p><b>Domain name</b> validation routines.</p>
023 *
024 * <p>
025 * This validator provides methods for validating Internet domain names
026 * and top-level domains.
027 * </p>
028 *
029 * <p>Domain names are evaluated according
030 * to the standards <a href="http://www.ietf.org/rfc/rfc1034.txt">RFC1034</a>,
031 * section 3, and <a href="http://www.ietf.org/rfc/rfc1123.txt">RFC1123</a>,
032 * section 2.1. No accommodation is provided for the specialized needs of
033 * other applications; if the domain name has been URL-encoded, for example,
034 * validation will fail even though the equivalent plaintext version of the
035 * same name would have passed.
036 * </p>
037 *
038 * <p>
039 * Validation is also provided for top-level domains (TLDs) as defined and
040 * maintained by the Internet Assigned Numbers Authority (IANA):
041 * </p>
042 *
043 *   <ul>
044 *     <li>{@link #isValidInfrastructureTld} - validates infrastructure TLDs
045 *         (<code>.arpa</code>, etc.)</li>
046 *     <li>{@link #isValidGenericTld} - validates generic TLDs
047 *         (<code>.com, .org</code>, etc.)</li>
048 *     <li>{@link #isValidIdnTld} - validates IDN TLDs
049 *         (<code>.xn--*</code>, etc.)</li>
050 *     <li>{@link #isValidCountryCodeTld} - validates country code TLDs
051 *         (<code>.us, .uk, .cn</code>, etc.)</li>
052 *   </ul>
053 *
054 * <p>
055 * (<b>NOTE</b>: This class does not provide IP address lookup for domain names or
056 * methods to ensure that a given domain name matches a specific IP; see
057 * {@link java.net.InetAddress} for that functionality.)
058 * </p>
059 *
060 * @version $Revision: 1640271 $ $Date: 2014-11-18 02:32:15 2014 UTC (Tue, 18 Nov 2014) $
061 * @since Validator 1.4
062 */
063public class DomainValidator extends AbstractValidator {
064
065    // Regular expression strings for hostnames (derived from RFC2396 and RFC 1123)
066    private static final String DOMAIN_LABEL_REGEX = "\\p{Alnum}(?>[\\p{Alnum}-]*\\p{Alnum})*";
067    private static final String TOP_LABEL_REGEX = "\\p{Alpha}{2,}";
068    // JOSM PATCH BEGIN
069    // See #10862 - IDN TLDs in ASCII form
070    private static final String TOP_LABEL_IDN_REGEX = "(?:xn|XN)--\\p{Alnum}{2,}(?:-\\p{Alpha}{2,})?";
071    private static final String DOMAIN_NAME_REGEX =
072            "^(?:" + DOMAIN_LABEL_REGEX + "\\.)+" + "(" + TOP_LABEL_REGEX + "|" + TOP_LABEL_IDN_REGEX + ")$";
073    // JOSM PATCH END
074
075    private final boolean allowLocal;
076
077    /**
078     * Singleton instance of this validator, which
079     *  doesn't consider local addresses as valid.
080     */
081    private static final DomainValidator DOMAIN_VALIDATOR = new DomainValidator(false);
082
083    /**
084     * Singleton instance of this validator, which does
085     *  consider local addresses valid.
086     */
087    private static final DomainValidator DOMAIN_VALIDATOR_WITH_LOCAL = new DomainValidator(true);
088
089    /**
090     * RegexValidator for matching domains.
091     */
092    private final RegexValidator domainRegex =
093            new RegexValidator(DOMAIN_NAME_REGEX);
094    /**
095     * RegexValidator for matching the a local hostname
096     */
097    private final RegexValidator hostnameRegex =
098            new RegexValidator(DOMAIN_LABEL_REGEX);
099
100    /**
101     * Returns the singleton instance of this validator. It
102     *  will not consider local addresses as valid.
103     * @return the singleton instance of this validator
104     */
105    public static DomainValidator getInstance() {
106        return DOMAIN_VALIDATOR;
107    }
108
109    /**
110     * Returns the singleton instance of this validator,
111     *  with local validation as required.
112     * @param allowLocal Should local addresses be considered valid?
113     * @return the singleton instance of this validator
114     */
115    public static DomainValidator getInstance(boolean allowLocal) {
116       if(allowLocal) {
117          return DOMAIN_VALIDATOR_WITH_LOCAL;
118       }
119       return DOMAIN_VALIDATOR;
120    }
121
122    /** Private constructor. */
123    private DomainValidator(boolean allowLocal) {
124       this.allowLocal = allowLocal;
125    }
126
127    /**
128     * Returns true if the specified <code>String</code> parses
129     * as a valid domain name with a recognized top-level domain.
130     * The parsing is case-sensitive.
131     * @param domain the parameter to check for domain name syntax
132     * @return true if the parameter is a valid domain name
133     */
134    @Override
135    public boolean isValid(String domain) {
136        String[] groups = domainRegex.match(domain);
137        if (groups != null && groups.length > 0) {
138            return isValidTld(groups[0]);
139        } else if(allowLocal) {
140            if (hostnameRegex.isValid(domain)) {
141               return true;
142            }
143        }
144        return false;
145    }
146
147    /**
148     * Returns true if the specified <code>String</code> matches any
149     * IANA-defined top-level domain. Leading dots are ignored if present.
150     * The search is case-sensitive.
151     * @param tld the parameter to check for TLD status
152     * @return true if the parameter is a TLD
153     */
154    public boolean isValidTld(String tld) {
155        if(allowLocal && isValidLocalTld(tld)) {
156           return true;
157        }
158        return isValidInfrastructureTld(tld)
159                || isValidGenericTld(tld)
160                || isValidIdnTld(tld)
161                || isValidCountryCodeTld(tld);
162    }
163
164    /**
165     * Returns true if the specified <code>String</code> matches any
166     * IANA-defined infrastructure top-level domain. Leading dots are
167     * ignored if present. The search is case-sensitive.
168     * @param iTld the parameter to check for infrastructure TLD status
169     * @return true if the parameter is an infrastructure TLD
170     */
171    public boolean isValidInfrastructureTld(String iTld) {
172        return Arrays.binarySearch(INFRASTRUCTURE_TLDS, (chompLeadingDot(iTld.toLowerCase()))) >= 0;
173    }
174
175    /**
176     * Returns true if the specified <code>String</code> matches any
177     * IANA-defined generic top-level domain. Leading dots are ignored
178     * if present. The search is case-sensitive.
179     * @param gTld the parameter to check for generic TLD status
180     * @return true if the parameter is a generic TLD
181     */
182    public boolean isValidGenericTld(String gTld) {
183        return Arrays.binarySearch(GENERIC_TLDS, chompLeadingDot(gTld.toLowerCase())) >= 0;
184    }
185
186    /**
187     * Returns true if the specified <code>String</code> matches any
188     * IANA-defined IDN top-level domain. Leading dots are ignored
189     * if present. The search is case-sensitive.
190     * @param iTld the parameter to check for IDN TLD status
191     * @return true if the parameter is an IDN TLD
192     */
193    public boolean isValidIdnTld(String iTld) {
194        return Arrays.binarySearch(IDN_TLDS, chompLeadingDot(iTld.toUpperCase())) >= 0;
195    }
196
197    /**
198     * Returns true if the specified <code>String</code> matches any
199     * IANA-defined country code top-level domain. Leading dots are
200     * ignored if present. The search is case-sensitive.
201     * @param ccTld the parameter to check for country code TLD status
202     * @return true if the parameter is a country code TLD
203     */
204    public boolean isValidCountryCodeTld(String ccTld) {
205        return Arrays.binarySearch(COUNTRY_CODE_TLDS, chompLeadingDot(ccTld.toLowerCase())) >= 0;
206    }
207
208    /**
209     * Returns true if the specified <code>String</code> matches any
210     * widely used "local" domains (localhost or localdomain). Leading dots are
211     *  ignored if present. The search is case-sensitive.
212     * @param iTld the parameter to check for local TLD status
213     * @return true if the parameter is an local TLD
214     */
215    public boolean isValidLocalTld(String iTld) {
216        return Arrays.binarySearch(LOCAL_TLDS, chompLeadingDot(iTld.toLowerCase())) >= 0;
217    }
218
219    private String chompLeadingDot(String str) {
220        if (str.startsWith(".")) {
221            return str.substring(1);
222        } else {
223            return str;
224        }
225    }
226
227    // ---------------------------------------------
228    // ----- TLDs defined by IANA
229    // ----- Authoritative and comprehensive list at:
230    // ----- http://data.iana.org/TLD/tlds-alpha-by-domain.txt
231
232    private static final String[] INFRASTRUCTURE_TLDS = new String[] {
233        "arpa",               // internet infrastructure
234        "root"                // diagnostic marker for non-truncated root zone
235    };
236
237    private static final String[] GENERIC_TLDS = new String[] {
238        "abogado",
239        "academy",
240        "accountants",
241        "active",
242        "actor",
243        "aero",
244        "agency",
245        "airforce",
246        "allfinanz",
247        "alsace",
248        "archi",
249        "army",
250        "arpa",
251        "asia",
252        "associates",
253        "attorney",
254        "auction",
255        "audio",
256        "autos",
257        "axa",
258        "band",
259        "bar",
260        "bargains",
261        "bayern",
262        "beer",
263        "berlin",
264        "best",
265        "bid",
266        "bike",
267        "bio",
268        "biz",
269        "black",
270        "blackfriday",
271        "blue",
272        "bmw",
273        "bnpparibas",
274        "boo",
275        "boutique",
276        "brussels",
277        "budapest",
278        "build",
279        "builders",
280        "business",
281        "buzz",
282        "bzh",
283        "cab",
284        "cal",
285        "camera",
286        "camp",
287        "cancerresearch",
288        "capetown",
289        "capital",
290        "caravan",
291        "cards",
292        "care",
293        "career",
294        "careers",
295        "casa",
296        "cash",
297        "cat",
298        "catering",
299        "center",
300        "ceo",
301        "cern",
302        "channel",
303        "cheap",
304        "christmas",
305        "chrome",
306        "church",
307        "citic",
308        "city",
309        "claims",
310        "cleaning",
311        "click",
312        "clinic",
313        "clothing",
314        "club",
315        "codes",
316        "coffee",
317        "college",
318        "cologne",
319        "com",
320        "community",
321        "company",
322        "computer",
323        "condos",
324        "construction",
325        "consulting",
326        "contractors",
327        "cooking",
328        "cool",
329        "coop",
330        "country",
331        "credit",
332        "creditcard",
333        "crs",
334        "cruises",
335        "cuisinella",
336        "cymru",
337        "dad",
338        "dance",
339        "dating",
340        "day",
341        "deals",
342        "degree",
343        "democrat",
344        "dental",
345        "dentist",
346        "desi",
347        "diamonds",
348        "diet",
349        "digital",
350        "direct",
351        "directory",
352        "discount",
353        "dnp",
354        "domains",
355        "durban",
356        "dvag",
357        "eat",
358        "edu",
359        "education",
360        "email",
361        "engineer",
362        "engineering",
363        "enterprises",
364        "equipment",
365        "esq",
366        "estate",
367        "eus",
368        "events",
369        "exchange",
370        "expert",
371        "exposed",
372        "fail",
373        "farm",
374        "feedback",
375        "finance",
376        "financial",
377        "fish",
378        "fishing",
379        "fitness",
380        "flights",
381        "florist",
382        "flsmidth",
383        "fly",
384        "foo",
385        "forsale",
386        "foundation",
387        "frl",
388        "frogans",
389        "fund",
390        "furniture",
391        "futbol",
392        "gal",
393        "gallery",
394        "gbiz",
395        "gent",
396        "gift",
397        "gifts",
398        "gives",
399        "glass",
400        "gle",
401        "global",
402        "globo",
403        "gmail",
404        "gmo",
405        "gmx",
406        "google",
407        "gop",
408        "gov",
409        "graphics",
410        "gratis",
411        "green",
412        "gripe",
413        "guide",
414        "guitars",
415        "guru",
416        "hamburg",
417        "haus",
418        "healthcare",
419        "help",
420        "here",
421        "hiphop",
422        "hiv",
423        "holdings",
424        "holiday",
425        "homes",
426        "horse",
427        "host",
428        "hosting",
429        "house",
430        "how",
431        "ibm",
432        "immo",
433        "immobilien",
434        "industries",
435        "info",
436        "ing",
437        "ink",
438        "institute",
439        "insure",
440        "int",
441        "international",
442        "investments",
443        "jetzt",
444        "jobs",
445        "joburg",
446        "juegos",
447        "kaufen",
448        "kim",
449        "kitchen",
450        "kiwi",
451        "koeln",
452        "krd",
453        "kred",
454        "lacaixa",
455        "land",
456        "lawyer",
457        "lease",
458        "lgbt",
459        "life",
460        "lighting",
461        "limited",
462        "limo",
463        "link",
464        "loans",
465        "london",
466        "lotto",
467        "ltda",
468        "luxe",
469        "luxury",
470        "maison",
471        "management",
472        "mango",
473        "market",
474        "marketing",
475        "media",
476        "meet",
477        "melbourne",
478        "meme",
479        "menu",
480        "miami",
481        "mil",
482        "mini",
483        "mobi",
484        "moda",
485        "moe",
486        "monash",
487        "mortgage",
488        "moscow",
489        "motorcycles",
490        "mov",
491        "museum",
492        "nagoya",
493        "name",
494        "navy",
495        "net",
496        "network",
497        "neustar",
498        "new",
499        "nexus",
500        "ngo",
501        "nhk",
502        "ninja",
503        "nra",
504        "nrw",
505        "nyc",
506        "okinawa",
507        "ong",
508        "onl",
509        "ooo",
510        "org",
511        "organic",
512        "otsuka",
513        "ovh",
514        "paris",
515        "partners",
516        "parts",
517        "pharmacy",
518        "photo",
519        "photography",
520        "photos",
521        "physio",
522        "pics",
523        "pictures",
524        "pink",
525        "pizza",
526        "place",
527        "plumbing",
528        "pohl",
529        "poker",
530        "post",
531        "praxi",
532        "press",
533        "pro",
534        "prod",
535        "productions",
536        "prof",
537        "properties",
538        "property",
539        "pub",
540        "qpon",
541        "quebec",
542        "realtor",
543        "recipes",
544        "red",
545        "rehab",
546        "reise",
547        "reisen",
548        "ren",
549        "rentals",
550        "repair",
551        "report",
552        "republican",
553        "rest",
554        "restaurant",
555        "reviews",
556        "rich",
557        "rio",
558        "rip",
559        "rocks",
560        "rodeo",
561        "rsvp",
562        "ruhr",
563        "ryukyu",
564        "saarland",
565        "sarl",
566        "sca",
567        "scb",
568        "schmidt",
569        "schule",
570        "scot",
571        "services",
572        "sexy",
573        "shiksha",
574        "shoes",
575        "singles",
576        "social",
577        "software",
578        "sohu",
579        "solar",
580        "solutions",
581        "soy",
582        "space",
583        "spiegel",
584        "supplies",
585        "supply",
586        "support",
587        "surf",
588        "surgery",
589        "suzuki",
590        "systems",
591        "tatar",
592        "tattoo",
593        "tax",
594        "technology",
595        "tel",
596        "tienda",
597        "tips",
598        "tirol",
599        "today",
600        "tokyo",
601        "tools",
602        "top",
603        "town",
604        "toys",
605        "trade",
606        "training",
607        "travel",
608        "tui",
609        "university",
610        "uno",
611        "uol",
612        "vacations",
613        "vegas",
614        "ventures",
615        "versicherung",
616        "vet",
617        "viajes",
618        "villas",
619        "vision",
620        "vlaanderen",
621        "vodka",
622        "vote",
623        "voting",
624        "voto",
625        "voyage",
626        "wales",
627        "wang",
628        "watch",
629        "webcam",
630        "website",
631        "wed",
632        "wedding",
633        "whoswho",
634        "wien",
635        "wiki",
636        "williamhill",
637        "wme",
638        "work",
639        "works",
640        "world",
641        "wtc",
642        "wtf",
643        "xxx",
644        "xyz",
645        "yachts",
646        "yandex",
647        "yoga",
648        "yokohama",
649        "youtube",
650        "zip",
651        "zone",
652    };
653
654    // JOSM PATCH BEGIN
655    // see #10862 - list of IDN TLDs taken from IANA on 2014-12-18
656    private static final String[] IDN_TLDS = new String[] {
657        "XN--1QQW23A",
658        "XN--3BST00M",
659        "XN--3DS443G",
660        "XN--3E0B707E",
661        "XN--45BRJ9C",
662        "XN--45Q11C",
663        "XN--4GBRIM",
664        "XN--55QW42G",
665        "XN--55QX5D",
666        "XN--6FRZ82G",
667        "XN--6QQ986B3XL",
668        "XN--80ADXHKS",
669        "XN--80AO21A",
670        "XN--80ASEHDB",
671        "XN--80ASWG",
672        "XN--90A3AC",
673        "XN--C1AVG",
674        "XN--CG4BKI",
675        "XN--CLCHC0EA0B2G2A9GCD",
676        "XN--CZR694B",
677        "XN--CZRS0T",
678        "XN--CZRU2D",
679        "XN--D1ACJ3B",
680        "XN--D1ALF",
681        "XN--FIQ228C5HS",
682        "XN--FIQ64B",
683        "XN--FIQS8S",
684        "XN--FIQZ9S",
685        "XN--FLW351E",
686        "XN--FPCRJ9C3D",
687        "XN--FZC2C9E2C",
688        "XN--GECRJ9C",
689        "XN--H2BRJ9C",
690        "XN--HXT814E",
691        "XN--I1B6B1A6A2E",
692        "XN--IO0A7I",
693        "XN--J1AMH",
694        "XN--J6W193G",
695        "XN--KPRW13D",
696        "XN--KPRY57D",
697        "XN--KPUT3I",
698        "XN--L1ACC",
699        "XN--LGBBAT1AD8J",
700        "XN--MGB9AWBF",
701        "XN--MGBA3A4F16A",
702        "XN--MGBAAM7A8H",
703        "XN--MGBAB2BD",
704        "XN--MGBAYH7GPA",
705        "XN--MGBBH1A71E",
706        "XN--MGBC0A9AZCG",
707        "XN--MGBERP4A5D4AR",
708        "XN--MGBX4CD0AB",
709        "XN--NGBC5AZD",
710        "XN--NODE",
711        "XN--NQV7F",
712        "XN--NQV7FS00EMA",
713        "XN--O3CW4H",
714        "XN--OGBPF8FL",
715        "XN--P1ACF",
716        "XN--P1AI",
717        "XN--PGBS0DH",
718        "XN--Q9JYB4C",
719        "XN--QCKA1PMC",
720        "XN--RHQV96G",
721        "XN--S9BRJ9C",
722        "XN--SES554G",
723        "XN--UNUP4Y",
724        "XN--VERMGENSBERATER-CTB",
725        "XN--VERMGENSBERATUNG-PWB",
726        "XN--VHQUV",
727        "XN--WGBH1C",
728        "XN--WGBL6A",
729        "XN--XHQ521B",
730        "XN--XKC2AL3HYE2A",
731        "XN--XKC2DL3A5EE0H",
732        "XN--YFRO4I67O",
733        "XN--YGBI2AMMX",
734        "XN--ZFR164B",
735    };
736    // END JOSM PATCH
737
738    private static final String[] COUNTRY_CODE_TLDS = new String[] {
739        "ac",                 // Ascension Island
740        "ad",                 // Andorra
741        "ae",                 // United Arab Emirates
742        "af",                 // Afghanistan
743        "ag",                 // Antigua and Barbuda
744        "ai",                 // Anguilla
745        "al",                 // Albania
746        "am",                 // Armenia
747        "an",                 // Netherlands Antilles
748        "ao",                 // Angola
749        "aq",                 // Antarctica
750        "ar",                 // Argentina
751        "as",                 // American Samoa
752        "at",                 // Austria
753        "au",                 // Australia (includes Ashmore and Cartier Islands and Coral Sea Islands)
754        "aw",                 // Aruba
755        "ax",                 // Åland
756        "az",                 // Azerbaijan
757        "ba",                 // Bosnia and Herzegovina
758        "bb",                 // Barbados
759        "bd",                 // Bangladesh
760        "be",                 // Belgium
761        "bf",                 // Burkina Faso
762        "bg",                 // Bulgaria
763        "bh",                 // Bahrain
764        "bi",                 // Burundi
765        "bj",                 // Benin
766        "bm",                 // Bermuda
767        "bn",                 // Brunei Darussalam
768        "bo",                 // Bolivia
769        "br",                 // Brazil
770        "bs",                 // Bahamas
771        "bt",                 // Bhutan
772        "bv",                 // Bouvet Island
773        "bw",                 // Botswana
774        "by",                 // Belarus
775        "bz",                 // Belize
776        "ca",                 // Canada
777        "cc",                 // Cocos (Keeling) Islands
778        "cd",                 // Democratic Republic of the Congo (formerly Zaire)
779        "cf",                 // Central African Republic
780        "cg",                 // Republic of the Congo
781        "ch",                 // Switzerland
782        "ci",                 // Côte d'Ivoire
783        "ck",                 // Cook Islands
784        "cl",                 // Chile
785        "cm",                 // Cameroon
786        "cn",                 // China, mainland
787        "co",                 // Colombia
788        "cr",                 // Costa Rica
789        "cu",                 // Cuba
790        "cv",                 // Cape Verde
791        "cw",                 // Curaçao
792        "cx",                 // Christmas Island
793        "cy",                 // Cyprus
794        "cz",                 // Czech Republic
795        "de",                 // Germany
796        "dj",                 // Djibouti
797        "dk",                 // Denmark
798        "dm",                 // Dominica
799        "do",                 // Dominican Republic
800        "dz",                 // Algeria
801        "ec",                 // Ecuador
802        "ee",                 // Estonia
803        "eg",                 // Egypt
804        "er",                 // Eritrea
805        "es",                 // Spain
806        "et",                 // Ethiopia
807        "eu",                 // European Union
808        "fi",                 // Finland
809        "fj",                 // Fiji
810        "fk",                 // Falkland Islands
811        "fm",                 // Federated States of Micronesia
812        "fo",                 // Faroe Islands
813        "fr",                 // France
814        "ga",                 // Gabon
815        "gb",                 // Great Britain (United Kingdom)
816        "gd",                 // Grenada
817        "ge",                 // Georgia
818        "gf",                 // French Guiana
819        "gg",                 // Guernsey
820        "gh",                 // Ghana
821        "gi",                 // Gibraltar
822        "gl",                 // Greenland
823        "gm",                 // The Gambia
824        "gn",                 // Guinea
825        "gp",                 // Guadeloupe
826        "gq",                 // Equatorial Guinea
827        "gr",                 // Greece
828        "gs",                 // South Georgia and the South Sandwich Islands
829        "gt",                 // Guatemala
830        "gu",                 // Guam
831        "gw",                 // Guinea-Bissau
832        "gy",                 // Guyana
833        "hk",                 // Hong Kong
834        "hm",                 // Heard Island and McDonald Islands
835        "hn",                 // Honduras
836        "hr",                 // Croatia (Hrvatska)
837        "ht",                 // Haiti
838        "hu",                 // Hungary
839        "id",                 // Indonesia
840        "ie",                 // Ireland (Éire)
841        "il",                 // Israel
842        "im",                 // Isle of Man
843        "in",                 // India
844        "io",                 // British Indian Ocean Territory
845        "iq",                 // Iraq
846        "ir",                 // Iran
847        "is",                 // Iceland
848        "it",                 // Italy
849        "je",                 // Jersey
850        "jm",                 // Jamaica
851        "jo",                 // Jordan
852        "jp",                 // Japan
853        "ke",                 // Kenya
854        "kg",                 // Kyrgyzstan
855        "kh",                 // Cambodia (Khmer)
856        "ki",                 // Kiribati
857        "km",                 // Comoros
858        "kn",                 // Saint Kitts and Nevis
859        "kp",                 // North Korea
860        "kr",                 // South Korea
861        "kw",                 // Kuwait
862        "ky",                 // Cayman Islands
863        "kz",                 // Kazakhstan
864        "la",                 // Laos (currently being marketed as the official domain for Los Angeles)
865        "lb",                 // Lebanon
866        "lc",                 // Saint Lucia
867        "li",                 // Liechtenstein
868        "lk",                 // Sri Lanka
869        "lr",                 // Liberia
870        "ls",                 // Lesotho
871        "lt",                 // Lithuania
872        "lu",                 // Luxembourg
873        "lv",                 // Latvia
874        "ly",                 // Libya
875        "ma",                 // Morocco
876        "mc",                 // Monaco
877        "md",                 // Moldova
878        "me",                 // Montenegro
879        "mg",                 // Madagascar
880        "mh",                 // Marshall Islands
881        "mk",                 // Republic of Macedonia
882        "ml",                 // Mali
883        "mm",                 // Myanmar
884        "mn",                 // Mongolia
885        "mo",                 // Macau
886        "mp",                 // Northern Mariana Islands
887        "mq",                 // Martinique
888        "mr",                 // Mauritania
889        "ms",                 // Montserrat
890        "mt",                 // Malta
891        "mu",                 // Mauritius
892        "mv",                 // Maldives
893        "mw",                 // Malawi
894        "mx",                 // Mexico
895        "my",                 // Malaysia
896        "mz",                 // Mozambique
897        "na",                 // Namibia
898        "nc",                 // New Caledonia
899        "ne",                 // Niger
900        "nf",                 // Norfolk Island
901        "ng",                 // Nigeria
902        "ni",                 // Nicaragua
903        "nl",                 // Netherlands
904        "no",                 // Norway
905        "np",                 // Nepal
906        "nr",                 // Nauru
907        "nu",                 // Niue
908        "nz",                 // New Zealand
909        "om",                 // Oman
910        "pa",                 // Panama
911        "pe",                 // Peru
912        "pf",                 // French Polynesia With Clipperton Island
913        "pg",                 // Papua New Guinea
914        "ph",                 // Philippines
915        "pk",                 // Pakistan
916        "pl",                 // Poland
917        "pm",                 // Saint-Pierre and Miquelon
918        "pn",                 // Pitcairn Islands
919        "pr",                 // Puerto Rico
920        "ps",                 // Palestinian territories (PA-controlled West Bank and Gaza Strip)
921        "pt",                 // Portugal
922        "pw",                 // Palau
923        "py",                 // Paraguay
924        "qa",                 // Qatar
925        "re",                 // Réunion
926        "ro",                 // Romania
927        "rs",                 // Serbia
928        "ru",                 // Russia
929        "rw",                 // Rwanda
930        "sa",                 // Saudi Arabia
931        "sb",                 // Solomon Islands
932        "sc",                 // Seychelles
933        "sd",                 // Sudan
934        "se",                 // Sweden
935        "sg",                 // Singapore
936        "sh",                 // Saint Helena
937        "si",                 // Slovenia
938        "sj",                 // Svalbard and Jan Mayen Islands Not in use (Norwegian dependencies; see .no)
939        "sk",                 // Slovakia
940        "sl",                 // Sierra Leone
941        "sm",                 // San Marino
942        "sn",                 // Senegal
943        "so",                 // Somalia
944        "sr",                 // Suriname
945        "st",                 // São Tomé and Príncipe
946        "su",                 // Soviet Union (deprecated)
947        "sv",                 // El Salvador
948        "sx",                 // Sint Maarten
949        "sy",                 // Syria
950        "sz",                 // Swaziland
951        "tc",                 // Turks and Caicos Islands
952        "td",                 // Chad
953        "tf",                 // French Southern and Antarctic Lands
954        "tg",                 // Togo
955        "th",                 // Thailand
956        "tj",                 // Tajikistan
957        "tk",                 // Tokelau
958        "tl",                 // East Timor (deprecated old code)
959        "tm",                 // Turkmenistan
960        "tn",                 // Tunisia
961        "to",                 // Tonga
962        "tp",                 // East Timor
963        "tr",                 // Turkey
964        "tt",                 // Trinidad and Tobago
965        "tv",                 // Tuvalu
966        "tw",                 // Taiwan, Republic of China
967        "tz",                 // Tanzania
968        "ua",                 // Ukraine
969        "ug",                 // Uganda
970        "uk",                 // United Kingdom
971        "um",                 // United States Minor Outlying Islands
972        "us",                 // United States of America
973        "uy",                 // Uruguay
974        "uz",                 // Uzbekistan
975        "va",                 // Vatican City State
976        "vc",                 // Saint Vincent and the Grenadines
977        "ve",                 // Venezuela
978        "vg",                 // British Virgin Islands
979        "vi",                 // U.S. Virgin Islands
980        "vn",                 // Vietnam
981        "vu",                 // Vanuatu
982        "wf",                 // Wallis and Futuna
983        "ws",                 // Samoa (formerly Western Samoa)
984        "ye",                 // Yemen
985        "yt",                 // Mayotte
986        "yu",                 // Serbia and Montenegro (originally Yugoslavia)
987        "za",                 // South Africa
988        "zm",                 // Zambia
989        "zw",                 // Zimbabwe
990    };
991
992    private static final String[] LOCAL_TLDS = new String[] {
993       "localhost",           // RFC2606 defined
994       "localdomain"          // Also widely used as localhost.localdomain
995   };
996
997    static {
998        Arrays.sort(INFRASTRUCTURE_TLDS);
999        Arrays.sort(COUNTRY_CODE_TLDS);
1000        Arrays.sort(GENERIC_TLDS);
1001        Arrays.sort(IDN_TLDS);
1002        Arrays.sort(LOCAL_TLDS);
1003    }
1004}