// Block mixed numeric lookalike + numeric (੨0.com, uses U+0A68).
["xn--0-6ee.com", "\u0a680.com", kUnsafe], // Block fully numeric lookalikes (৪੨.com using U+09EA and U+0A68).
["xn--47b6w.com", "\u09ea\u0a68.com", kUnsafe], // Block single script digit lookalikes (using three U+0A68 characters).
["xn--qccaa.com", "\u0a68\u0a68\u0a68.com", kUnsafe],
// URL test with mostly numbers and one confusable character // Georgian 'd' 4000.com
["xn--4000-pfr.com", "\u10eb4000.com", kUnsafe, "DISABLED"],
// What used to be 5 Aspirational scripts in the earlier versions of UAX 31. // UAX 31 does not define aspirational scripts any more. // See http://www.unicode.org/reports/tr31/#Aspirational_Use_Scripts . // Unified Canadian Syllabary
["xn--dfe0tte.ca", "\u1456\u14c2\u14ef.ca", kUnsafe], // Tifinagh
["xn--4ljxa2bb4a6bxb.ma", "\u2d5c\u2d49\u2d3c\u2d49\u2d4f\u2d30\u2d56.ma", kUnsafe], // Tifinagh with a disallowed character(U+2D6F)
["xn--hmjzaby5d5f.ma", "\u2d5c\u2d49\u2d3c\u2d6f\u2d49\u2d4f.ma", kInvalid],
// Yi
["xn--4o7a6e1x64c.cn", "\ua188\ua320\ua071\ua0b7.cn", kUnsafe], // Mongolian - 'ordu' (place, camp)
["xn--56ec8bp.cn", "\u1823\u1837\u1833\u1824.cn", kUnsafe], // Mongolian with a disallowed character
["xn--95e5de3ds.cn", "\u1823\u1837\u1804\u1833\u1824.cn", kUnsafe], // Miao/Pollad
["xn--2u0fpf0a.cn", "\U00016f04\U00016f62\U00016f59.cn", kUnsafe],
// Script mixing tests // The following script combinations are allowed. // HIGHLY_RESTRICTIVE with Latin limited to ASCII-Latin. // ASCII-Latin + Japn (Kana + Han) // ASCII-Latin + Kore (Hangul + Han) // ASCII-Latin + Han + Bopomofo // "payp<alpha>l.com"
["xn--paypl-g9d.com", "payp\u03b1l.com", kUnsafe], // google.gr with Greek omicron and epsilon
["xn--ggl-6xc1ca.gr", "g\u03bf\u03bfgl\u03b5.gr", kUnsafe], // google.ru with Cyrillic o
["xn--ggl-tdd6ba.ru", "g\u043e\u043egl\u0435.ru", kUnsafe], // h<e with acute>llo<China in Han>.cn
["xn--hllo-bpa7979ih5m.cn", "h\u00e9llo\u4e2d\u56fd.cn", kUnsafe, "DISABLED"], // <Greek rho><Cyrillic a><Cyrillic u>.ru
["xn--2xa6t2b.ru", "\u03c1\u0430\u0443.ru", kUnsafe], // Georgian + Latin
["xn--abcef-vuu.test", "abc\u10ebef.test", kUnsafe], // Hangul + Latin
["xn--han-eb9ll88m.kr", "\ud55c\uae00han.kr", kSafe], // Hangul + Latin + Han with IDN ccTLD
["xn--han-or0kq92gkm3c.xn--3e0b707e", "\ud55c\uae00han\u97d3.\ud55c\uad6d", kSafe], // non-ASCII Latin + Hangul
["xn--caf-dma9024xvpg.kr", "caf\u00e9\uce74\ud398.kr", kUnsafe, "DISABLED"], // Hangul + Hiragana
["xn--y9j3b9855e.kr", "\ud55c\u3072\u3089.kr", kUnsafe], // <Hiragana>.<Hangul> is allowed because script mixing check is per label.
["xn--y9j3b.xn--3e0b707e", "\u3072\u3089.\ud55c\uad6d", kSafe], // Traditional Han + Latin
["xn--hanzi-u57ii69i.tw", "\u6f22\u5b57hanzi.tw", kSafe], // Simplified Han + Latin
["xn--hanzi-u57i952h.cn", "\u6c49\u5b57hanzi.cn", kSafe], // Simplified Han + Traditonal Han
["xn--hanzi-if9kt8n.cn", "\u6c49\u6f22hanzi.cn", kSafe], // Han + Hiragana + Katakana + Latin
["xn--kanji-ii4dpizfq59yuykqr4b.jp", "\u632f\u308a\u4eee\u540d\u30ab\u30bfkanji.jp", kSafe], // Han + Bopomofo
["xn--5ekcde0577e87tc.tw", "\u6ce8\u97f3\u3105\u3106\u3107\u3108.tw", kSafe], // Han + Latin + Bopomofo
["xn--bopo-ty4cghi8509kk7xd.tw", "\u6ce8\u97f3bopo\u3105\u3106\u3107\u3108.tw", kSafe], // Latin + Bopomofo
["xn--bopomofo-hj5gkalm.tw", "bopomofo\u3105\u3106\u3107\u3108.tw", kSafe], // Bopomofo + Katakana
["xn--lcka3d1bztghi.tw", "\u3105\u3106\u3107\u3108\u30ab\u30bf\u30ab\u30ca.tw", kUnsafe], // Bopomofo + Hangul
["xn--5ekcde4543qbec.tw", "\u3105\u3106\u3107\u3108\uc8fc\uc74c.tw", kUnsafe], // Devanagari + Latin
["xn--ab-3ofh8fqbj6h.in", "ab\u0939\u093f\u0928\u094d\u0926\u0940.in", kUnsafe], // Thai + Latin
["xn--ab-jsi9al4bxdb6n.th", "ab\u0e20\u0e32\u0e29\u0e32\u0e44\u0e17\u0e22.th", kUnsafe], // Armenian + Latin
["xn--bs-red.com", "b\u057ds.com", kUnsafe], // Tibetan + Latin
["xn--foo-vkm.com", "foo\u0f37.com", kUnsafe], // Oriya + Latin
["xn--fo-h3g.com", "fo\u0b66.com", kUnsafe], // Gujarati + Latin
["xn--fo-isg.com", "fo\u0ae6.com", kUnsafe], // <vitamin in Katakana>b1.com
["xn--b1-xi4a7cvc9f.com", "\u30d3\u30bf\u30df\u30f3b1.com", kSafe], // Devanagari + Han
["xn--t2bes3ds6749n.com", "\u0930\u094b\u0932\u0947\u76e7\u0938.com", kUnsafe], // Devanagari + Bengali
["xn--11b0x.in", "\u0915\u0995.in", kUnsafe], // Canadian Syllabary + Latin
["xn--ab-lym.com", "ab\u14bf.com", kUnsafe],
["xn--ab1-p6q.com", "ab1\u14bf.com", kUnsafe],
["xn--1ab-m6qd.com", "\u14bf1ab\u14bf.com", kUnsafe],
["xn--ab-jymc.com", "\u14bfab\u14bf.com", kUnsafe], // Tifinagh + Latin
["xn--liy-bq1b.com", "li\u2d4fy.com", kUnsafe],
["xn--rol-cq1b.com", "rol\u2d4f.com", kUnsafe],
["xn--ily-8p1b.com", "\u2d4fily.com", kUnsafe],
["xn--1ly-8p1b.com", "\u2d4f1ly.com", kUnsafe],
// Invisibility check // Thai tone mark malek(U+0E48) repeated
["xn--03c0b3ca.th", "\u0e23\u0e35\u0e48\u0e48.th", kUnsafe], // Accute accent repeated
["xn--a-xbba.com", "a\u0301\u0301.com", kInvalid], // 'a' with acuted accent + another acute accent
["xn--1ca20i.com", "\u00e1\u0301.com", kUnsafe, "DISABLED"], // Combining mark at the beginning
["xn--abc-fdc.jp", "\u0300abc.jp", kInvalid],
// The following three are detected by |dangerous_pattern| regex, but // can be regarded as an extension of blocking repeated diacritic marks. // i followed by U+0307 (combining dot above)
["xn--pixel-8fd.com", "pi\u0307xel.com", kUnsafe], // U+0131 (dotless i) followed by U+0307
["xn--pxel-lza43z.com", "p\u0131\u0307xel.com", kUnsafe], // j followed by U+0307 (combining dot above)
["xn--jack-qwc.com", "j\u0307ack.com", kUnsafe], // l followed by U+0307
["xn--lace-qwc.com", "l\u0307ace.com", kUnsafe],
// Do not allow a combining mark after dotless i/j.
["xn--pxel-lza29y.com", "p\u0131\u0300xel.com", kUnsafe],
["xn--ack-gpb42h.com", "\u0237\u0301ack.com", kUnsafe],
// Mixed script confusable // google with Armenian Small Letter Oh(U+0585)
["xn--gogle-lkg.com", "g\u0585ogle.com", kUnsafe],
["xn--range-kkg.com", "\u0585range.com", kUnsafe],
["xn--cucko-pkg.com", "cucko\u0585.com", kUnsafe], // Latin 'o' in Armenian.
["xn--o-ybcg0cu0cq.com", "o\u0580\u0574\u0578\u0582\u0566\u0568.com", kUnsafe], // Hiragana HE(U+3078) mixed with Katakana
["xn--49jxi3as0d0fpc.com", "\u30e2\u30d2\u30fc\u30c8\u3078\u30d6\u30f3.com", kUnsafe],
// U+30FC should be preceded by a Hiragana/Katakana. // Katakana + U+30FC + Han
["xn--lck0ip02qw5ya.jp", "\u30ab\u30fc\u91ce\u7403.jp", kSafe], // Hiragana + U+30FC + Han
["xn--u8j5tr47nw5ya.jp", "\u304b\u30fc\u91ce\u7403.jp", kSafe], // U+30FC + Han
["xn--weka801xo02a.com", "\u30fc\u52d5\u753b\u30fc.com", kUnsafe], // Han + U+30FC + Han
["xn--wekz60nb2ay85atj0b.jp", "\u65e5\u672c\u30fc\u91ce\u7403.jp", kUnsafe], // U+30FC at the beginning
["xn--wek060nb2a.jp", "\u30fc\u65e5\u672c.jp", kUnsafe], // Latin + U+30FC + Latin
["xn--abcdef-r64e.jp", "abc\u30fcdef.jp", kUnsafe],
// U+30FB (・) is not allowed next to Latin, but allowed otherwise. // U+30FB + Han
["xn--vekt920a.jp", "\u30fb\u91ce.jp", kSafe], // Han + U+30FB + Han
["xn--vek160nb2ay85atj0b.jp", "\u65e5\u672c\u30fb\u91ce\u7403.jp", kSafe], // Latin + U+30FB + Latin
["xn--abcdef-k64e.jp", "abc\u30fbdef.jp", kUnsafe], // U+30FB + Latin
["xn--abc-os4b.jp", "\u30fbabc.jp", kUnsafe],
// U+30FD (ヽ) is allowed only after Katakana. // Katakana + U+30FD
["xn--lck2i.jp", "\u30ab\u30fd.jp", kSafe], // Hiragana + U+30FD
["xn--u8j7t.jp", "\u304b\u30fd.jp", kUnsafe], // Han + U+30FD
["xn--xek368f.jp", "\u4e00\u30fd.jp", kUnsafe],
["xn--a-mju.jp", "a\u30fd.jp", kUnsafe],
["xn--a1-bo4a.jp", "a1\u30fd.jp", kUnsafe],
// U+30FE (ヾ) is allowed only after Katakana. // Katakana + U+30FE
["xn--lck4i.jp", "\u30ab\u30fe.jp", kSafe], // Hiragana + U+30FE
["xn--u8j9t.jp", "\u304b\u30fe.jp", kUnsafe], // Han + U+30FE
["xn--yek168f.jp", "\u4e00\u30fe.jp", kUnsafe],
["xn--a-oju.jp", "a\u30fe.jp", kUnsafe],
["xn--a1-eo4a.jp", "a1\u30fe.jp", kUnsafe],
// Cyrillic labels made of Latin-look-alike Cyrillic letters. // 1) ѕсоре.com with ѕсоре in Cyrillic.
["xn--e1argc3h.com", "\u0455\u0441\u043e\u0440\u0435.com", kUnsafe, cyrillicConfusableExpectation()], // 2) ѕсоре123.com with ѕсоре in Cyrillic.
["xn--123-qdd8bmf3n.com", "\u0455\u0441\u043e\u0440\u0435123.com", kUnsafe, cyrillicConfusableExpectation()], // 3) ѕсоре-рау.com with ѕсоре and рау in Cyrillic.
["xn----8sbn9akccw8m.com", "\u0455\u0441\u043e\u0440\u0435-\u0440\u0430\u0443.com", kUnsafe, cyrillicConfusableExpectation()], // 4) ѕсоре1рау.com with scope and pay in Cyrillic and a non-letter between // them.
["xn--1-8sbn9akccw8m.com", "\u0455\u0441\u043e\u0440\u0435\u0031\u0440\u0430\u0443.com", kUnsafe, cyrillicConfusableExpectation()],
// The same as the four cases above, but in IDN TLD (рф).
// 1) ѕсоре.рф with ѕсоре in Cyrillic.
["xn--e1argc3h.xn--p1ai", "\u0455\u0441\u043e\u0440\u0435.\u0440\u0444", kSafe], // 2) ѕсоре123.рф with ѕсоре in Cyrillic.
["xn--123-qdd8bmf3n.xn--p1ai", "\u0455\u0441\u043e\u0440\u0435123.\u0440\u0444", kSafe], // 3) ѕсоре-рау.рф with ѕсоре and рау in Cyrillic.
["xn----8sbn9akccw8m.xn--p1ai", "\u0455\u0441\u043e\u0440\u0435-\u0440\u0430\u0443.\u0440\u0444", kSafe], // 4) ѕсоре1рау.com with scope and pay in Cyrillic and a non-letter between // them.
["xn--1-8sbn9akccw8m.xn--p1ai", "\u0455\u0441\u043e\u0440\u0435\u0031\u0440\u0430\u0443.\u0440\u0444", kSafe],
// Same as the four cases above, but in .ru TLD.
// 1) ѕсоре.ru with ѕсоре in Cyrillic.
["xn--e1argc3h.ru", "\u0455\u0441\u043e\u0440\u0435.ru", kSafe], // 2) ѕсоре123.ru with ѕсоре in Cyrillic.
["xn--123-qdd8bmf3n.ru", "\u0455\u0441\u043e\u0440\u0435123.ru", kSafe], // 3) ѕсоре-рау.ru with ѕсоре and рау in Cyrillic.
["xn----8sbn9akccw8m.ru", "\u0455\u0441\u043e\u0440\u0435-\u0440\u0430\u0443.ru", kSafe], // 4) ѕсоре1рау.com with scope and pay in Cyrillic and a non-letter between // them.
["xn--1-8sbn9akccw8m.ru", "\u0455\u0441\u043e\u0440\u0435\u0031\u0440\u0430\u0443.ru", kSafe],
// ѕсоре-рау.한국 with ѕсоре and рау in Cyrillic. The label will remain // punycode while the TLD will be decoded.
["xn----8sbn9akccw8m.xn--3e0b707e", "xn----8sbn9akccw8m.\ud55c\uad6d", kSafe, cyrillicConfusableExpectation()],
// музей (museum in Russian) has characters without a Latin-look-alike.
["xn--e1adhj9a.com", "\u043c\u0443\u0437\u0435\u0439.com", kSafe],
// ѕсоԗе.com is Cyrillic with Latin lookalikes.
["xn--e1ari3f61c.com", "\u0455\u0441\u043e\u0517\u0435.com", kUnsafe, cyrillicConfusableExpectation()],
// ыоԍ.com is Cyrillic with Latin lookalikes.
["xn--n1az74c.com", "\u044b\u043e\u050d.com", kUnsafe],
// сю.com is Cyrillic with Latin lookalikes.
["xn--q1a0a.com", "\u0441\u044e.com", kUnsafe, cyrillicConfusableExpectation()],
// Regression test for lowercase letters in whole script confusable // lookalike character lists.
["xn--80a8a6a.com", "\u0430\u044c\u0441.com", kUnsafe, cyrillicConfusableExpectation()],
// googlе.한국 where е is Cyrillic. This tests the generic case when one // label is not allowed but other labels in the domain name are still // decoded. Here, googlе is left in punycode but the TLD is decoded.
["xn--googl-3we.xn--3e0b707e", "xn--googl-3we.\ud55c\uad6d", kSafe],
// Combining Diacritic marks after a script other than Latin-Greek-Cyrillic
["xn--rsa2568fvxya.com", "\ud55c\u0307\uae00.com", kUnsafe], // 한́글.com
["xn--rsa0336bjom.com", "\u6f22\u0307\u5b57.com", kUnsafe], // 漢̇字.com // नागरी́.com
["xn--lsa922apb7a6do.com", "\u0928\u093e\u0917\u0930\u0940\u0301.com", kUnsafe],
// Similarity checks against the list of top domains. "digklmo68.com" and // 'digklmo68.co.uk" are listed for unittest in the top domain list. // đigklmo68.com:
["xn--igklmo68-kcb.com", "\u0111igklmo68.com", kUnsafe, "DISABLED"], // www.đigklmo68.com:
["www.xn--igklmo68-kcb.com", "www.\u0111igklmo68.com", kUnsafe, "DISABLED"], // foo.bar.đigklmo68.com:
["foo.bar.xn--igklmo68-kcb.com", "foo.bar.\u0111igklmo68.com", kUnsafe, "DISABLED"], // đigklmo68.co.uk:
["xn--igklmo68-kcb.co.uk", "\u0111igklmo68.co.uk", kUnsafe, "DISABLED"], // mail.đigklmo68.co.uk:
["mail.xn--igklmo68-kcb.co.uk", "mail.\u0111igklmo68.co.uk", kUnsafe, "DISABLED"], // di̇gklmo68.com:
["xn--digklmo68-6jf.com", "di\u0307gklmo68.com", kUnsafe], // dig̱klmo68.com:
["xn--digklmo68-7vf.com", "dig\u0331klmo68.com", kUnsafe, "DISABLED"], // digĸlmo68.com:
["xn--diglmo68-omb.com", "dig\u0138lmo68.com", kUnsafe], // digkłmo68.com:
["xn--digkmo68-9ob.com", "digk\u0142mo68.com", kUnsafe, "DISABLED"], // digklṃo68.com:
["xn--digklo68-l89c.com", "digkl\u1e43o68.com", kUnsafe, "DISABLED"], // digklmø68.com:
["xn--digklm68-b5a.com", "digklm\u00f868.com", kUnsafe, "DISABLED"], // digklmoб8.com:
["xn--digklmo8-h7g.com", "digklmo\u04318.com", kUnsafe], // digklmo6৪.com:
["xn--digklmo6-7yr.com", "digklmo6\u09ea.com", kUnsafe],
// 'islkpx123.com' is in the test domain list. // 'іѕӏкрх123' can look like 'islkpx123' in some fonts.
["xn--123-bed4a4a6hh40i.com", "\u0456\u0455\u04cf\u043a\u0440\u0445123.com", kUnsafe, cyrillicConfusableExpectation()],
// 'o2.com', '28.com', '39.com', '43.com', '89.com', 'oo.com' and 'qq.com' // are all explicitly added to the test domain list to aid testing of // Latin-lookalikes that are numerics in other character sets and similar // edge cases. // // Bengali:
["xn--07be.com", "\u09e6\u09e8.com", kUnsafe, "DISABLED"],
["xn--27be.com", "\u09e8\u09ea.com", kUnsafe],
["xn--77ba.com", "\u09ed\u09ed.com", kUnsafe, "DISABLED"], // Gurmukhi:
["xn--qcce.com", "\u0a68\u0a6a.com", kUnsafe],
["xn--occe.com", "\u0a66\u0a68.com", kUnsafe, "DISABLED"],
["xn--rccd.com", "\u0a6b\u0a69.com", kUnsafe],
["xn--pcca.com", "\u0a67\u0a67.com", kUnsafe, "DISABLED"], // Telugu:
["xn--drcb.com", "\u0c69\u0c68.com", kUnsafe], // Devanagari:
["xn--d4be.com", "\u0966\u0968.com", kUnsafe, "DISABLED"], // Kannada:
["xn--yucg.com", "\u0ce6\u0ce9.com", kUnsafe, "DISABLED"],
["xn--yuco.com", "\u0ce6\u0ced.com", kUnsafe, "DISABLED"], // Oriya:
["xn--1jcf.com", "\u0b6b\u0b68.com", kUnsafe],
["xn--zjca.com", "\u0b66\u0b66.com", kUnsafe, "DISABLED"], // Gujarati:
["xn--cgce.com", "\u0ae6\u0ae8.com", kUnsafe, "DISABLED"],
["xn--fgci.com", "\u0ae9\u0aed.com", kUnsafe],
["xn--dgca.com", "\u0ae7\u0ae7.com", kUnsafe, "DISABLED"],
// U+04CF(ӏ) is mapped to multiple characters, lowercase L(l) and // lowercase I(i). Lowercase L is also regarded as similar to digit 1. // The test domain list has {ig, ld, 1gd}.com for Cyrillic. // ӏԍ.com
["xn--s5a8j.com", "\u04cf\u050d.com", kUnsafe], // ӏԃ.com
["xn--s5a8h.com", "\u04cf\u0503.com", kUnsafe], // ӏԍԃ.com
["xn--s5a8h3a.com", "\u04cf\u050d\u0503.com", kUnsafe],
// ငၔဌ၂ဝ.com (entirely made of Myanmar characters)
["xn--ridq5c9hnd.com", "\u1004\u1054\u100c\u1042\u101d.com", kUnsafe, "DISABLED"],
// ฟรฟร.com (made of two Thai characters. similar to wsws.com in // some fonts)
["xn--w3calb.com", "\u0e1f\u0e23\u0e1f\u0e23.com", kUnsafe], // พรบ.com
["xn--r3chp.com", "\u0e1e\u0e23\u0e1a.com", kUnsafe], // ฟรบ.com
["xn--r3cjm.com", "\u0e1f\u0e23\u0e1a.com", kUnsafe],
// Lao characters that look like w, s, o, and u. // ພຣບ.com
["xn--f7chp.com", "\u0e9e\u0ea3\u0e9a.com", kUnsafe, "DISABLED"], // ຟຣບ.com
["xn--f7cjm.com", "\u0e9f\u0ea3\u0e9a.com", kUnsafe, "DISABLED"], // ຟຮບ.com
["xn--f7cj9b.com", "\u0e9f\u0eae\u0e9a.com", kUnsafe, "DISABLED"], // ຟຮ໐ບ.com
["xn--f7cj9b5h.com", "\u0e9f\u0eae\u0ed0\u0e9a.com", kUnsafe, "DISABLED"],
// Thai character that looks like n.
// ก11.com
["xn--11-lqi.com", "\u0e0111.com", kUnsafe, "DISABLED"],
// At one point the skeleton of 'w' was 'vv'; ensure that it's treated as 'w'.
["xn--wder-qqa.com", "w\u00f3der.com", kUnsafe, "DISABLED"],
// Mixed digits: the first two will also fail mixed script test // Latin + ASCII digit + Deva digit
["xn--asc1deva-j0q.co.in", "asc1deva\u0967.co.in", kUnsafe], // Latin + Deva digit + Beng digit
["xn--devabeng-f0qu3f.co.in", "deva\u0967beng\u09e7.co.in", kUnsafe], // ASCII digit + Deva digit
["xn--79-v5f.co.in", "7\u09ea9.co.in", kUnsafe], // Deva digit + Beng digit
["xn--e4b0x.co.in", "\u0967\u09e7.co.in", kUnsafe], // U+4E00 (CJK Ideograph One) is not a digit, but it's not allowed next to // non-Kana scripts including numbers.
["xn--d12-s18d.cn", "d12\u4e00.cn", kUnsafe, "DISABLED"], // One that's really long that will force a buffer realloc
["aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", kSafe],
// Not allowed; characters outside [:Identifier_Status=Allowed:] // Limited Use Scripts: UTS 31 Table 7. // Vai
["xn--sn8a.com", "\ua50b.com", kUnsafe], // 'CARD' look-alike in Cherokee
["xn--58db0a9q.com", "\u13df\u13aa\u13a1\u13a0.com", kUnsafe], // Scripts excluded from Identifiers: UTS 31 Table 4 // Coptic
["xn--5ya.com", "\u03e7.com", kUnsafe], // Old Italic
["xn--097cc.com", "\U00010300\U00010301.com", kUnsafe],
// U+115F (Hangul Filler)
["xn--osd3820f24c.kr", "\uac00\ub098\u115f.kr", kInvalid],
["www.xn--google-ho0coa.com", "www.\u2039google\u203a.com", kUnsafe], // Latin small capital w: hardᴡare.com
["xn--hardare-l41c.com", "hard\u1d21are.com", kUnsafe], // Minus Sign(U+2212)
["xn--t9g238xc2a.jp", "\u65e5\u2212\u672c.jp", kUnsafe], // Latin Small Letter Script G: ɡɡ.com
["xn--0naa.com", "\u0261\u0261.com", kUnsafe], // Hangul Jamo(U+11xx)
["xn--0pdc3b.com", "\u1102\u1103\u1110.com", kUnsafe], // degree sign: 36°c.com
["xn--36c-tfa.com", "36\u00b0c.com", kUnsafe], // Pound sign
["xn--5free-fga.com", "5free\u00a3.com", kUnsafe], // Hebrew points (U+05B0, U+05B6)
["xn--7cbl2kc2a.com", "\u05e1\u05b6\u05e7\u05b0\u05e1.com", kUnsafe], // Danda(U+0964)
["xn--81bp1b6ch8s.com", "\u0924\u093f\u091c\u0964\u0930\u0940.com", kUnsafe], // Small letter script G(U+0261)
["xn--oogle-qmc.com", "\u0261oogle.com", kUnsafe], // Small Katakana Extension(U+31F1)
["xn--wlk.com", "\u31f1.com", kUnsafe], // Heart symbol: ♥
["xn--ab-u0x.com", "ab\u2665.com", kUnsafe], // Emoji
["xn--vi8hiv.xyz", "\U0001f355\U0001f4a9.xyz", kUnsafe], // Registered trade mark
["xn--egistered-fna.com", "\u00aeegistered.com", kUnsafe], // Latin Letter Retroflex Click
["xn--registered-25c.com", "registered\u01c3.com", kUnsafe], // ASCII '!' not allowed in IDN
["xn--!-257eu42c.kr", "\uc548\ub155!.kr", kUnsafe], // 'GOOGLE' in IPA extension: ɢᴏᴏɢʟᴇ
["xn--1naa7pn51hcbaa.com", "\u0262\u1d0f\u1d0f\u0262\u029f\u1d07.com", kUnsafe], // Padlock icon spoof.
["xn--google-hj64e.com", "\U0001f512google.com", kUnsafe],
// Custom block list // Combining Long Solidus Overlay
["google.xn--comabc-k8d", "google.com\u0338abc", kUnsafe], // Hyphenation Point instead of Katakana Middle dot
["xn--svgy16dha.jp", "\u30a1\u2027\u30a3.jp", kUnsafe], // Gershayim with other Hebrew characters is allowed.
["xn--5db6bh9b.il", "\u05e9\u05d1\u05f4\u05e6.il", kSafe, "DISABLED"], // Hebrew Gershayim with Latin is invalid according to Python's idna // package.
["xn--ab-yod.com", "a\u05f4b.com", kInvalid], // Hebrew Gershayim with Arabic is disallowed.
["xn--5eb7h.eg", "\u0628\u05f4.eg", kUnsafe], // #if BUILDFLAG(IS_APPLE) // These characters are blocked due to a font issue on Mac. // Tibetan transliteration characters.
["xn--com-lum.test.pl", "com\u0f8c.test.pl", kUnsafe], // Arabic letter KASHMIRI YEH
["xn--fgb.com", "\u0620.com", kUnsafe, "macosx"], // #endif
// Hyphens (http://unicode.org/cldr/utility/confusables.jsp?a=-) // Hyphen-Minus (the only hyphen allowed) // abc-def
["abc-def.com", "abc-def.com", kSafe], // Modifier Letter Minus Sign
["xn--abcdef-5od.com", "abc\u02d7def.com", kUnsafe], // Hyphen
["xn--abcdef-dg0c.com", "abc\u2010def.com", kUnsafe], // Non-Breaking Hyphen // This is actually an invalid IDNA domain (U+2011 normalizes to U+2010), // but it is included to ensure that we do not inadvertently allow this // character to be displayed as Unicode.
["xn--abcdef-kg0c.com", "abc\u2011def.com", kInvalid], // Figure Dash. // Python's idna package refuses to decode the minus signs and dashes. ICU // decodes them but treats them as unsafe in spoof checks, so these test // cases are marked as unsafe instead of invalid.
["xn--abcdef-rg0c.com", "abc\u2012def.com", kUnsafe], // En Dash
["xn--abcdef-yg0c.com", "abc\u2013def.com", kUnsafe], // Hyphen Bullet
["xn--abcdef-kq0c.com", "abc\u2043def.com", kUnsafe], // Minus Sign
["xn--abcdef-5d3c.com", "abc\u2212def.com", kUnsafe], // Heavy Minus Sign
["xn--abcdef-kg1d.com", "abc\u2796def.com", kUnsafe], // Em Dash // Small Em Dash (U+FE58) is normalized to Em Dash.
["xn--abcdef-5g0c.com", "abc\u2014def.com", kUnsafe], // Coptic Small Letter Dialect-P Ni. Looks like dash. // Coptic Capital Letter Dialect-P Ni is normalized to small letter.
["xn--abcdef-yy8d.com", "abc\u2cbbdef.com", kUnsafe],
// Block single/double-quote-like characters. // U+02BB (ʻ)
["xn--ab-8nb.com", "a\u02bbb.com", kUnsafe], // U+02BC (ʼ)
["xn--ab-cob.com", "a\u02bcb.com", kUnsafe], // U+144A: Not allowed to mix with scripts other than Canadian Syllabics.
["xn--ab-jom.com", "a\u144ab.com", kUnsafe],
["xn--xcec9s.com", "\u1401\u144a\u1402.com", kUnsafe],
// Custom dangerous patterns // Two Katakana-Hiragana combining mark in a row
["google.xn--com-oh4ba.evil.jp", "google.com\u309a\u309a.evil.jp", kUnsafe], // Katakana Letter No not enclosed by {Han,Hiragana,Katakana}.
["google.xn--comevil-v04f.jp", "google.com\u30ceevil.jp", kUnsafe], // TODO(jshin): Review the danger of allowing the following two. // Hiragana 'No' by itself is allowed.
["xn--ldk.jp", "\u30ce.jp", kSafe], // Hebrew Gershayim used by itself is allowed.
["xn--5eb.il", "\u05f4.il", kSafe, "DISABLED"],
// Block RTL nonspacing marks (NSM) after unrelated scripts.
["xn--foog-ycg.com", "foog\u0650.com", kUnsafe], // Latin + Arabic N]M
["xn--foog-jdg.com", "foog\u0654.com", kUnsafe], // Latin + Arabic N]M
["xn--foog-jhg.com", "foog\u0670.com", kUnsafe], // Latin + Arbic N]M
["xn--foog-opf.com", "foog\u05b4.com", kUnsafe], // Latin + Hebrew N]M
["xn--shb5495f.com", "\uac00\u0650.com", kUnsafe], // Hang + Arabic N]M
// 4 Deviation characters between IDNA 2003 and IDNA 2008 // When entered in Unicode, the first two are mapped to 'ss' and Greek sigma // and the latter two are mapped away. However, the punycode form should // remain in punycode. // U+00DF(sharp-s)
["xn--fu-hia.de", "fu\u00df.de", kUnsafe, "DISABLED"], // U+03C2(final-sigma)
["xn--mxac2c.gr", "\u03b1\u03b2\u03c2.gr", kUnsafe, "DISABLED"], // U+200C(ZWNJ)
["xn--h2by8byc123p.in", "\u0924\u094d\u200c\u0930\u093f.in", kUnsafe], // U+200C(ZWJ)
["xn--11b6iy14e.in", "\u0915\u094d\u200d.in", kUnsafe],
// Math Monospace Small A. When entered in Unicode, it's canonicalized to // 'a'. The punycode form should remain in punycode.
["xn--bc-9x80a.xyz", "\U0001d68abc.xyz", kInvalid], // Math Sans Bold Capital Alpha
["xn--bc-rg90a.xyz", "\U0001d756bc.xyz", kInvalid], // U+3000 is canonicalized to a space(U+0020), but the punycode form // should remain in punycode.
["xn--p6j412gn7f.cn", "\u4e2d\u56fd\u3000", kInvalid], // U+3002 is canonicalized to ASCII fullstop(U+002E), but the punycode form // should remain in punycode.
["xn--r6j012gn7f.cn", "\u4e2d\u56fd\u3002", kInvalid], // Invalid punycode // Has a codepoint beyond U+10FFFF.
["xn--krank-kg706554a", "", kInvalid], // '?' in punycode.
["xn--hello?world.com", "", kInvalid],
// Not allowed in UTS46/IDNA 2008 // Georgian Capital Letter(U+10BD)
["xn--1nd.com", "\u10bd.com", kInvalid], // 3rd and 4th characters are '-'.
["xn-----8kci4dhsd", "\u0440\u0443--\u0430\u0432\u0442\u043e", kInvalid, "DISABLED"], // Leading combining mark
["xn--72b.com", "\u093e.com", kInvalid], // BiDi check per IDNA 2008/UTS 46 // Cannot starts with AN(Arabic-Indic Number)
["xn--8hbae.eg", "\u0662\u0660\u0660.eg", kInvalid], // Cannot start with a RTL character and ends with a LTR
["xn--x-ymcov.eg", "\u062c\u0627\u0631x.eg", kInvalid], // Can start with a RTL character and ends with EN(European Number)
["xn--2-ymcov.eg", "\u062c\u0627\u06312.eg", kSafe], // Can start with a RTL and end with AN
["xn--mgbjq0r.eg", "\u062c\u0627\u0631\u0662.eg", kSafe],
// Extremely rare Latin letters // Latin Ext B - Pinyin: ǔnion.com
["xn--nion-unb.com", "\u01d4nion.com", kUnsafe, "DISABLED"], // Latin Ext C: ⱴase.com
["xn--ase-7z0b.com", "\u2c74ase.com", kUnsafe], // Latin Ext D: ꝴode.com
["xn--ode-ut3l.com", "\ua774ode.com", kUnsafe], // Latin Ext Additional: ḷily.com
["xn--ily-n3y.com", "\u1e37ily.com", kUnsafe, "DISABLED"], // Latin Ext E: ꬺove.com
["xn--ove-8y6l.com", "\uab3aove.com", kUnsafe], // Greek Ext: ᾳβγ.com
["xn--nxac616s.com", "\u1fb3\u03b2\u03b3.com", kInvalid], // Cyrillic Ext A (label cannot begin with an illegal combining character).
["xn--lrj.com", "\u2def.com", kInvalid], // Cyrillic Ext B: ꙡ.com
["xn--kx8a.com", "\ua661.com", kUnsafe], // Cyrillic Ext C: ᲂ.com (Narrow o)
["xn--43f.com", "\u1c82.com", kInvalid],
// The skeleton of Extended Arabic-Indic Digit Zero (۰) is a dot. Check that // this is handled correctly (crbug/877045).
["xn--dmb", "\u06f0", kSafe],
// Test that top domains whose skeletons are the same as the domain name are // handled properly. In this case, tést.net should match test.net top // domain and not be converted to unicode.
["xn--tst-bma.net", "t\u00e9st.net", kUnsafe, "DISABLED"], // Variations of the above, for testing crbug.com/925199. // some.tést.net should match test.net.
["some.xn--tst-bma.net", "some.t\u00e9st.net", kUnsafe, "DISABLED"], // The following should not match test.net, so should be converted to // unicode. // ést.net (a suffix of tést.net).
["xn--st-9ia.net", "\u00e9st.net", kSafe], // some.ést.net
["some.xn--st-9ia.net", "some.\u00e9st.net", kSafe], // atést.net (tést.net is a suffix of atést.net)
["xn--atst-cpa.net", "at\u00e9st.net", kSafe], // some.atést.net
["some.xn--atst-cpa.net", "some.at\u00e9st.net", kSafe],
// Modifier-letter-voicing should be blocked (wwwˬtest.com).
["xn--wwwtest-2be.com", "www\u02ectest.com", kUnsafe, "DISABLED"],
// oĸ.com: Not a top domain, should be blocked because of Kra.
["xn--o-tka.com", "o\u0138.com", kUnsafe],
// U+4E00 and U+3127 should be blocked when next to non-CJK.
["xn--ipaddress-w75n.com", "ip\u4e00address.com", kUnsafe],
["xn--ipaddress-wx5h.com", "ip\u3127address.com", kUnsafe], // U+4E00 and U+3127 at the beginning and end of a string.
["xn--google-gg5e.com", "google\u3127.com", kUnsafe],
["xn--google-9f5e.com", "\u3127google.com", kUnsafe],
["xn--google-gn7i.com", "google\u4e00.com", kUnsafe],
["xn--google-9m7i.com", "\u4e00google.com", kUnsafe], // These are allowed because U+4E00 and U+3127 are not immediately next to // non-CJK.
["xn--gamer-fg1hz05u.com", "\u4e00\u751fgamer.com", kSafe],
["xn--gamer-kg1hy05u.com", "gamer\u751f\u4e00.com", kSafe],
["xn--gamer-f94d4426b.com", "\u3127\u751fgamer.com", kSafe],
["xn--gamer-k94d3426b.com", "gamer\u751f\u3127.com", kSafe],
["xn--4gqz91g.com", "\u4e00\u732b.com", kSafe],
["xn--4fkv10r.com", "\u3127\u732b.com", kSafe], // U+4E00 with another ideograph.
["xn--4gqc.com", "\u4e00\u4e01.com", kSafe],
// CJK ideographs looking like slashes should be blocked when next to // non-CJK.
["example.xn--comtest-k63k", "example.com\u4e36test", kUnsafe],
["example.xn--comtest-u83k", "example.com\u4e40test", kUnsafe],
["example.xn--comtest-283k", "example.com\u4e41test", kUnsafe],
["example.xn--comtest-m83k", "example.com\u4e3ftest", kUnsafe], // This is allowed because the ideographs are not immediately next to // non-CJK.
["xn--oiqsace.com", "\u4e36\u4e40\u4e41\u4e3f.com", kSafe],
// Kana voiced sound marks are not allowed.
["xn--google-1m4e.com", "google\u3099.com", kUnsafe],
["xn--google-8m4e.com", "google\u309A.com", kUnsafe],
// Small letter theta looks like a zero.
["xn--123456789-yzg.com", "123456789\u03b8.com", kUnsafe],
// U+05D7 can look like Latin n in many fonts.
["xn--ceba.com", "\u05d7\u05d7.com", kUnsafe, "DISABLED"],
// U+00FE (þ) and U+00F0 (ð) are only allowed under the .is TLD.
["xn--acdef-wva.com", "a\u00fecdef.com", kUnsafe],
["xn--mnpqr-jta.com", "mn\u00f0pqr.com", kUnsafe],
["xn--acdef-wva.is", "a\u00fecdef.is", kSafe],
["xn--mnpqr-jta.is", "mn\u00f0pqr.is", kSafe],
// U+0259 (ə) is only allowed under the .az TLD.
["xn--xample-vyc.com", "\u0259xample.com", kUnsafe],
["xn--xample-vyc.az", "\u0259xample.az", kSafe],
// U+00B7 is only allowed on Catalan domains between two l's.
["xn--googlecom-5pa.com", "google\u00b7com.com", kUnsafe],
["xn--ll-0ea.com", "l\u00b7l.com", kUnsafe],
["xn--ll-0ea.cat", "l\u00b7l.cat", kSafe],
["xn--al-0ea.cat", "a\u00b7l.cat", kUnsafe],
["xn--la-0ea.cat", "l\u00b7a.cat", kUnsafe],
["xn--l-fda.cat", "\u00b7l.cat", kUnsafe],
["xn--l-gda.cat", "l\u00b7.cat", kUnsafe],
// crbug.com/769547: Subdomains of top domains should be allowed.
["xn--xample-9ua.test.net", "\u00e9xample.test.net", kSafe], // Skeleton of the eTLD+1 matches a top domain, but the eTLD+1 itself is // not a top domain. Should not be decoded to unicode.
["xn--xample-9ua.test.xn--nt-bja", "\u00e9xample.test.n\u00e9t", kUnsafe, "DISABLED"],
// Digit lookalike check of 16კ.com with character “კ” (U+10D9) // Test case for https://crbug.com/1156531
["xn--16-1ik.com", "16\u10d9.com", kUnsafe],
// Skeleton generator check of officeკ65.com with character “კ” (U+10D9) // Test case for https://crbug.com/1156531
["xn--office65-l04a.com", "office\u10d965.com", kUnsafe],
// Digit lookalike check of 16ੜ.com with character “ੜ” (U+0A5C) // Test case for https://crbug.com/1156531 (missed skeleton map)
["xn--16-ogg.com", "16\u0a5c.com", kUnsafe],
// Skeleton generator check of officeੜ65.com with character “ੜ” (U+0A5C) // Test case for https://crbug.com/1156531 (missed skeleton map)
["xn--office65-hts.com", "office\u0a5c65.com", kUnsafe],
// New test cases go ↑↑ above.
// /!\ WARNING: You MUST use tools/security/idn_test_case_generator.py to
// generate new test cases, as specified by the comment at the top of this
// test list. Why must you use that python script?
// 1. It is easy to get things wrong. There were several hand-crafted
//    incorrect test cases committed that were later fixed.
// 2. This test _also_ is a test of Chromium's IDN encoder/decoder, so using
//    Chromium's IDN encoder/decoder to generate test files loses an
//    advantage of having Python's IDN encode/decode the tests.
];
// Runs every entry of `testCases` followed by `additionalTestcases` through
// idnService.convertToDisplayIDN() and checks the result against the entry's
// expectation.
//
// Each entry is an array: [input, unicodeForm, expectation, optionalFlag].
//   - kSafe:    the result must equal unicodeForm (test[1]).
//   - kUnsafe:  the result must equal the input (test[0]), i.e. the label is
//               not converted.
//   - kInvalid: the conversion must throw; the "\uFFFD" sentinel is compared.
// Entries with any other expectation value are not checked.
add_task(async function test_chrome_spoofs() {
  // Note: this reassigns the outer `testCases` binding, as the original did.
  testCases = testCases.concat(additionalTestcases);

  for (const test of testCases) {
    // Sentinel that stays in place when the conversion throws.
    let result = "\uFFFD";
    try {
      result = idnService.convertToDisplayIDN(test[0]);
    } catch (e) {
      // Deliberately ignored: a throw is the expected outcome for kInvalid
      // entries and is detected below through the untouched sentinel.
    }

    // A fourth element of "DISABLED" marks the case as an expected failure.
    // If test[3] is set to a platform, the test is expected to pass only on
    // that platform.
    const expectedFail =
      test.length === 4 &&
      (test[3] === "DISABLED" ||
        (test[3] && test[3] !== AppConstants.platform));

    if (test[2] === kSafe) {
      checkEquals(
        result,
        test[1],
        `kSafe label ${test[0]} should convert to ${test[1]}`,
        expectedFail
      );
    } else if (test[2] === kUnsafe) {
      checkEquals(
        result,
        test[0],
        `kUnsafe label ${test[0]} should not convert to ${test[1]}`,
        expectedFail
      );
    } else if (test[2] === kInvalid) {
      checkEquals(
        result,
        "\uFFFD",
        `kInvalid label ${test[0]} should throw`,
        expectedFail
      );
    }
  }
});
// Converts the fully qualified name "xn--ll-0ea.cat." (note the trailing dot)
// and asserts that the display form keeps U+00B7 between the two l's.
add_task(async function test_interpuncts_fqdn() {
  // Second argument handed to convertToDisplayIDN; its contents are not
  // inspected by this test.
  const asciiFlag = {};
  const displayed = idnService.convertToDisplayIDN(
    "xn--ll-0ea.cat.",
    asciiFlag
  );
  Assert.equal(displayed, "l\u00b7l.cat.");
});
// Messung V0.5
// ¤ Dauer der Verarbeitung: 0.25 Sekunden
// (vorverarbeitet)
// ¤
// Die Informationen auf dieser Webseite wurden
// nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
// noch Qualität der bereit gestellten Informationen zugesichert.
// Bemerkung:
// Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.