### Eclipse Workspace Patch 1.0 #P ViaThinkSoft Java Utils Index: src/com/dominicsayers/isemail/IsEMail.java =================================================================== --- src/com/dominicsayers/isemail/IsEMail.java (revision 15) +++ src/com/dominicsayers/isemail/IsEMail.java (working copy) @@ -1,5 +1,13 @@ package com.dominicsayers.isemail; +// This version contains IANA TLD support! (TODO: Also add that feature to PHP variant) + +// TODO: Problem: Many test cases fail now because no correct TLD was given +// TODO: Problem: Are comments allowed in the TLD part? + +import java.util.Arrays; +import java.util.HashSet; + import javax.naming.NamingException; /** @@ -11,8 +19,7 @@ * @copyright 2010 Dominic Sayers; Java-Translation 2010 by Daniel Marschall * @license http://www.opensource.org/licenses/bsd-license.php BSD License * @see http://www.dominicsayers.com/isemail - * @version 1.17 - Upper length limit corrected to 254 characters; - * Java-Translation 2010-06-14 + * @version 1.18 - Added IANA TLD support; Java-Translation 2010-06-14 */ /* @@ -44,6 +51,42 @@ public class IsEMail { + // http://data.iana.org/TLD/tlds-alpha-by-domain.txt + // Version 2010052500, Last Updated Tue May 25 14:07:02 2010 UTC + private static final HashSet<String> RECOGNIZED_IANA_TLDS = hmaker(new String[] { + "AC", "AD", "AE", "AERO", "AF", "AG", "AI", "AL", "AM", "AN", "AO", + "AQ", "AR", "ARPA", "AS", "ASIA", "AT", "AU", "AW", "AX", "AZ", + "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", "BIZ", "BJ", "BM", + "BN", "BO", "BR", "BS", "BT", "BV", "BW", "BY", "BZ", "CA", "CAT", + "CC", "CD", "CF", "CG", "CH", "CI", "CK", "CL", "CM", "CN", "CO", + "COM", "COOP", "CR", "CU", "CV", "CX", "CY", "CZ", "DE", "DJ", + "DK", "DM", "DO", "DZ", "EC", "EDU", "EE", "EG", "ER", "ES", "ET", + "EU", "FI", "FJ", "FK", "FM", "FO", "FR", "GA", "GB", "GD", "GE", + "GF", "GG", "GH", "GI", "GL", "GM", "GN", "GOV", "GP", "GQ", "GR", + "GS", "GT", "GU", "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", + "ID", "IE", 
"IL", "IM", "IN", "INFO", "INT", "IO", "IQ", "IR", + "IS", "IT", "JE", "JM", "JO", "JOBS", "JP", "KE", "KG", "KH", "KI", + "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", "LB", "LC", "LI", + "LK", "LR", "LS", "LT", "LU", "LV", "LY", "MA", "MC", "MD", "ME", + "MG", "MH", "MIL", "MK", "ML", "MM", "MN", "MO", "MOBI", "MP", + "MQ", "MR", "MS", "MT", "MU", "MUSEUM", "MV", "MW", "MX", "MY", + "MZ", "NA", "NAME", "NC", "NE", "NET", "NF", "NG", "NI", "NL", + "NO", "NP", "NR", "NU", "NZ", "OM", "ORG", "PA", "PE", "PF", "PG", + "PH", "PK", "PL", "PM", "PN", "PR", "PRO", "PS", "PT", "PW", "PY", + "QA", "RE", "RO", "RS", "RU", "RW", "SA", "SB", "SC", "SD", "SE", + "SG", "SH", "SI", "SJ", "SK", "SL", "SM", "SN", "SO", "SR", "ST", + "SU", "SV", "SY", "SZ", "TC", "TD", "TEL", "TF", "TG", "TH", "TJ", + "TK", "TL", "TM", "TN", "TO", "TP", "TR", "TRAVEL", "TT", "TV", + "TW", "TZ", "UA", "UG", "UK", "US", "UY", "UZ", "VA", "VC", "VE", + "VG", "VI", "VN", "VU", "WF", "WS", "XN--0ZWM56D", + "XN--11B5BS3A9AJ6G", "XN--80AKHBYKNJ4F", "XN--9T4B11YI5A", + "XN--DEBA0AD", "XN--G6W251D", "XN--HGBK6AJ7F53BBA", + "XN--HLCJ6AYA9ESC7A", "XN--JXALPDLP", "XN--KGBECHTV", + "XN--MGBAAM7A8H", "XN--MGBERP4A5D4AR", "XN--P1AI", "XN--WGBH1C", + "XN--ZCKZAH", "YE", "YT", "ZA", "ZM", "ZW", }); + + public static final boolean CHECK_TLDS = true; + /** * Checks the syntax of an email address without DNS check. * @@ -610,6 +653,16 @@ return EMailSyntaxDiagnosis.ISEMAIL_TLDNUMERIC; } + // Check IANA TLDs + // lastElement is our TLD; a fixed locale keeps the upper-casing independent of the default locale (e.g. Turkish dotted I) + if (CHECK_TLDS) { + // TODO: Also decode Punycode? + if (!RECOGNIZED_IANA_TLDS.contains(lastElement.toUpperCase(java.util.Locale.ENGLISH))) { + return EMailSyntaxDiagnosis.ISEMAIL_UNKNOWN_TLD; + } + + } + // Check DNS? if (checkDNS) { try { @@ -648,4 +701,19 @@ private IsEMail() { } + + /** + * Builds a HashSet from an array of String literals. + * + * @param list + * array of strings + * + * @return HashSet you can use to test if a string is in the set. 
+ */ + private static HashSet<String> hmaker(String[] list) { + HashSet<String> map = new HashSet<String>(Math.max( + (int) (list.length / .75f) + 1, 16)); + map.addAll(Arrays.asList(list)); + return map; + } } Index: src/com/dominicsayers/isemail/EMailSyntaxDiagnosis.java =================================================================== --- src/com/dominicsayers/isemail/EMailSyntaxDiagnosis.java (revision 14) +++ src/com/dominicsayers/isemail/EMailSyntaxDiagnosis.java (working copy) @@ -10,12 +10,38 @@ * @copyright 2010 Dominic Sayers; Java-Translation 2010 by Daniel Marschall * @license http://www.opensource.org/licenses/bsd-license.php BSD License * @see http://www.dominicsayers.com/isemail - * @version 1.0 Initial release as Java code by Daniel Marschall + * @version 1.1 Added ISEMAIL_UNKNOWN_TLD */ public enum EMailSyntaxDiagnosis { - ISEMAIL_VALID, ISEMAIL_TOOLONG, ISEMAIL_NOAT, ISEMAIL_NOLOCALPART, ISEMAIL_NODOMAIN, ISEMAIL_ZEROLENGTHELEMENT, ISEMAIL_BADCOMMENT_START, ISEMAIL_BADCOMMENT_END, ISEMAIL_UNESCAPEDDELIM, ISEMAIL_EMPTYELEMENT, ISEMAIL_UNESCAPEDSPECIAL, ISEMAIL_LOCALTOOLONG, ISEMAIL_IPV4BADPREFIX, ISEMAIL_IPV6BADPREFIXMIXED, ISEMAIL_IPV6BADPREFIX, ISEMAIL_IPV6GROUPCOUNT, ISEMAIL_IPV6DOUBLEDOUBLECOLON, ISEMAIL_IPV6BADCHAR, ISEMAIL_IPV6TOOMANYGROUPS, ISEMAIL_TLD, ISEMAIL_DOMAINEMPTYELEMENT, ISEMAIL_DOMAINELEMENTTOOLONG, ISEMAIL_DOMAINBADCHAR, ISEMAIL_DOMAINTOOLONG, ISEMAIL_TLDNUMERIC, ISEMAIL_DOMAINNOTFOUND + ISEMAIL_VALID, + ISEMAIL_TOOLONG, + ISEMAIL_NOAT, + ISEMAIL_NOLOCALPART, + ISEMAIL_NODOMAIN, + ISEMAIL_ZEROLENGTHELEMENT, + ISEMAIL_BADCOMMENT_START, + ISEMAIL_BADCOMMENT_END, + ISEMAIL_UNESCAPEDDELIM, + ISEMAIL_EMPTYELEMENT, + ISEMAIL_UNESCAPEDSPECIAL, + ISEMAIL_LOCALTOOLONG, + ISEMAIL_IPV4BADPREFIX, + ISEMAIL_IPV6BADPREFIXMIXED, + ISEMAIL_IPV6BADPREFIX, + ISEMAIL_IPV6GROUPCOUNT, + ISEMAIL_IPV6DOUBLEDOUBLECOLON, + ISEMAIL_IPV6BADCHAR, + ISEMAIL_IPV6TOOMANYGROUPS, + ISEMAIL_TLD, + ISEMAIL_DOMAINEMPTYELEMENT, + ISEMAIL_DOMAINELEMENTTOOLONG, + ISEMAIL_DOMAINBADCHAR, + 
ISEMAIL_DOMAINTOOLONG, + ISEMAIL_TLDNUMERIC, + ISEMAIL_DOMAINNOTFOUND, + ISEMAIL_UNKNOWN_TLD, /* , ISEMAIL_NOTDEFINED */ }