package votorola.g.mail; // Copyright 2008-2009, Les Hazlewood, Michael Allan.  Modified from http://www.leshazlewood.com/?p=5.  Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. A copy of the License is included at votorola/_/licence/Apache-2.0.txt.  Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the License for the specific language governing permissions and limitations under the License.

import java.util.regex.Pattern;
import votorola.g.lang.*;


/** Validation of email addresses using a regular expression pattern.
  *
  *     @see <a href='http://www.ietf.org/rfc/rfc822.txt'
  *                         >ietf.org/rfc/rfc822.txt</a>
  */
@ThreadSafe class AddressValidationP
{

    /** Not instantiable; this class only offers the static factory
      * {@link #newPattern()}.
      */
    private AddressValidationP() {}



    /** Constructs the pattern of a valid email address consisting of the bare addr-spec
      * having no personal part and no angle braces, and with no domain literals (like
      * <code>joe@[192.168.1.100]</code>).
      *
      * <p>The pattern is compiled without anchors, so test a whole string with
      * {@link java.util.regex.Matcher#matches()} rather than <code>find()</code>.
      * Spaces and tabs around an unquoted local part are accepted, because the
      * dot-atom token is wrapped in optional white space.</p>
      *
      * <p>The domain must be a sequence of RFC 1035 labels ending in a purely
      * alphabetic top-level label of 2 to 63 letters.</p>
      *
      *     @return a newly compiled pattern; each call compiles a fresh instance.
      */
    static Pattern newPattern()
    {
        // Whether domain literals are allowed in the email address, e.g.:
        //
        //     someone@[192.168.1.100]  or
        //     john.doe@[23:33:A2:22:16:1F]  or
        //     me@[my computer]
        //
        // RFC 2822 says these are valid email addresses, but most people don't like
        // allowing them.  It is false here, so only RFC 1035 domain names (x.y.z.com,
        // etc.) are accepted.  Change it to true for full RFC 2822 compliance.
        final boolean ALLOW_DOMAIN_LITERALS = false;

        // Whether a full mailbox is allowed, that is an optional quoted display name
        // followed by the raw address in angle brackets, e.g.:
        //
        //     "John Smith" <john.smith@somewhere.com>
        //
        // RFC 2822 says this is a valid mailbox.  It is false here, so only the raw
        // address (john.smith@somewhere.com - no display name or angle brackets) is
        // accepted.  Change it to true for full RFC 2822 compliance.
        final boolean ALLOW_QUOTED_IDENTIFIERS = false;

        // RFC 2822 2.2.2 Structured Header Field Bodies
        final String wsp = "[ \\t]"; // space or tab
        final String fwsp = wsp + "*";

        // RFC 2822 3.2.1 Primitive tokens
        final String dquote = "\\\"";
        // ASCII control characters excluding white space:
        final String noWsCtl = "\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x7F";
        // all ASCII characters except CR and LF:
        final String asciiText = "[\\x01-\\x09\\x0B\\x0C\\x0E-\\x7F]";

        // RFC 2822 3.2.2 Quoted characters:
        // single backslash followed by a text char
        final String quotedPair = "(\\\\" + asciiText + ")";

        // RFC 2822 3.2.4 Atom:
        final String atext = "[a-zA-Z0-9\\!\\#\\$\\%\\&\\'\\*\\+\\-\\/\\=\\?\\^\\_\\`\\{\\|\\}\\~]";
        final String atom = fwsp + atext + "+" + fwsp;
        final String dotAtomText = atext + "+" + "(" + "\\." + atext + "+)*";
        final String dotAtom = fwsp + "(" + dotAtomText + ")" + fwsp;

        // RFC 2822 3.2.5 Quoted strings:
        // noWsCtl and the rest of ASCII except the doublequote and backslash characters:
        final String qtext = "[" + noWsCtl + "\\x21\\x23-\\x5B\\x5D-\\x7E]";
        final String qcontent = "(" + qtext + "|" + quotedPair + ")";
        final String quotedString = dquote + "(" + fwsp + qcontent + ")*" + fwsp + dquote;

        // RFC 2822 3.2.6 Miscellaneous tokens
        final String word = "((" + atom + ")|(" + quotedString + "))";
        final String phrase = word + "+"; // one or more words

        // RFC 1035 tokens for domain names:
        final String letter = "[a-zA-Z]";
        final String letDig = "[a-zA-Z0-9]";
        final String letDigHyp = "[a-zA-Z0-9-]";
        final String rfcLabel = letDig + "(" + letDigHyp + "{0,61}" + letDig + ")?";
        // The final (top-level) label is letters only.  Its upper bound is the RFC 1035
        // label limit of 63; the former bound of 6 rejected longer top-level domains.
        final String rfc1035DomainName = rfcLabel + "(\\." + rfcLabel + ")*\\." + letter + "{2,63}";

        // RFC 2822 3.4 Address specification
        // domain text - non white space controls and the rest of ASCII chars not including [, ], or \:
        final String dtext = "[" + noWsCtl + "\\x21-\\x5A\\x5E-\\x7E]";
        // Grouped, so that the alternation stays intact when embedded in a larger expression.
        final String dcontent = "(" + dtext + "|" + quotedPair + ")";
        // *([FWS] dcontent) [FWS], per the RFC grammar.  No quantifier is nested inside
        // the repeated group, which keeps backtracking linear.
        final String domainLiteral = "\\[" + "(" + fwsp + dcontent + ")*" + fwsp + "\\]";
        final String rfc2822Domain = "(" + dotAtom + "|" + domainLiteral + ")";

        final String domain = ALLOW_DOMAIN_LITERALS ? rfc2822Domain : rfc1035DomainName;

        final String localPart = "((" + dotAtom + ")|(" + quotedString + "))";
        final String addrSpec = localPart + "@" + domain;
        final String angleAddr = "<" + addrSpec + ">";
        final String nameAddr = "(" + phrase + ")?" + fwsp + angleAddr;
        final String mailbox = nameAddr + "|" + addrSpec;

        // Choose the full mailbox form or the bare addr-spec, then compile once
        // so that the caller can reuse the pattern.
        final String patternString = ALLOW_QUOTED_IDENTIFIERS ? mailbox : addrSpec;
        return Pattern.compile( patternString );
    }



}