001package votorola.g.mail; // Copyright 2008-2009, Les Hazlewood, Michael Allan.  Modified from http://www.leshazlewood.com/?p=5.  Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.  A copy of the License is included at votorola/_/licence/Apache-2.0.txt.  Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the License for the specific language governing permissions and limitations under the License.
002
003import java.util.regex.Pattern;
004import votorola.g.lang.*;
005
006
007/** Validation of email addresses using a regular expression pattern.
008  *
009  *     @see <a href='http://www.ietf.org/rfc/rfc822.txt'
010  *                             >ietf.org/rfc/rfc822.txt</a>
011  */
012@ThreadSafe class AddressValidationP
013{
014
015    private AddressValidationP() {}
016
017
018
019    /** Constructs the pattern of a valid email address consisting of the bare addr-spec
020      * having no personal part and no angle braces, and with no domain literals (like
021      * <code>joe@[192.168.1.100]</code>).
022      */
023    static Pattern newPattern()
024    {
025        // This constant states that domain literals are allowed in the email address, e.g.:
026        //
027        // <p><tt>someone@[192.168.1.100]</tt> or <br/>
028        // <tt>john.doe@[23:33:A2:22:16:1F]</tt> or <br/>
029        // <tt>me@[my computer]</tt></p>
030        //
031        // <p>The RFC says these are valid email addresses, but most people don't like allowing them.
032        // If you don't want to allow them, and only want to allow valid domain names
033        // (<a href="http://www.ietf.org/rfc/rfc1035.txt">RFC 1035</a>, x.y.z.com, etc),
034        // change this constant to <tt>false</tt>.
035        //
036        // <p>Its default value is <tt>true</tt> to remain RFC 2822 compliant, but
037        // you should set it depending on what you need for your application.
038        final boolean ALLOW_DOMAIN_LITERALS = false;
039
040        // This contstant states that quoted identifiers are allowed
041        // (using quotes and angle brackets around the raw address) are allowed, e.g.:
042        //
043        // <p><tt>"John Smith" &lt;john.smith@somewhere.com&gt;</tt>
044        //
045        // <p>The RFC says this is a valid mailbox.  If you don't want to
046        // allow this, because for example, you only want users to enter in
047        // a raw address (<tt>john.smith@somewhere.com</tt> - no quotes or angle
048        // brackets), then change this constant to <tt>false</tt>.
049        //
050        // <p>Its default value is <tt>true</tt> to remain RFC 2822 compliant, but
051        // you should set it depending on what you need for your application.
052        final boolean ALLOW_QUOTED_IDENTIFIERS = false;
053
054        // RFC 2822 2.2.2 Structured Header Field Bodies
055        final String wsp = "[ \\t]"; //space or tab
056        final String fwsp = wsp + "*";
057
058        //RFC 2822 3.2.1 Primitive tokens
059        final String dquote = "\\\"";
060        //ASCII Control characters excluding white space:
061        final String noWsCtl = "\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x7F";
062        //all ASCII characters except CR and LF:
063        final String asciiText = "[\\x01-\\x09\\x0B\\x0C\\x0E-\\x7F]";
064
065        // RFC 2822 3.2.2 Quoted characters:
066        //single backslash followed by a text char
067        final String quotedPair = "(\\\\" + asciiText + ")";
068
069        //RFC 2822 3.2.4 Atom:
070        final String atext = "[a-zA-Z0-9\\!\\#\\$\\%\\&\\'\\*\\+\\-\\/\\=\\?\\^\\_\\`\\{\\|\\}\\~]";
071        final String atom = fwsp + atext + "+" + fwsp;
072        final String dotAtomText = atext + "+" + "(" + "\\." + atext + "+)*";
073        final String dotAtom = fwsp + "(" + dotAtomText + ")" + fwsp;
074
075        //RFC 2822 3.2.5 Quoted strings:
076        //noWsCtl and the rest of ASCII except the doublequote and backslash characters:
077        final String qtext = "[" + noWsCtl + "\\x21\\x23-\\x5B\\x5D-\\x7E]";
078        final String qcontent = "(" + qtext + "|" + quotedPair + ")";
079        final String quotedString = dquote + "(" + fwsp + qcontent + ")*" + fwsp + dquote;
080
081        //RFC 2822 3.2.6 Miscellaneous tokens
082        final String word = "((" + atom + ")|(" + quotedString + "))";
083        final String phrase = word + "+"; //one or more words.
084
085        //RFC 1035 tokens for domain names:
086        final String letter = "[a-zA-Z]";
087        final String letDig = "[a-zA-Z0-9]";
088        final String letDigHyp = "[a-zA-Z0-9-]";
089        final String rfcLabel = letDig + "(" + letDigHyp + "{0,61}" + letDig + ")?";
090        final String rfc1035DomainName = rfcLabel + "(\\." + rfcLabel + ")*\\." + letter + "{2,6}";
091
092        //RFC 2822 3.4 Address specification
093        //domain text - non white space controls and the rest of ASCII chars not including [, ], or \:
094        final String dtext = "[" + noWsCtl + "\\x21-\\x5A\\x5E-\\x7E]";
095        final String dcontent = dtext + "|" + quotedPair;
096        final String domainLiteral = "\\[" + "(" + fwsp + dcontent + "+)*" + fwsp + "\\]";
097        final String rfc2822Domain = "(" + dotAtom + "|" + domainLiteral + ")";
098
099        final String domain = ALLOW_DOMAIN_LITERALS ? rfc2822Domain : rfc1035DomainName;
100
101        final String localPart = "((" + dotAtom + ")|(" + quotedString + "))";
102        final String addrSpec = localPart + "@" + domain;
103        final String angleAddr = "<" + addrSpec + ">";
104        final String nameAddr = "(" + phrase + ")?" + fwsp + angleAddr;
105        final String mailbox = nameAddr + "|" + addrSpec;
106
107        //now compile a pattern for efficient re-use:
108        //if we're allowing quoted identifiers or not:
109        final String patternString = ALLOW_QUOTED_IDENTIFIERS ? mailbox : addrSpec;
110        return Pattern.compile( patternString );
111    }
112
113
114
115}