Advanced Java code dealing with real-world problems.

Friday, September 4, 2009

Detect bounced emails - Part 2

In Part 1 we provided a class that retrieves the RFC-related components from a MIME message. In this part we provide a class that scans those components to determine why the email was rejected.
The reasons are grouped into bounce types. The scanner recognizes the following bounce types:
        HARD_BOUNCE
        SOFT_BOUNCE
        MAILBOX_FULL
        CC_USER
        MDN_RECEIPT // read receipt

/*
 * blog/javaclue/javamail/SmtpScanner.java
 * 
 * Copyright (C) 2009 JackW
 * 
 * This program is free software: you can redistribute it and/or modify it under the terms of the
 * GNU Lesser General Public License as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 * 
 * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
 * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public License along with this library.
 * If not, see <http://www.gnu.org/licenses/>.
 */
package blog.javaclue.javamail;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Stack;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import org.apache.log4j.Logger;

/**
 * Scans the text of a returned email for RFC1893 enhanced status codes
 * (x.x.x) and RFC2821 reply codes (xxx) and maps what it finds to a bounce
 * type.
 * <p>
 * The code tables are read from the classpath resources
 * <code>Rfc1893.properties</code> and <code>Rfc2821.properties</code> the first
 * time {@link #getInstance()} is called. Each record has the form
 * <code>code^type^description</code>; lines starting with <code>#</code> are
 * comments.
 * 
 * @author jackw
 */
final class SmtpScanner {
 static final Logger logger = Logger.getLogger(SmtpScanner.class);
 static final boolean isDebugEnabled = logger.isDebugEnabled();
 final int maxLenToScan = 8192*4; // scan up to 32k
 
 // code -> type letter / code -> description, one pair of tables per RFC
 private static final HashMap<String, String> RFC1893_STATUS_CODE = new HashMap<String, String>();
 private static final HashMap<String, String> RFC1893_STATUS_DESC = new HashMap<String, String>();
 private static final HashMap<String, String> RFC2821_STATUS_CODE = new HashMap<String, String>();
 private static final HashMap<String, String> RFC2821_STATUS_DESC = new HashMap<String, String>();
 // code -> regular expression taken from the curly-brace section of the description
 private static final HashMap<String, String> RFC2821_STATUS_MATCHINGTEXT = new HashMap<String, String>();
 
 public static enum BOUNCETYPE { GENERIC }; // default bounce type - not a bounce.
 public static enum BOUNCE_TYPES {
  HARD_BOUNCE, // Hard bounce - suspend,notify,close
  SOFT_BOUNCE, // Soft bounce - bounce++,close
  MAILBOX_FULL, // mailbox full, can be treated as Soft Bounce
  CC_USER, // Mail received as a Carbon Copy
  MDN_RECEIPT, // MDN - read receipt
 }

 private static SmtpScanner smtpCodeScan = null;

 // type letters used in the second field of the properties records
 private static final String
  LETTER_S = "s",
  LETTER_H = "h",
  LETTER_F = "f",
  LETTER_K = "k",
  LETTER_U = "u";

 private static final String
  RFC1893_RESOURCE = "Rfc1893.properties",
  RFC2821_RESOURCE = "Rfc2821.properties";

 /**
  * Loads both status code tables.
  * 
  * @throws IOException
  *             if a properties resource is missing, unreadable or contains
  *             unbalanced curly braces
  */
 private SmtpScanner() throws IOException {
  loadRfc1893StatusCode();
  loadRfc2821StatusCode();
 }
 
 /**
  * Returns the shared scanner, creating it (and loading the code tables) on
  * the first call. The method is synchronized so that concurrent first calls
  * cannot create two instances or fill the static tables at the same time.
  * 
  * @return the shared scanner instance
  * @throws IOException
  *             if the code tables could not be loaded
  */
 public static synchronized SmtpScanner getInstance() throws IOException {
  if (smtpCodeScan==null) {
   smtpCodeScan = new SmtpScanner();
  }
  return smtpCodeScan;
 }
 
 /**
  * Scans a message body for a status code and returns the matching bounce
  * type. RFC1893 codes (x.x.x) are tried first; only when none yields a
  * bounce type are RFC2821 codes (xxx) tried.
  * 
  * @param body -
  *            message body
  * @return name of a {@link BOUNCE_TYPES} constant, or null if the body is
  *         empty or no usable code was found
  */
 String scanBody(String body) {
  String bounceType = scanBody(body, 1);
  if (bounceType == null) {
   bounceType = scanBody(body, 2);
  }
  return bounceType;
 }
 
 // a code only counts when it is surrounded by white space
 private static final Pattern pattern1 = Pattern.compile("\\s([245]\\.\\d{1,3}\\.\\d{1,3})\\s", Pattern.DOTALL);
 private static final Pattern pattern2 = Pattern.compile("\\s([245]\\d\\d)\\s", Pattern.DOTALL);
 
 /**
  * <ul>
  * <li> first pass: check if it contains a RFC1893 code. RFC1893 codes are
  * from 5 to 9 bytes long (x.x.x -> x.xxx.xxx) and start with 2.x.x or 4.x.x
  * or 5.x.x
  * <li> second pass: check if it contains a 3 digit numeric number: 2xx, 4xx
  * or 5xx.
  * </ul>
  * Only the leading part of the body, as returned by
  * <code>StringUtil.cut(body, maxLenToScan)</code>, is searched.
  * 
  * @param body -
  *            message body
  * @param pass -
  *            1) first pass: look for RFC1893 token (x.x.x).
  *            2) second pass: look for RFC2821 token (xxx), must also match reply text.
  * @return bounce type or null if no RFC code is found.
  */
 private String scanBody(String body, int pass) {
  if (isDebugEnabled)
   logger.debug("Entering the examineBody method, pass " + pass);
  if (StringUtil.isEmpty(body)) { // sanity check
   return null;
  }
  if (pass == 1) {
   return scanForRfc1893Code(StringUtil.cut(body, maxLenToScan));
  }
  if (pass == 2) {
   return scanForRfc2821Code(StringUtil.cut(body, maxLenToScan), body);
  }
  return null;
 }

 /**
  * First pass: classify the first RFC1893 token found in the text. A token
  * that is not listed in the code table is classified by its leading class
  * digit.
  * 
  * @param text -
  *            leading part of the message body
  * @return bounce type, or null if the text contains no RFC1893 token
  */
 private String scanForRfc1893Code(String text) {
  Matcher m = pattern1.matcher(text);
  if (!m.find()) { // only the first token is examined
   return null;
  }
  String token = m.group(1);
  logger.info("examineBody(): RFC1893 token found: " + token);
  BOUNCE_TYPES bounceType = searchRfc1893CodeTable(token);
  if (bounceType != null) {
   return bounceType.toString();
  }
  if (token.startsWith("5.")) { // 5.x.x
   return BOUNCE_TYPES.HARD_BOUNCE.toString();
  }
  if (token.startsWith("4.")) { // 4.x.x
   return BOUNCE_TYPES.SOFT_BOUNCE.toString();
  }
  if (token.startsWith("2.")) { // 2.x.x
   // 2.x.x = OK message returned, MDN receipt.
   return BOUNCE_TYPES.MDN_RECEIPT.toString();
  }
  return null;
 }

 /**
  * Second pass: examine up to two RFC2821 tokens. A token is accepted only
  * when the text following it matches the reply text pattern configured for
  * the code; a token that fails that check is skipped and the next one is
  * tried, whether or not the code is listed in the table.
  * 
  * @param text -
  *            leading part of the message body, used to locate the tokens
  * @param body -
  *            complete message body, handed to the reply text matcher
  * @return bounce type, or null if neither token could be confirmed
  */
 private String scanForRfc2821Code(String text, String body) {
  Matcher m = pattern2.matcher(text);
  int end = 0;
  for (int count = 0; count < 2 && m.find(end); count++) { // repeat two times
   String token = m.group(1);
   // resume right behind the code so that its trailing white space can
   // serve as the leading white space of the next token
   end = m.end(1);
   logger.info("examineBody(): Numeric token found: " + token);
   BOUNCE_TYPES bounceType = searchRfc2821CodeTable(token);
   if (bounceType == null) {
    if (token.startsWith("5")) {
     // 5xx = permanent failure, re-send will fail
     bounceType = BOUNCE_TYPES.HARD_BOUNCE;
    }
    else if (token.equals("422")) {
     // 422 = mailbox full, re-send may be successful
     bounceType = BOUNCE_TYPES.MAILBOX_FULL;
    }
    else if (token.startsWith("4")) {
     // 4xx = persistent transient failure, re-send may be successful
     bounceType = BOUNCE_TYPES.SOFT_BOUNCE;
    }
    else {
     // 2xx = OK message returned, look for the second token
     continue;
    }
   }
   String result = matchRfcText(bounceType, token, body, end);
   if (result != null) {
    return result;
   }
   // else look for the second token
  }
  return null;
 }

 /**
  * For RFC 2821, to further match reply text to prevent false positives.
  * The pattern registered for the code (or, failing that, the generic "4xx"
  * / "5xx" pattern) is searched for in the first 120 characters following
  * the code.
  * 
  * @param bounceType -
  *            Bounce Type to report when the reply text matches
  * @param code -
  *            RFC2821 code
  * @param body -
  *            message body
  * @param idx -
  *            offset in the body just behind the RFC2821 code
  * @return bounce type, or null if failed to match reply text.
  */
 private String matchRfcText(BOUNCE_TYPES bounceType, String code, String body, int idx) {
  String matchingText = RFC2821_STATUS_MATCHINGTEXT.get(code);
  if (matchingText == null) {
   if (code.startsWith("4")) {
    matchingText = RFC2821_STATUS_MATCHINGTEXT.get("4xx");
   }
   else if (code.startsWith("5")) {
    matchingText = RFC2821_STATUS_MATCHINGTEXT.get("5xx");
   }
   if (matchingText == null) { // just for safety
    return null;
   }
  }
  // RFC reply text - the first 120 characters after the RFC code 
  String rfcText = StringUtil.cut(body.substring(idx), 120);
  try {
   Pattern p = Pattern.compile(matchingText, Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
   if (p.matcher(rfcText).find()) {
    logger.info("Match Succeeded: [" + rfcText + "] matched [" + matchingText + "]");
    return bounceType.toString();
   }
   logger.info("Match Failed: [" + rfcText + "] did not match [" + matchingText + "]");
  }
  catch (PatternSyntaxException e) {
   // the expression comes from the properties file; a broken one is
   // reported and treated as "no match"
   logger.error("PatternSyntaxException caught", e);
  }
  return null;
 }
 
 /**
  * search smtp code table by RFC1893 token. The full token (x.x.x) is looked
  * up first, then the token without its class digit (.x.x).
  * 
  * @param token
  *            DSN status token, for example: 5.0.0
  * @return bounce type related to the token, or null if none
  */
 private BOUNCE_TYPES searchRfc1893CodeTable(String token) {
  // search rfc1893 hash table - x.x.x
  BOUNCE_TYPES bounceType = searchRfcCodeTable(token, RFC1893_STATUS_CODE);
  // search rfc1893 hash table - .x.x
  if (bounceType == null) {
   bounceType = searchRfcCodeTable(token.substring(1), RFC1893_STATUS_CODE);
  }
  return bounceType;
 }
 
 /**
  * search smtp code table by RFC token and translate the type letter found
  * there into a bounce type.
  * 
  * @param token -
  *            DSN status token, for example: 5.0.0, or 500 depending on the
  *            map used
  * @param map -
  *            either RFC1893_STATUS_CODE or RFC2821_STATUS_CODE
  * @return bounce type of the token, or null if the token is not listed or
  *         its type letter is not one of h, s, f, k, u
  */
 private BOUNCE_TYPES searchRfcCodeTable(String token, HashMap<String, String> map) {
  String type = map.get(token);
  if (type == null) {
   return null;
  }
  // found RFC status code
  logger.info("searchRfcCodeTable(): A match is found for type: " + type);
  if (type.equals(LETTER_H)) {
   return BOUNCE_TYPES.HARD_BOUNCE;
  }
  if (type.equals(LETTER_S)) {
   return BOUNCE_TYPES.SOFT_BOUNCE;
  }
  if (type.equals(LETTER_F)) {
   return BOUNCE_TYPES.MAILBOX_FULL;
  }
  if (type.equals(LETTER_K)) {
   return BOUNCE_TYPES.MDN_RECEIPT;
  }
  if (type.equals(LETTER_U)) {
   // "u" leaves the decision to the class digit of the code
   if (token.startsWith("4")) {
    return BOUNCE_TYPES.SOFT_BOUNCE;
   }
   if (token.startsWith("5")) {
    return BOUNCE_TYPES.HARD_BOUNCE;
   }
  }
  return null;
 }
 
 /**
  * search smtp code table by RFC token.
  * 
  * @param token -
  *            RFC2821 token, for example: 500
  * @return bounce type of the token, or null if none
  */
 private BOUNCE_TYPES searchRfc2821CodeTable(String token) {
  return searchRfcCodeTable(token, RFC2821_STATUS_CODE);
 }
 
 /**
  * load the rfc1893 code table, from Rfc1893.properties file, into memory.
  * 
  * @throws IOException
  *             if the resource is missing or cannot be read
  */
 private void loadRfc1893StatusCode() throws IOException {
  loadStatusCodes(RFC1893_RESOURCE, "loadRfc1893StatusCode",
    RFC1893_STATUS_CODE, RFC1893_STATUS_DESC, false);
 }

 /**
  * load the rfc2821 code table, from Rfc2821.properties file, into memory.
  * The regular expression enclosed in curly braces in a description, if
  * any, is stored as the reply text pattern of the code.
  * 
  * @throws IOException
  *             if the resource is missing, cannot be read, or a description
  *             contains unbalanced curly braces
  */
 private void loadRfc2821StatusCode() throws IOException {
  loadStatusCodes(RFC2821_RESOURCE, "loadRfc2821StatusCode",
    RFC2821_STATUS_CODE, RFC2821_STATUS_DESC, true);
 }

 /**
  * Reads a <code>code^type^description</code> resource from the classpath
  * into the given tables. Lines starting with '#' and empty lines are
  * skipped; lines with any other number of fields are logged and skipped.
  * 
  * @param resourceName -
  *            name of the classpath resource
  * @param caller -
  *            name of the loading method, used as prefix in log messages
  * @param codes -
  *            receives code -> type
  * @param descs -
  *            receives code -> description
  * @param extractRegex -
  *            true to also fill RFC2821_STATUS_MATCHINGTEXT from the
  *            descriptions
  * @throws FileNotFoundException
  *             if the resource is not on the classpath
  * @throws IOException
  *             if reading fails or a description has unbalanced braces
  */
 private void loadStatusCodes(String resourceName, String caller,
   HashMap<String, String> codes, HashMap<String, String> descs,
   boolean extractRegex) throws IOException {
  InputStream is = this.getClass().getClassLoader().getResourceAsStream(resourceName);
  if (is == null) {
   // getResourceAsStream reports a missing resource with null, not with
   // an exception
   FileNotFoundException ex = new FileNotFoundException(
     "Resource not found on classpath: " + resourceName);
   logger.fatal("file " + resourceName + " does not exist", ex);
   throw ex;
  }
  BufferedReader reader = new BufferedReader(new InputStreamReader(is));
  try {
   String inStr;
   while ((inStr = reader.readLine()) != null) {
    if (inStr.startsWith("#")) {
     continue;
    }
    if (isDebugEnabled)
     logger.debug(caller + "(): " + inStr);
    StringTokenizer st = new StringTokenizer(inStr, "^\r\n");
    int fieldCount = st.countTokens();
    if (fieldCount == 3) {
     String code = st.nextToken(); // 1st token = RFC code
     codes.put(code, st.nextToken()); // 2nd token = type
     String desc = st.nextToken(); // 3rd token = description
     descs.put(code, desc);
     if (extractRegex) {
      // extract regular expression to be further matched
      String matchingRegex = getMatchingRegex(desc);
      if (matchingRegex != null) {
       RFC2821_STATUS_MATCHINGTEXT.put(code, matchingRegex);
      }
     }
    }
    else if (fieldCount != 0) { // empty lines are ignored
     logger.fatal(caller + "(): Wrong record format: " + inStr);
    }
   }
  }
  catch (IOException ex) {
   logger.fatal("IOException caught during loading " + resourceName, ex);
   throw ex;
  }
  finally {
   try {
    reader.close();
   }
   catch (IOException ex) {
    // the table is already loaded or the real error is on its way up
    logger.warn("Failed to close " + resourceName, ex);
   }
  }
 }
 
 /**
  * Extracts the text enclosed by the first pair of balanced curly braces in
  * a description. Braces nested inside that pair, such as the quantifier in
  * <code>.{0,100}</code>, are part of the returned text.
  * 
  * @param desc -
  *            description field of a properties record
  * @return the text between the outer braces; null if the description has
  *         no opening brace, or no closing brace at all after it
  * @throws IOException
  *             if some braces are closed but the first one is not
  */
 private String getMatchingRegex(String desc) throws IOException {
  int left = desc.indexOf("{");
  if (left < 0) {
   return null;
  }
  int depth = 1; // number of braces currently open
  int pos = left; // position of the last brace consumed
  while (depth > 0) {
   int nextOpen = desc.indexOf("{", pos + 1);
   int nextClose = desc.indexOf("}", pos + 1);
   if (nextClose < 0) {
    // nothing left that could close a brace; stop instead of
    // looking at the same position again forever
    break;
   }
   if (nextOpen >= 0 && nextOpen < nextClose) {
    depth++;
    pos = nextOpen;
   }
   else {
    depth--;
    pos = nextClose;
   }
  }
  if (pos == left) { // no closing brace anywhere behind the first one
   return null;
  }
  if (depth > 0) {
   logger.error("getMatchingRegex() - missing close curly brace: " + desc);
   throw new IOException("Missing close curly brace: " + desc);
  }
  return desc.substring(left + 1, pos);
 }

 public static void main(String[] args) {
  try {
   SmtpScanner scan = SmtpScanner.getInstance();
   String bounceType = scan.scanBody("aaaaab\n5.0.0\nefg ");
   System.out.println("BounceType: " + bounceType);
   bounceType = scan.scanBody("aaa 201 aab\n422\naccount is full ");
   System.out.println("BounceType: " + bounceType);
   bounceType = scan.scanBody("aaaaab\n400\ntemporary failure ");
   System.out.println("BounceType: " + bounceType);
   System.out.println(scan.getMatchingRegex("{(?:mailbox|account).{0,180}(?:storage|full|limit|quota)}"));
  }
  catch (Exception e) {
   e.printStackTrace();
  }
 }
}

This class needs two additional property files to function. Save them under the root folder of your classpath:

1) Rfc1893.properties:

# RFC1893/RFC3463 status code and description
# status code = class "." subject "." detail
# 2.x.x Success
# 4.x.x Persistent Transient Failure
# 5.x.x Permanent Failure
#
# format: StatusCode^Type^Description
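# type = h (hard bounce), s (soft bounce), f (mailbox full), k (OK / receipt),
#        u (undetermined: soft bounce for 4.x.x, hard bounce for 5.x.x).
#        Any other letter (for example l or b) is not recognized by SmtpScanner;
#        the scanner then tries the generic .x.x entry and finally falls back
#        to the class digit of the status code.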
#
# permanent failure
5.0.0^h^Other undefined status
5.1.0^h^Other address status
5.1.1^h^Bad destination mailbox address
5.1.2^h^Bad destination system address
5.1.3^h^Bad destination mailbox address syntax
5.1.4^h^Destination mailbox address ambiguous
5.1.5^h^Destination mailbox address invalid (source: Microsoft)
5.1.6^h^Mailbox has moved
5.1.7^h^Bad sender's mailbox address syntax
5.1.8^h^Bad sender's system address
5.2.0^h^Other or undefined mailbox status
5.2.1^h^Mailbox disabled, not accepting messages
5.2.2^f^Mailbox full
5.2.3^l^Message length exceeds administrative limit.
5.2.4^h^Mailing list expansion problem
5.3.0^h^Other or undefined mail system status
5.3.1^s^Mail system full
5.3.2^h^System not accepting network messages
5.3.3^h^System not capable of selected features
5.3.4^l^Message too big for system
5.3.5^h^System incorrectly configured
5.4.0^h^Other or undefined network or routing status
5.4.1^h^No answer from host
5.4.2^h^Bad connection
5.4.3^h^Routing server failure
5.4.4^h^Unable to route
5.4.5^h^Network congestion
5.4.6^h^Routing loop detected
5.4.7^h^Delivery time expired
5.4.8^h^Loop detected, check recipient policy. (Source: Microsoft)
5.5.0^h^Other or undefined protocol status
5.5.1^h^Invalid command
5.5.2^h^Syntax error
5.5.3^h^Too many recipients
5.5.4^h^Invalid command arguments
5.5.5^h^Wrong protocol version
5.6.0^b^Other or undefined media error
5.6.1^b^Media not supported
5.6.2^h^Conversion required and prohibited
5.6.3^h^Conversion required but not supported
5.6.4^h^Conversion with loss performed
5.6.5^h^Conversion failed
5.7.0^h^Other or undefined security status
5.7.1^b^Delivery not authorized, message refused
5.7.2^h^Mailing list expansion prohibited
5.7.3^h^Security conversion required but not possible
5.7.4^h^Security features not supported
5.7.5^b^Cryptographic failure
5.7.6^b^Cryptographic algorithm not supported
5.7.7^b^Message integrity failure
# persistent transient failure
4.0.0^s^Other undefined status
4.1.0^s^Other address status
4.1.4^s^Destination mailbox address ambiguous
4.1.5^s^Destination mailbox address valid
4.1.7^s^Bad sender's mailbox address syntax
4.1.8^s^Bad sender's system address
4.2.0^s^Other or undefined mailbox status
4.2.1^s^Mailbox disabled, not accepting messages
4.2.2^f^Mailbox full
4.2.4^s^Mailing list expansion problem
4.3.0^s^Other or undefined mail system status
4.3.1^s^Mail system full
4.3.2^s^System not accepting network messages
4.3.3^s^System not capable of selected features
4.3.5^s^System incorrectly configured
4.4.0^s^Other or undefined network or routing status
4.4.1^s^No answer from host
4.4.2^s^Bad connection
4.4.3^s^Routing server failure
4.4.4^s^Unable to route
4.4.5^s^Network congestion
4.4.6^s^Routing loop detected
4.4.7^s^Delivery time expired
4.5.0^s^Other or undefined protocol status
4.5.3^s^Too many recipients
4.5.5^s^Wrong protocol version
4.6.0^s^Other or undefined media error
4.6.2^s^Conversion required and prohibited
4.6.3^s^Conversion required but not supported
4.6.4^s^Conversion with loss performed
4.6.5^s^Conversion failed
4.7.0^s^Other or undefined security status
4.7.5^s^Cryptographic failure
4.7.6^s^Cryptographic algorithm not supported
4.7.7^s^Message integrity failure
# generic entries
.0.0^s^Other undefined status
.1.0^s^Other address status
.1.1^h^Bad destination mailbox address
.1.2^h^Bad destination system address
.1.3^h^Bad destination mailbox address syntax
.1.4^h^Destination mailbox address ambiguous
.1.5^k^Destination mailbox address valid
.1.6^h^Mailbox has moved
.1.7^s^Bad sender's mailbox address syntax
.1.8^s^Bad sender's system address
.2.0^s^Other or undefined mailbox status
.2.1^h^Mailbox disabled, not accepting messages
.2.2^f^Mailbox full
.2.3^l^Message length exceeds administrative limit.
.2.4^s^Mailing list expansion problem
.3.0^s^Other or undefined mail system status
.3.1^s^Mail system full
.3.2^s^System not accepting network messages
.3.3^s^System not capable of selected features
.3.4^l^Message too big for system
.3.5^s^System incorrectly configured
.4.0^s^Other or undefined network or routing status
.4.1^s^No answer from host
.4.2^s^Bad connection
.4.3^s^Routing server failure
.4.4^s^Unable to route
.4.5^s^Network congestion
.4.6^s^Routing loop detected
.4.7^s^Delivery time expired
.5.0^s^Other or undefined protocol status
.5.1^h^Invalid command
.5.2^h^Syntax error
.5.3^s^Too many recipients
.5.4^h^Invalid command arguments
.5.5^s^Wrong protocol version
.6.0^s^Other or undefined media error
.6.1^s^Media not supported
.6.2^s^Conversion required and prohibited
.6.3^s^Conversion required but not supported
.6.4^s^Conversion with loss performed
.6.5^s^Conversion failed
.7.0^s^Other or undefined security status
.7.1^b^Delivery not authorized, message refused
.7.2^h^Mailing list expansion prohibited
.7.3^h^Security conversion required but not possible
.7.4^h^Security features not supported
.7.5^s^Cryptographic failure
.7.6^s^Cryptographic algorithm not supported
.7.7^s^Message integrity failure

2) Rfc2821.properties:

# RFC2821 reply code and description
# reply code = xyz
# 1yz Positive Preliminary reply
# 2yz   Positive Completion reply
# 3yz   Positive Intermediate reply
# 4yz   Transient Negative Completion reply
# 5yz   Permanent Negative Completion reply
#
# format: ReplyCode^Type^Description
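# type = h (hard bounce), s (soft bounce), f (mailbox full), k (OK / receipt),
#        u (undetermined: soft bounce for 4yz, hard bounce for 5yz)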
# Description: text enclosed in curly brackets should be further matched to prevent false positives.
#
211^k^System status, or system help reply
214^k^Help message
220^k^<domain> Service ready
221^k^<domain> Service closing transmission channel
250^k^Requested mail action okay, completed
251^k^User not local; will forward to <forward-path>
252^k^Cannot VRFY user, but will accept message and attempt delivery
354^k^Start mail input; end with <CRLF>.<CRLF>
421^s^<domain> Service not available, closing transmission channel {\bnot\s+available}
450^s^Requested mail action not taken: mailbox unavailable {\baction\s+not\s+taken}
451^s^Requested action aborted: local error in processing {\baction\s+aborted}
452^s^Requested action not taken: insufficient system storage {\baction\s+not\s+taken}
500^h^Syntax error, command unrecognized {\berror}
501^h^Syntax error in parameters or arguments {\berror}
502^h^Command not implemented {\bnot\s+implemented}
503^h^Bad sequence of commands {\bBad\s+sequence}
504^h^Command parameter not implemented {\bnot\s+implemented}
550^h^Requested action not taken: mailbox unavailable {\baction\s+not\s+taken}
551^h^User not local; please try <forward-path> {\bnot\s+local}
552^f^Requested mail action aborted: exceeded storage allocation {\baction\s+aborted}
553^h^Requested action not taken: mailbox name not allowed {\baction\s+not\s+taken}
554^h^Transaction failed {\b(?:failed|delivery error)}
#
# *** Custom entries, not defined by RFC 2821 ***
#
422^f^{\b(?:mailbox|account)\b.{0,100}(?:storage|full|limit|quota)} mailbox full.
4xx^s^{\btemporary\s.{0,100}(?:failure|error)}, used to match undefined codes starting with 4
5xx^h^{\bpermanent\s.{0,100}(?:failure|error)}, used to match undefined codes starting with 5

2 comments:

  1. Thank you very much for your good code. Can you please tell me what is the StringUtil class you used here. I didn't see it in imports.

    Thank you,
    Vissu

    ReplyDelete
    Replies
    1. Please ignore my above one... I didn't see your old post.

      Now, It is clear here... http://javaclue.blogspot.in/2009/09/portable-java-mail-message-bean-part-5.html.

      Delete

Followers

About Me

An IT professional with more than 20 years of experience in enterprise computing. An audio enthusiast who has designed and built DIY audio gear and speakers.