[cvs] expresso/expresso-web/WEB-INF/src/com/jcorporate/expresso/core/security/filters HtmlPlusURLFilter.java

JCorporate Ltd jcorp at jcorp2.servlets.net
Wed Aug 4 16:15:57 PDT 2004


Update of /home/javacorp/.cvs/expresso/expresso/expresso-web/WEB-INF/src/com/jcorporate/expresso/core/security/filters
In directory jcorp2.servlets.net:/tmp/cvs-serv28025/expresso-web/WEB-INF/src/com/jcorporate/expresso/core/security/filters

Modified Files:
	HtmlPlusURLFilter.java 
Log Message:
add special case for percent % in URL


Index: HtmlPlusURLFilter.java
===================================================================
RCS file: /home/javacorp/.cvs/expresso/expresso/expresso-web/WEB-INF/src/com/jcorporate/expresso/core/security/filters/HtmlPlusURLFilter.java,v
retrieving revision 1.5
retrieving revision 1.6
diff -C2 -d -r1.5 -r1.6
*** HtmlPlusURLFilter.java	28 Mar 2004 02:06:43 -0000	1.5
--- HtmlPlusURLFilter.java	4 Aug 2004 23:15:55 -0000	1.6
***************
*** 10,37 ****
  
  /**
!  * This class subclasses HtmlFilter,
!  * plus it creates anchor (<a>) tags for anything that starts with 'http://', 'www.', etc.
!  *
!  * This filter is best used for DISPLAY ONLY, NOT EDITING, like:
!  * Filter old = myDbObject.setFilterClass(new RawFilter());<br>
!  * String myDisplayText = myDbObject.getField("MyField"); <br>
!  * myDbObject.setFilterClass(old); // restore<br>
!  *
!  * This class is not recommended for use in the 'permanent' setting of DBObject.setStringFilterClass()
!  * because if you call getField() to EDIT the original text, an anchor tag would show up for editing,
!  * and if saved, the '<' character would then be stripped, since tags are filtered out of the database data by this filter.
!  *
!  * In my application, I use the RawFilter for getting editable, since my fields are presented as HTML text areas.
   *
!  * @author Larry Hamel
   */
  public class HtmlPlusURLFilter
          extends HtmlFilter {
  
      public static final String[] URL_TYPES = {
          "http://", "https://", "ftp://", "mailto:", "news:"
      };
  
!     // these will be prepended with 'http://"
      public static final String[] URL_INFORMAL_PREFIXES = {
          "www.", "www2."
--- 10,28 ----
  
  /**
!  * This class provides a filter implementation for HTML output, protecting against
!  * XSS exploits, plus it creates anchor (<a>) tags for anything that starts
!  * with 'http://', 'www.', etc.
   *
!  * @author Larry Hamel and Patti Schank
   */
  public class HtmlPlusURLFilter
          extends HtmlFilter {
  
+     // URL types to search for (to add anchor tag)
      public static final String[] URL_TYPES = {
          "http://", "https://", "ftp://", "mailto:", "news:"
      };
  
!     // These will be prepended with 'http://'
      public static final String[] URL_INFORMAL_PREFIXES = {
          "www.", "www2."
***************
*** 39,43 ****
  
      /**
!      * name for Setup value which decides if we are limiting anchor labels
       */
      public static final String MAX_CHARS_IN_URL_LABEL = "MaxCharsURL_Label";
--- 30,34 ----
  
      /**
!      * Name for Setup value which decides if we are limiting anchor labels
       */
      public static final String MAX_CHARS_IN_URL_LABEL = "MaxCharsURL_Label";
***************
*** 45,51 ****
  
      /**
!      * no-args constructor required
!      *
!      * @throws IllegalArgumentException
       */
      public HtmlPlusURLFilter()
--- 36,40 ----
  
      /**
!      * No-args constructor required
       */
      public HtmlPlusURLFilter()
***************
*** 54,61 ****
  
      /**
       * This filter HTML encodes all special characters defined by the replacement
!      * list.  If a particular character doesn't exist in the map, then the chracter
       * is passed appended into the result set.
-      * <p/>
       * If it does exist, then the value the special character maps to is appended
       * into the list instead.
--- 43,60 ----
  
      /**
+      * Constructor for passing strings and their replacements
+      *
+      * @param specialStringList Strings to replace
+      * @param replaceList       The replacement strings
+      */
+     public HtmlPlusURLFilter(String[] specialStringList, String[] replaceList)
+             throws IllegalArgumentException {
+         super(specialStringList, replaceList);
+     }
+ 
+     /**
       * This filter HTML encodes all special characters defined by the replacement
!      * list. If a particular character doesn't exist in the map, then the character
       * is passed appended into the result set.
       * If it does exist, then the value the special character maps to is appended
       * into the list instead.
***************
*** 77,82 ****
       * prepend http://.
       *
!      * @param url the url string
!      * @return the url with http:// prepended, if needed
       */
      public static String addHttpPrefixIfNeeded(String url) {
--- 76,81 ----
       * prepend http://.
       *
!      * @param url The url string
!      * @return The url with http:// prepended, if needed
       */
      public static String addHttpPrefixIfNeeded(String url) {
***************
*** 111,116 ****
       * after the dot (the domain). Add more tests here as appropriate.
       *
!      * @param url
!      * @return
       */
      public static boolean isValidUrl(String url) {
--- 110,115 ----
       * after the dot (the domain). Add more tests here as appropriate.
       *
!      * @param url The candidate URL to check
!      * @return True if the URL is valid
       */
      public static boolean isValidUrl(String url) {
***************
*** 191,196 ****
              String href = s.substring(hIndex, endIndex);
  
!             // XSS protection: cannot have '<', and we have already stripped for this in initial filtering.
!             // however, within anchor, cannot have equivalent hex or &lt; within URL, so that <script> cannot be entered
              // todo use String.replace() regexp in jdk1.4 after expresso raises requirements
              href = StringUtil.replaceAll(href, "&lt;", "");
--- 190,196 ----
              String href = s.substring(hIndex, endIndex);
  
!             // XSS protection: cannot have '<', and we have already stripped for this
!             // in initial filtering. However, within anchor, cannot have equivalent
!             // hex or &lt; within URL, so that <script> cannot be entered
              // todo use String.replace() regexp in jdk1.4 after expresso raises requirements
              href = StringUtil.replaceAll(href, "&lt;", "");
***************
*** 209,214 ****
              link.append("\" target=\"_blank\">");
  
!             // if string is long, use ellipses if this setup value is set
!             // unfortunately, we don't have access to dbname here
              String max = Setup.getValueUnrequired(DBConnection.DEFAULT_DB_CONTEXT_NAME, MAX_CHARS_IN_URL_LABEL);
              if (max != null) {
--- 209,214 ----
              link.append("\" target=\"_blank\">");
  
!             // If string is long, use ellipses if this setup value is set
!             // Unfortunately, we don't have access to dbname here
              String max = Setup.getValueUnrequired(DBConnection.DEFAULT_DB_CONTEXT_NAME, MAX_CHARS_IN_URL_LABEL);
              if (max != null) {
***************
*** 251,256 ****
       * Finds the end of a hyperlink
       *
!      * @param s     the string
!      * @param start the url's starting index
       */
      public static int findEndOfHref(String s, int start) {
--- 251,256 ----
       * Finds the end of a hyperlink
       *
!      * @param s     The string
!      * @param start The url's starting index
       */
      public static int findEndOfHref(String s, int start) {
***************
*** 266,276 ****
  
              /**
!              *  see RFC 2396
!              * ftp://ftp.isi.edu/in-notes/rfc2396.txt
               ;  /  ?  :  @  &  =  +
               $  ,  -  _"  .  !  ~  *  '  (  )
               %  #
               */
!             switch (c) {
                  case '.':
                  case ',':
--- 266,276 ----
  
              /**
!              * Legal punctuation in URLs (see RFC 2396
!              * ftp://ftp.isi.edu/in-notes/rfc2396.txt)
               ;  /  ?  :  @  &  =  +
               $  ,  -  _"  .  !  ~  *  '  (  )
               %  #
               */
!             switch (c) {  // legal punctuation in URLS
                  case '.':
                  case ',':
***************
*** 287,291 ****
                  case ':':
                  case '~':
-                 case '%':
                  case '+':
                  case ';':
--- 287,290 ----
***************
*** 294,302 ****
                  case '\'':
                  case '$':
!                     continue;
  
-                     // not in allowed punctuation--must be end of URL
                  default:
!                     return i;
              }
          }
--- 293,310 ----
                  case '\'':
                  case '$':
!                     continue; // legal punctuation in URL
! 
!                 case '%':
!                     // Special case, indicates a URL encoding follows
!                     // Malicious XSS could abuse encoding to slip scripts
!                     // through. Only allow encoding of safe hex characters
!                     if ((i < (end - 2)) && (isSafeURLEncoding(chars[i + 1], chars[i + 2]))) {
!                         continue;
!                     } else {
!                         return i; // unsafe URL encoding
!                     }
  
                  default:
!                     return i; // illegal punctuation--must be end of URL
              }
          }
***************
*** 304,306 ****
--- 312,356 ----
      }
  
+     /**
+      * Returns true if the two characters following a '%' form a URL encoding
+      * that this filter allows; every pair not on the list below is rejected.
+      * See http://www.blooberry.com/indexdot/html/topics/urlencoding.htm
+      * To stop XSS attacks, definitely don't allow:
+      * the less than symbol < (3C) and the greater than symbol > (3E),
+      * 00-1F hex (ascii control characters),
+      * 80-FF hex (non-ascii, by definition not legal).
+      * For extra safety, also not allowed (add later if needed):
+      * quote (27), left paren (28), right paren (29),
+      * left brace (7B), right brace (7D).
+      * <p/>
+      * Allowed as encoded (might be misunderstood within URLs):
+      * space (20), ! (21), " (22), # (23), $ (24),
+      * % (25), & (26), * (2A), + (2B), comma (2C),
+      * - (2D), period (2E), / (2F), : (3A), ; (3B),
+      * = (3D), ? (3F), @ (40), | (7C), \ (5C), ~ (7E).
+      * Digits (30-39) and letters (41-5A, 61-7A) are also okay, but shouldn't
+      * be encoded anyway, so they are not checked for now.
+      * NOTE(review): the comparison is case-sensitive, so lowercase hex such
+      * as "2f" is rejected -- confirm that is intended.
+      * @param c1 first character after the '%'
+      * @param c2 second character after the '%'
+      * @return true if the pair is on the allowed list, false otherwise
+      */
+     private static boolean isSafeURLEncoding(char c1, char c2) {
+         String[] allowedEncodings = {"20", "21", "22", "23", "24", "25",
+                                      "26", "2A", "2B", "2C", "2D", "2E", "2F",
+                                      "3A", "3B", "3D", "3F", "40", "7C",
+                                      "5C", "7E"};
+ 
+         String encodedCharStr = String.valueOf(c1) + String.valueOf(c2);
+ 
+         // both strings are two characters long, so startsWith is an exact match
+         for (int i = 0; i < allowedEncodings.length; i++) {
+             if (encodedCharStr.startsWith(allowedEncodings[i])) {
+                 return true;
+             }
+         }
+         // otherwise assume encoded characters are unsafe
+         return false;
+     }
  }



More information about the cvs mailing list