エンコード指定のできるURLEncoderクラス

JDKのURLEncoderクラスを修正したもの
(2001.05.12)
import java.io.*;
import java.net.*;
 
/*
* @(#)URLEncoder.java 1.18 00/02/02
*
* Copyright 1995-2000 Sun Microsystems, Inc. All Rights Reserved.
*
* This software is the proprietary information of Sun Microsystems, Inc.
* Use is subject to license terms.
*
*/
 
//package java.net;
 
import java.io.ByteArrayOutputStream;
import java.io.OutputStreamWriter;
import java.io.IOException;
import java.util.BitSet;
 
 
/**
 * Utility class that converts a <code>String</code> into the MIME format
 * called "<code>x-www-form-urlencoded</code>", using a character encoding
 * chosen by the caller. It is a modified copy of the JDK 1.3
 * <code>java.net.URLEncoder</code>, whose only change is that the bytes
 * to be escaped are produced with the requested encoding instead of the
 * platform default.
 * <p>
 * To convert a <code>String</code>, each character is examined in turn:
 * <ul>
 * <li>The ASCII characters '<code>a</code>' through '<code>z</code>',
 * '<code>A</code>' through '<code>Z</code>', '<code>0</code>'
 * through '<code>9</code>', and ".", "-",
 * "*", "_" remain the same.
 * <li>The space character '<code> </code>' is converted into a
 * plus sign '<code>+</code>'.
 * <li>Every other character is first converted to bytes in the requested
 * encoding; each resulting byte is then written as the 3-character string
 * "<code>%<i>XY</i></code>", where <i>XY</i> is the two-digit upper-case
 * hexadecimal value of that byte.
 * </ul>
 *
 * @author Herb Jellinek
 * @version 1.18, 02/02/00
 * @since JDK1.0
 */
public class URLEncoder2 {
    /** Set of character codes that are copied to the output without percent-escaping. */
    static final BitSet dontNeedEncoding = new BitSet(256);

    /** Distance between a lower-case ASCII letter and its upper-case form. */
    static final int caseDiff = ('a' - 'A');

    /** Initial capacity of the scratch buffer that receives the bytes of one character. */
    private static final int MAX_BYTES_PER_CHAR = 10;

    /* The list of characters that are not encoded have been determined by
    referencing O'Reilly's "HTML: The Definitive Guide" (page 164). */

    static {
        int i;
        for (i = 'a'; i <= 'z'; i++) {
            dontNeedEncoding.set(i);
        }
        for (i = 'A'; i <= 'Z'; i++) {
            dontNeedEncoding.set(i);
        }
        for (i = '0'; i <= '9'; i++) {
            dontNeedEncoding.set(i);
        }
        /* A space is flagged here only so that it skips percent-escaping;
           encode() turns it into '+'. */
        dontNeedEncoding.set(' ');
        dontNeedEncoding.set('-');
        dontNeedEncoding.set('_');
        dontNeedEncoding.set('.');
        dontNeedEncoding.set('*');
    }

    /**
     * You can't call the constructor.
     */
    private URLEncoder2() { }

    /**
     * Translates a string into <code>x-www-form-urlencoded</code> format,
     * converting characters that must be escaped to bytes with the given
     * encoding.
     * <p>
     * All escaped characters go through one shared writer, so whatever the
     * encoding emits once per stream (presumably a byte-order mark for
     * "UTF-16") appears only in front of the first escaped character.
     * <p>
     * A surrogate pair is converted as a single unit. A high surrogate
     * that is not followed by a low surrogate is encoded as
     * '<code>?</code>' rather than being held back inside the writer,
     * where it would be lost or would be combined with a later,
     * unrelated low surrogate.
     *
     * @param s <code>String</code> to be translated; must not be
     *          <code>null</code>.
     * @param enc name of the character encoding used to obtain the bytes
     *            of escaped characters, e.g. <code>"UTF-8"</code>.
     * @return the translated <code>String</code>.
     * @throws Exception a <code>java.io.UnsupportedEncodingException</code>
     *         if <code>enc</code> is not supported, or an
     *         <code>IOException</code> if the conversion fails.
     */
    public static String encode(String s, String enc) throws Exception {
        StringBuffer out = new StringBuffer(s.length());
        ByteArrayOutputStream buf = new ByteArrayOutputStream(MAX_BYTES_PER_CHAR);
        // Changed 2001-05-10: the writer uses the caller-supplied encoding
        // instead of the platform default.
        OutputStreamWriter writer = new OutputStreamWriter(buf, enc);

        int length = s.length();
        for (int i = 0; i < length; i++) {
            char c = s.charAt(i);
            if (dontNeedEncoding.get(c)) {
                out.append(c == ' ' ? '+' : c);
                continue;
            }

            // Convert to the external encoding before the hex conversion.
            if (isHighSurrogate(c)) {
                if (i + 1 < length && isLowSurrogate(s.charAt(i + 1))) {
                    // Hand both halves over together so the writer never
                    // has to keep a pending half between iterations.
                    writer.write(s, i, 2);
                    i++;
                } else {
                    // Unpaired high surrogate: substitute it explicitly.
                    writer.write('?');
                }
            } else {
                writer.write(c);
            }
            writer.flush();

            byte[] encoded = buf.toByteArray();
            buf.reset();
            for (int j = 0; j < encoded.length; j++) {
                out.append('%');
                out.append(toHexDigit((encoded[j] >> 4) & 0xF));
                out.append(toHexDigit(encoded[j] & 0xF));
            }
        }

        return out.toString();
    }

    /** Returns the upper-case hexadecimal digit for a value in the range 0-15. */
    private static char toHexDigit(int nibble) {
        char ch = Character.forDigit(nibble, 16);
        // forDigit yields lower-case letters; shift them to upper case.
        if (Character.isLetter(ch)) {
            ch -= caseDiff;
        }
        return ch;
    }

    /** Tells whether the char is the first half of a surrogate pair (U+D800 to U+DBFF). */
    private static boolean isHighSurrogate(char c) {
        return c >= '\uD800' && c <= '\uDBFF';
    }

    /** Tells whether the char is the second half of a surrogate pair (U+DC00 to U+DFFF). */
    private static boolean isLowSurrogate(char c) {
        return c >= '\uDC00' && c <= '\uDFFF';
    }

    /**
     * Debugging entry point: round-trips a sample Japanese string through
     * several encodings and prints the results.
     *
     * @param args ignored.
     */
    public static void main(String[] args) {
        String str = "横浜";
        debugEncodeAndDecode(str, "UTF-8");
        debugEncodeAndDecode(str, "UTF-16");
        debugEncodeAndDecode(str, "EUC-JP");
        debugEncodeAndDecode(str, "Shift_JIS");
    }

    /**
     * Encodes <code>str</code> with {@link #encode(String, String)},
     * decodes the result again with <code>URLDecoder2.decode</code>, and
     * prints the original, encoded and decoded strings to standard output.
     * A failure is reported on standard output, including its stack
     * trace, instead of being propagated.
     *
     * @param str the string to round-trip.
     * @param enc name of the character encoding to use for both steps.
     */
    public static void debugEncodeAndDecode(String str, String enc) {
        // Not closed on purpose: closing would also close System.out.
        PrintWriter out = new PrintWriter(System.out);

        try {
            String src = str;
            String dst;

            out.println("--- URLEncoder2.encode(\"" + src + "\", \"" + enc + "\"); ---");
            out.println("変換前 : " + src);
            dst = URLEncoder2.encode(src, enc);
            out.println("変換後 : " + dst);
            src = URLDecoder2.decode(dst, enc);
            out.println("再変換 : " + src);
            out.println("");
        }
        catch (Exception e) {
            out.println("例外が発生しました!!");
            // Show what actually failed; the bare message hides the cause.
            e.printStackTrace(out);
        }

        out.flush();
    }
}
一覧に戻る
© 2003 WAC.com All Rights Reserved.