按照指定字符集以及字节数截取字符串

最新推荐文章于 2024-02-06 14:13:47 发布

原创最新推荐文章于 2024-02-06 14:13:47 发布 · 297 阅读

0 ·

CC 4.0 BY-SA版权

文章标签：

#servlet #java #数据库

java 专栏收录该内容

4 篇文章

订阅专栏

文章提供两个Java方法，分别用于按照指定字节数截取UTF-8和GBK编码的字符串。通过统计截取位置之前连续的非ASCII字节（值为负的字节）个数，判断末尾的中文字符是否被截断，从而正确地进行字符串截取。

首先看下数据库字符集:

select lengthb('丁') from dual; 结果为3则数据库字符集为UTF-8，为2则为GBK。


package bytes;

import java.io.UnsupportedEncodingException;

public class BytesUTFAndGBK {

    /** Charset name used to encode and decode in {@link #cutUTF(String, int)}. */
    private static final String UTF8 = "utf-8";

    /** Charset name used to encode and decode in {@link #cutGBK(String, int)}. */
    private static final String GBK = "GBK";

    /**
     * Truncates a string so that its UTF-8 encoding occupies at most {@code num} bytes,
     * without cutting a multi-byte character in half.
     *
     * <p>The cut position is moved backwards while the byte at the cut position is a
     * UTF-8 continuation byte ({@code 10xxxxxx}), so 2-, 3- and 4-byte sequences are all
     * handled, not only 3-byte CJK characters.
     *
     * @param str the string to truncate; {@code null} is returned unchanged
     * @param num the maximum number of UTF-8 bytes to keep; values larger than the
     *            encoded length keep the whole string
     * @return the longest prefix whose UTF-8 encoding fits in {@code num} bytes, or
     *         {@code null} if {@code str} is {@code null}
     * @throws IllegalArgumentException if {@code num} is negative
     * @throws UnsupportedEncodingException if the runtime does not provide UTF-8
     */
    public String cutUTF(String str, int num) throws UnsupportedEncodingException {
        if (str == null) {
            return null;
        }
        requireNonNegative(num);

        byte[] buf = str.getBytes(UTF8);
        int end = Math.min(num, buf.length);
        if (end < buf.length) {
            // buf[end] is the first byte that would be dropped. If it is a continuation
            // byte, the character it belongs to started earlier and would be split, so
            // back up until the cut lands on the first byte of a character.
            while (end > 0 && isUtf8Continuation(buf[end])) {
                end--;
            }
        }
        return new String(buf, 0, end, UTF8);
    }

    /**
     * Truncates a string so that its GBK encoding occupies at most {@code num} bytes,
     * without cutting a two-byte character in half.
     *
     * <p>The run of bytes in the GBK lead-byte range (0x81-0xFE) that ends just before
     * the cut position is counted. That run always starts on a character boundary and
     * consists of complete two-byte pairs, except that an odd length means the last byte
     * is a lead byte whose trail byte lies beyond the cut; that byte is then dropped.
     *
     * @param str the string to truncate; {@code null} is returned unchanged
     * @param num the maximum number of GBK bytes to keep; values larger than the
     *            encoded length keep the whole string
     * @return the longest prefix whose GBK encoding fits in {@code num} bytes, or
     *         {@code null} if {@code str} is {@code null}
     * @throws IllegalArgumentException if {@code num} is negative
     * @throws UnsupportedEncodingException if the runtime does not provide GBK
     */
    public String cutGBK(String str, int num) throws UnsupportedEncodingException {
        if (str == null) {
            return null;
        }
        requireNonNegative(num);

        byte[] buf = str.getBytes(GBK);
        int end = Math.min(num, buf.length);
        if (end < buf.length) {
            int run = 0;
            for (int i = end - 1; i >= 0 && isGbkLeadRange(buf[i]); i--) {
                run++;
            }
            if ((run & 1) == 1) {
                // Odd run: the byte just before the cut is an orphaned lead byte.
                end--;
            }
        }
        // Decode with the same charset that produced the bytes.
        return new String(buf, 0, end, GBK);
    }

    /** Rejects negative byte limits with a descriptive message. */
    private static void requireNonNegative(int num) {
        if (num < 0) {
            throw new IllegalArgumentException("num must be >= 0, got " + num);
        }
    }

    /** Returns whether {@code b} has the UTF-8 continuation-byte bit pattern 10xxxxxx. */
    private static boolean isUtf8Continuation(byte b) {
        return (b & 0xC0) == 0x80;
    }

    /** Returns whether {@code b}, read as unsigned, lies in the GBK lead-byte range 0x81-0xFE. */
    private static boolean isGbkLeadRange(byte b) {
        int unsigned = b & 0xFF;
        return unsigned >= 0x81 && unsigned <= 0xFE;
    }

}