Skip to content

Instantly share code, notes, and snippets.

@icejoywoo
Created August 23, 2018 04:43
Show Gist options
  • Save icejoywoo/b000a0eb109abe2ebb19c4eb41d4d495 to your computer and use it in GitHub Desktop.
Save icejoywoo/b000a0eb109abe2ebb19c4eb41d4d495 to your computer and use it in GitHub Desktop.

Revisions

  1. icejoywoo created this gist Aug 23, 2018.
    78 changes: 78 additions & 0 deletions CharUtil.java
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,78 @@
    import java.util.regex.Pattern;

    /**
     * General-purpose character utilities for detecting Chinese (CJK) text.
     *
     * <p>Three detection strategies are offered:
     * <ul>
     *   <li>{@link #isChinese(String)} - Unicode-block based; also counts CJK/full-width
     *       punctuation and the General Punctuation block as "Chinese";</li>
     *   <li>{@link #isChineseByREG(String)} - regex over the explicit range U+4E00..U+9FBF;</li>
     *   <li>{@link #isChineseByName(String)} - regex over the named CJK Unified Ideographs block.</li>
     * </ul>
     *
     * @author <a href="http://www.micmiu.com">Michael Sun</a>
     */
    public class CharUtil {

        /**
         * Matches one or more characters in U+4E00..U+9FBF. This is most, but not all, of the
         * CJK Unified Ideographs block (which runs to U+9FFF); the range is kept as originally written.
         */
        private static final Pattern CJK_RANGE_PATTERN = Pattern.compile("[\\u4E00-\\u9FBF]+");

        /**
         * Matches a single assigned character of the CJK Unified Ideographs block.
         *
         * <p>{@code \p{..}} means "has the property", {@code \P{..}} means "does not have it";
         * {@code \P{Cn}} is therefore "any assigned code point". The {@code &&} intersection
         * operator is only recognised inside a character class, so the expression must be
         * wrapped in {@code [...]}; outside brackets {@code &&} is matched as literal text.
         */
        private static final Pattern CJK_BLOCK_PATTERN =
                Pattern.compile("[\\p{InCJK Unified Ideographs}&&\\P{Cn}]");

        /**
         * Demo entry point: runs every detection strategy over a set of sample strings
         * and prints the results, followed by a per-{@code char} breakdown.
         *
         * @param args unused
         */
        public static void main(String[] args) {
            // NOTE(review): "&amp;" in the second sample looks like an HTML-escaping artifact
            // of a plain "&"; it is left untouched here - confirm before changing.
            String[] strArr = new String[] {
                "www.micmiu.com",
                "!@#$%^&amp;*()_+{}[]|\"'?/:;<>,.",
                "!¥……()——:;“”‘’《》,。?、",
                "不要啊",
                "やめて",
                "韩佳人",
                "한가인",
            };
            for (String str : strArr) {
                System.out.println("===========> 测试字符串:" + str);
                System.out.println("正则判断:" + isChineseByREG(str) + " -- " + isChineseByName(str));
                System.out.println("Unicode判断结果 :" + isChinese(str));
                System.out.println("详细判断列表:");
                char[] ch = str.toCharArray();
                for (char c : ch) {
                    System.out.println(c + " --> " + (isChinese(c) ? "是" : "否"));
                }
            }
        }

        /**
         * Tests a single UTF-16 code unit. A lone surrogate is classified by its surrogate
         * block and therefore yields {@code false}.
         *
         * @param c the character to classify
         * @return {@code true} if {@code c} lies in one of the accepted Unicode blocks
         */
        private static boolean isChinese(char c) {
            return isChineseCodePoint(c);
        }

        /**
         * Classifies a code point by its Unicode block. Accepted blocks are the CJK ideograph
         * blocks (unified, compatibility, extensions A and B), CJK Symbols and Punctuation,
         * Halfwidth and Fullwidth Forms, and General Punctuation.
         *
         * @param codePoint the Unicode code point to classify
         * @return {@code true} if the code point belongs to one of the accepted blocks
         */
        private static boolean isChineseCodePoint(int codePoint) {
            // Guard so an invalid value can never make UnicodeBlock.of(int) throw.
            if (!Character.isValidCodePoint(codePoint)) {
                return false;
            }
            Character.UnicodeBlock ub = Character.UnicodeBlock.of(codePoint);
            return ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                    || ub == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
                    || ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
                    || ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
                    || ub == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
                    || ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS
                    || ub == Character.UnicodeBlock.GENERAL_PUNCTUATION;
        }

        /**
         * Reports whether the string contains at least one Chinese ideograph or
         * Chinese/full-width/general punctuation character, judged by Unicode block.
         *
         * <p>The string is scanned by code point rather than by {@code char}, so
         * supplementary-plane ideographs (e.g. CJK Extension B) are recognised.
         *
         * @param strName the text to inspect; may be {@code null}
         * @return {@code true} if any code point is in an accepted block;
         *         {@code false} for {@code null} or when none is found
         */
        public static boolean isChinese(String strName) {
            if (strName == null) {
                return false;
            }
            int length = strName.length();
            int index = 0;
            while (index < length) {
                int codePoint = strName.codePointAt(index);
                if (isChineseCodePoint(codePoint)) {
                    return true;
                }
                // Advance by 1 or 2 chars depending on whether this was a surrogate pair.
                index += Character.charCount(codePoint);
            }
            return false;
        }

        /**
         * Reports whether the string contains at least one character in U+4E00..U+9FBF.
         * Covers only part of the CJK repertoire (no punctuation, no extension blocks).
         *
         * @param str the text to inspect; may be {@code null}
         * @return {@code true} if a character in the range is found; {@code false} otherwise
         *         or when {@code str} is {@code null}
         */
        public static boolean isChineseByREG(String str) {
            if (str == null) {
                return false;
            }
            return CJK_RANGE_PATTERN.matcher(str).find();
        }

        /**
         * Reports whether the string contains at least one assigned character of the
         * CJK Unified Ideographs block. Covers only part of the CJK repertoire
         * (no punctuation, no extension blocks).
         *
         * @param str the text to inspect; may be {@code null}
         * @return {@code true} if such a character is found; {@code false} otherwise
         *         or when {@code str} is {@code null}
         */
        public static boolean isChineseByName(String str) {
            if (str == null) {
                return false;
            }
            return CJK_BLOCK_PATTERN.matcher(str).find();
        }
    }