Created
August 23, 2018 04:43
-
-
Save icejoywoo/b000a0eb109abe2ebb19c4eb41d4d495 to your computer and use it in GitHub Desktop.
中文字符判断
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import java.util.regex.Pattern; | |
| /** | |
| * 字符通用工具类 | |
| * | |
| * @author <a href="http://www.micmiu.com">Michael Sun</a>; | |
| */ | |
| public class CharUtil { | |
| /** | |
| * @param args | |
| */ | |
| public static void main(String[] args) { | |
| String[] strArr = new String[] { | |
| "www.micmiu.com", | |
| "!@#$%^&*()_+{}[]|\"'?/:;<>,.", | |
| "!¥……()——:;“”‘’《》,。?、", | |
| "不要啊", | |
| "やめて", | |
| "韩佳人", | |
| "한가인", | |
| }; | |
| for (String str : strArr) { | |
| System.out.println("===========> 测试字符串:" + str); | |
| System.out.println("正则判断:" + isChineseByREG(str) + " -- " + isChineseByName(str)); | |
| System.out.println("Unicode判断结果 :" + isChinese(str)); | |
| System.out.println("详细判断列表:"); | |
| char[] ch = str.toCharArray(); | |
| for (char c : ch) { | |
| System.out.println(c + " --> " + (isChinese(c) ? "是" : "否")); | |
| } | |
| } | |
| } | |
| // 根据Unicode编码完美的判断中文汉字和符号 | |
| private static boolean isChinese(char c) { | |
| Character.UnicodeBlock ub = Character.UnicodeBlock.of(c); | |
| return ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS | |
| || ub == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS | |
| || ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A | |
| || ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B | |
| || ub == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION | |
| || ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS | |
| || ub == Character.UnicodeBlock.GENERAL_PUNCTUATION; | |
| } | |
| // 完整的判断中文汉字和符号 | |
| public static boolean isChinese(String strName) { | |
| char[] ch = strName.toCharArray(); | |
| for (char c : ch) { | |
| if (isChinese(c)) { | |
| return true; | |
| } | |
| } | |
| return false; | |
| } | |
| // 只能判断部分CJK字符(CJK统一汉字) | |
| public static boolean isChineseByREG(String str) { | |
| if (str == null) { | |
| return false; | |
| } | |
| Pattern pattern = Pattern.compile("[\\u4E00-\\u9FBF]+"); | |
| return pattern.matcher(str.trim()).find(); | |
| } | |
| // 只能判断部分CJK字符(CJK统一汉字) | |
| public static boolean isChineseByName(String str) { | |
| if (str == null) { | |
| return false; | |
| } | |
| // 大小写不同:\\p 表示包含,\\P 表示不包含 | |
| // \\p{Cn} 的意思为 Unicode 中未被定义字符的编码,\\P{Cn} 就表示 Unicode中已经被定义字符的编码 | |
| String reg = "\\p{InCJK Unified Ideographs}&&\\P{Cn}"; | |
| Pattern pattern = Pattern.compile(reg); | |
| return pattern.matcher(str.trim()).find(); | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment