返回
正则表达式教程:匹配中文/汉字
正则表达式
2024-02-28 16:01:57
一、正则解释
给定的正则表达式:
/^(?:[\u3400-\u4DB5\u4E00-\u9FEA\uFA0E\uFA0F\uFA11\uFA13\uFA14\uFA1F\uFA21\uFA23\uFA24\uFA27-\uFA29]|[\uD840-\uD868\uD86A-\uD86C\uD86F-\uD872\uD874-\uD879][\uDC00-\uDFFF]|\uD869[\uDC00-\uDED6\uDF00-\uDFFF]|\uD86D[\uDC00-\uDF34\uDF40-\uDFFF]|\uD86E[\uDC00-\uDC1D\uDC20-\uDFFF]|\uD873[\uDC00-\uDEA1\uDEB0-\uDFFF]|\uD87A[\uDC00-\uDFE0])+$/
含义:
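^:匹配字符串的开头。
(?: ... ):非捕获组,仅用于把下面各个分支组合在一起,不保存匹配内容。该组之后不带量词时只能匹配单个汉字,带上量词 + 才能匹配由多个汉字组成的字符串。
[\u3400-\u4DB5\u4E00-\u9FEA\uFA0E\uFA0F\uFA11\uFA13\uFA14\uFA1F\uFA21\uFA23\uFA24\uFA27-\uFA29]:匹配基本多文种平面(BMP)内、只占一个 UTF-16 码元的汉字,包括扩展 A 区(U+3400–U+4DB5)、基本区(U+4E00–U+9FEA)以及兼容表意文字区中的 12 个统一汉字。
[\uD840-\uD868\uD86A-\uD86C\uD86F-\uD872\uD874-\uD879][\uDC00-\uDFFF]:匹配以代理对(高位代理 + 低位代理,共两个 UTF-16 码元,而不是“双字节”)表示的增补平面汉字。这里列出的高位代理所对应的码位区段全部落在扩展 B–F 区内,因此低位代理可以取任意值。
\uD869[\uDC00-\uDED6\uDF00-\uDFFF]:匹配扩展 B 区的末段(U+2A400–U+2A6D6)和扩展 C 区的开头(U+2A700–U+2A7FF),排除两区之间未分配的码位。
\uD86D[\uDC00-\uDF34\uDF40-\uDFFF]:匹配扩展 C 区的末段(U+2B400–U+2B734)和扩展 D 区的开头(U+2B740–U+2B7FF)。
\uD86E[\uDC00-\uDC1D\uDC20-\uDFFF]:匹配扩展 D 区的末段(U+2B800–U+2B81D)和扩展 E 区的开头(U+2B820–U+2BBFF)。
\uD873[\uDC00-\uDEA1\uDEB0-\uDFFF]:匹配扩展 E 区的末段(U+2CC00–U+2CEA1)和扩展 F 区的开头(U+2CEB0–U+2CFFF)。
\uD87A[\uDC00-\uDFE0]:匹配扩展 F 区的末段(U+2E800–U+2EBE0)。
$:匹配字符串的结尾。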
二、使用场景
此正则表达式主要用于匹配中文或汉字,常见场景有:
- 验证输入的姓名、地址或其他需要包含中文信息的字段。
- 从文本中提取中文部分。
- 过滤掉非中文内容。
三、代码示例
JavaScript
// Matches a string that consists only of Chinese (Han) ideographs.
// The first branch covers BMP ideographs; the remaining branches spell the
// supplementary-plane ideographs (CJK Extensions B-F) as UTF-16 surrogate
// pairs, which is how a JavaScript regex without the `u` flag sees them.
// The trailing `+` lets the group repeat: without it the anchored pattern
// accepts exactly one ideograph and rejects the two-character sample below.
const regex = /^(?:[\u3400-\u4DB5\u4E00-\u9FEA\uFA0E\uFA0F\uFA11\uFA13\uFA14\uFA1F\uFA21\uFA23\uFA24\uFA27-\uFA29]|[\uD840-\uD868\uD86A-\uD86C\uD86F-\uD872\uD874-\uD879][\uDC00-\uDFFF]|\uD869[\uDC00-\uDED6\uDF00-\uDFFF]|\uD86D[\uDC00-\uDF34\uDF40-\uDFFF]|\uD86E[\uDC00-\uDC1D\uDC20-\uDFFF]|\uD873[\uDC00-\uDEA1\uDEB0-\uDFFF]|\uD87A[\uDC00-\uDFE0])+$/;

// Sample input: two ideographs.
const input = "正则";

// true only when every character of the input is a Han ideograph.
const result = regex.test(input);

if (result) {
  console.log("匹配成功");
} else {
  console.log("匹配失败");
}
Java
import java.util.regex.Pattern;
/**
 * Checks whether a string consists solely of Chinese (Han) ideographs.
 *
 * <p>Accepted code points are CJK Extension A (U+3400..U+4DB5), the basic block
 * (U+4E00..U+9FEA), twelve unified ideographs located in the compatibility block
 * (U+FA0E..U+FA29, selected individually), and Extensions B through F
 * (U+20000..U+2EBE0, excluding the unassigned gaps between the extensions).
 */
public class ChineseMatcher {

    /**
     * Regex source for "one or more Han ideographs, nothing else".
     *
     * <p>Every range is written as a whole code point with the {@code x{h..h}} escape.
     * {@code java.util.regex} reads the input code point by code point, so a pattern that
     * spells a supplementary character as separate high and low surrogate halves never
     * matches a well-formed pair; code-point ranges are the form that works here.
     *
     * <p>The trailing {@code +} lets the group repeat so that multi-character input such as
     * the sample in {@link #main(String[])} is accepted.
     */
    private static final String CHINESE_PATTERN =
        "^(?:["
            // BMP: Extension A, basic block, unified ideographs in the compatibility block.
            + "\\x{3400}-\\x{4DB5}"
            + "\\x{4E00}-\\x{9FEA}"
            + "\\x{FA0E}\\x{FA0F}\\x{FA11}\\x{FA13}\\x{FA14}\\x{FA1F}"
            + "\\x{FA21}\\x{FA23}\\x{FA24}\\x{FA27}-\\x{FA29}"
            // Supplementary plane: Extensions B, C, D, E and F.
            + "\\x{20000}-\\x{2A6D6}"
            + "\\x{2A700}-\\x{2B734}"
            + "\\x{2B740}-\\x{2B81D}"
            + "\\x{2B820}-\\x{2CEA1}"
            + "\\x{2CEB0}-\\x{2EBE0}"
            + "])+$";

    /** Compiled once; {@link Pattern} instances are immutable and safe to share. */
    private static final Pattern CHINESE = Pattern.compile(CHINESE_PATTERN);

    /**
     * Tells whether {@code input} is made up entirely of Han ideographs.
     *
     * @param input the text to check; may be {@code null}
     * @return {@code true} if {@code input} is non-empty and every code point is an accepted
     *     ideograph; {@code false} otherwise, including for {@code null} and the empty string
     */
    public static boolean isChinese(String input) {
        return input != null && CHINESE.matcher(input).matches();
    }

    /**
     * Demo entry point: checks a fixed two-character sample and prints the outcome.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String input = "正则";
        boolean result = isChinese(input);
        System.out.println(result ? "匹配成功" : "匹配失败");
    }
}
PHP
<?php
// Matches a string that consists only of Chinese (Han) ideographs.
//
// PCRE has no \uXXXX escape, and in UTF-8 mode it rejects surrogate code
// points, so the JavaScript-style pattern cannot be compiled by preg_match().
// The ranges are therefore written as whole code points with \x{...} and the
// pattern carries the "u" modifier so subject and pattern are treated as UTF-8.
// The trailing "+" lets the group repeat, so multi-character input is accepted.
$pattern = '/^(?:['
    // BMP: Extension A, basic block, unified ideographs in the compatibility block.
    . '\x{3400}-\x{4DB5}'
    . '\x{4E00}-\x{9FEA}'
    . '\x{FA0E}\x{FA0F}\x{FA11}\x{FA13}\x{FA14}\x{FA1F}'
    . '\x{FA21}\x{FA23}\x{FA24}\x{FA27}-\x{FA29}'
    // Supplementary plane: Extensions B, C, D, E and F.
    . '\x{20000}-\x{2A6D6}'
    . '\x{2A700}-\x{2B734}'
    . '\x{2B740}-\x{2B81D}'
    . '\x{2B820}-\x{2CEA1}'
    . '\x{2CEB0}-\x{2EBE0}'
    . '])+$/u';

// Sample input: two ideographs.
$input = "正则";

// preg_match() returns 1 on a match, 0 on no match and false on error.
$result = preg_match($pattern, $input);

if ($result === 1) {
    echo "匹配成功";
} else {
    echo "匹配失败";
}
Python
import re
pattern = r'^(?:[\u3400-\u4DB5\u4E00-\u9FEA\uFA0E\uFA0F\uFA11\uFA13\uFA14\uFA1F\uFA21\uFA23\uFA24\uFA27-\uFA29]|[\uD840-\uD868\uD86A-\uD86C\uD86F-\uD872\uD874-\uD879][\uDC00-\uDFFF]|\uD869[\uDC00-\uDED6\uDF00-\uDFFF]|\uD86D[\uDC00-\uDF34\uDF40-\uDFFF]|\uD86E[\uDC00-\uDC1D\uDC20-\uDFFF]|\uD873[\uDC00-\uDEA1\uDEB0-\uDFFF]|\uD87A[\uDC00-\uDFE0])import re
pattern = r'^(?:[\u3400-\u4DB5\u4E00-\u9FEA\uFA0E\uFA0F\uFA11\uFA13\uFA14\uFA1F\uFA21\uFA23\uFA24\uFA27-\uFA29]|[\uD840-\uD868\uD86A-\uD86C\uD86F-\uD872\uD874-\uD879][\uDC00-\uDFFF]|\uD869[\uDC00-\uDED6\uDF00-\uDFFF]|\uD86D[\uDC00-\uDF34\uDF40-\uDFFF]|\uD86E[\uDC00-\uDC1D\uDC20-\uDFFF]|\uD873[\uDC00-\uDEA1\uDEB0-\uDFFF]|\uD87A[\uDC00-\uDFE0])$'
input = "正则"
result = re.match(pattern, input)
if result:
#x27;
input = "正则"
result = re.match(pattern, input)
if result: