feat: update regex, bug fixes, add tests

This commit is contained in:
Vlasislav Kashin
2025-07-12 23:30:44 +03:00
parent 2fea8f1e20
commit ebe7d236e9
8 changed files with 1866 additions and 898 deletions

3
.vscode/settings.json vendored Normal file
View File

@@ -0,0 +1,3 @@
{
"java.configuration.updateBuildConfiguration": "interactive"
}

View File

@@ -10,7 +10,7 @@ build = "build/rust_build.rs"
log = "0.4.27" log = "0.4.27"
# helpful error package # helpful error package
thiserror = "2.0.12" thiserror = "2.0.12"
# google protobuf lib required to use .proto files from assets # protobuf lib required to use .proto files from assets
protobuf = "3.7.2" protobuf = "3.7.2"
# optimized concurrent map # optimized concurrent map
dashmap = "6.1.0" dashmap = "6.1.0"

View File

@@ -50,11 +50,11 @@ pub const VALID_ALPHA_INCL_UPPERCASE: &'static str = "A-Za-z";
// prefix. This can be overridden by region-specific preferences. // prefix. This can be overridden by region-specific preferences.
pub const DEFAULT_EXTN_PREFIX: &'static str = " ext. "; pub const DEFAULT_EXTN_PREFIX: &'static str = " ext. ";
pub const POSSIBLE_SEPARATORS_BETWEEN_NUMBER_AND_EXT_LABEL: &'static str = "0001"; pub const POSSIBLE_SEPARATORS_BETWEEN_NUMBER_AND_EXT_LABEL: &'static str = "[ \u{00A0}\\t,]*";
// Optional full stop (.) or colon, followed by zero or more // Optional full stop (.) or colon, followed by zero or more
// spaces/tabs/commas. // spaces/tabs/commas.
pub const POSSIBLE_CHARS_AFTER_EXT_LABEL: &'static str = "[ \u{00A0}\\t,]*"; pub const POSSIBLE_CHARS_AFTER_EXT_LABEL: &'static str = "[:\\.\u{FF0E}]?[ \u{00A0}\\t,-]*";
pub const OPTIONAL_EXT_SUFFIX: &'static str = "[:\\.\u{FF0E}]?[ \u{00A0}\\t,-]*"; pub const OPTIONAL_EXT_SUFFIX: &'static str = "#?";
pub const NANPA_COUNTRY_CODE: i32 = 1; pub const NANPA_COUNTRY_CODE: i32 = 1;

View File

@@ -307,13 +307,12 @@ impl PhoneNumberRegExpsAndMappings {
separator_pattern: Regex::new(&format!("[{}]+", VALID_PUNCTUATION)).unwrap(), separator_pattern: Regex::new(&format!("[{}]+", VALID_PUNCTUATION)).unwrap(),
extn_patterns_for_matching: create_extn_pattern(false), extn_patterns_for_matching: create_extn_pattern(false),
extn_pattern: Regex::new(&format!("(?i)(?:{})$", &extn_patterns_for_parsing)).unwrap(), extn_pattern: Regex::new(&format!("(?i)(?:{})$", &extn_patterns_for_parsing)).unwrap(),
valid_phone_number_pattern: Regex::new(&format!("(?i){}(?:{})?", valid_phone_number_pattern: Regex::new(&format!("(?i)(?:{})(?:{})?",
&valid_phone_number, &valid_phone_number,
extn_patterns_for_parsing &extn_patterns_for_parsing
)).unwrap(),
valid_alpha_phone_pattern: Regex::new(&format!("(?i)(?:.*?[{}]){{3}}",
VALID_ALPHA
)).unwrap(), )).unwrap(),
// from java
valid_alpha_phone_pattern: Regex::new("(?:.*?[A-Za-z]){3}.*").unwrap(),
// The first_group_capturing_pattern was originally set to $1 but there // The first_group_capturing_pattern was originally set to $1 but there
// are some countries for which the first group is not used in the // are some countries for which the first group is not used in the
// national pattern (e.g. Argentina) so the $1 group does not match // national pattern (e.g. Argentina) so the $1 group does not match

File diff suppressed because it is too large Load Diff

View File

@@ -1 +1,2 @@
mod tests; mod tests;
pub(self) mod region_code;

160
src/tests/region_code.rs Normal file
View File

@@ -0,0 +1,160 @@
pub struct RegionCode {}
impl RegionCode {
pub fn ad() -> &'static str {
"AD"
}
pub fn ae() -> &'static str {
"AE"
}
pub fn am() -> &'static str {
"AM"
}
pub fn ao() -> &'static str {
"AO"
}
pub fn aq() -> &'static str {
"AQ"
}
pub fn ar() -> &'static str {
"AR"
}
pub fn au() -> &'static str {
"AU"
}
pub fn bb() -> &'static str {
"BB"
}
pub fn br() -> &'static str {
"BR"
}
pub fn bs() -> &'static str {
"BS"
}
pub fn by() -> &'static str {
"BY"
}
pub fn ca() -> &'static str {
"CA"
}
pub fn ch() -> &'static str {
"CH"
}
pub fn cl() -> &'static str {
"CL"
}
pub fn cn() -> &'static str {
"CN"
}
pub fn co() -> &'static str {
"CO"
}
pub fn cs() -> &'static str {
"CS"
}
pub fn cx() -> &'static str {
"CX"
}
pub fn de() -> &'static str {
"DE"
}
pub fn fr() -> &'static str {
"FR"
}
pub fn gb() -> &'static str {
"GB"
}
pub fn hu() -> &'static str {
"HU"
}
pub fn it() -> &'static str {
"IT"
}
pub fn jp() -> &'static str {
"JP"
}
pub fn kr() -> &'static str {
"KR"
}
pub fn mx() -> &'static str {
"MX"
}
pub fn nz() -> &'static str {
"NZ"
}
pub fn pl() -> &'static str {
"PL"
}
pub fn re() -> &'static str {
"RE"
}
pub fn ru() -> &'static str {
"RU"
}
pub fn se() -> &'static str {
"SE"
}
pub fn sg() -> &'static str {
"SG"
}
pub fn un001() -> &'static str {
"001"
}
pub fn us() -> &'static str {
"US"
}
pub fn uz() -> &'static str {
"UZ"
}
pub fn yt() -> &'static str {
"YT"
}
pub fn zw() -> &'static str {
"ZW"
}
/// s a region code string representing the "unknown" region.
pub fn get_unknown() -> &'static str {
Self::zz()
}
pub fn zz() -> &'static str {
"ZZ"
}
}

File diff suppressed because it is too large Load Diff