added more functions from original code, added macro

This commit is contained in:
Vlasislav Kashin
2025-07-02 18:02:47 +03:00
parent 929fdbae8a
commit d0fb47705a
14 changed files with 1177 additions and 41 deletions

3
src/i18n/mod.rs Normal file
View File

@@ -0,0 +1,3 @@
mod region_code;
pub use region_code::RegionCode;

13
src/i18n/region_code.rs Normal file
View File

@@ -0,0 +1,13 @@
/// Holder type for region code string constants; it has no fields and is
/// used only through its associated functions.
pub struct RegionCode {
}

impl RegionCode {
    /// Region code string that stands for the "unknown" region.
    ///
    /// This is an alias for [`RegionCode::zz`].
    pub fn get_unknown() -> &'static str {
        Self::zz()
    }

    /// The `"ZZ"` region code.
    pub fn zz() -> &'static str {
        "ZZ"
    }
}

View File

@@ -4,7 +4,7 @@ use crate::proto_gen::phonemetadata::PhoneNumberDesc;
/// implementation of the matcher and allow different implementations to be /// implementation of the matcher and allow different implementations to be
/// swapped in easily. /// swapped in easily.
pub(crate) trait MatcherApi { pub(crate) trait MatcherApi: Send + Sync {
/// Returns whether the given national number (a string containing only decimal /// Returns whether the given national number (a string containing only decimal
/// digits) matches the national number pattern defined in the given /// digits) matches the national number pattern defined in the given
/// PhoneNumberDesc message. /// PhoneNumberDesc message.

View File

@@ -4,4 +4,12 @@ mod interfaces;
mod proto_gen; mod proto_gen;
mod phonenumberutil; mod phonenumberutil;
mod regexp_cache; mod regexp_cache;
mod regex_based_matcher; mod regex_based_matcher;
pub mod i18n;
pub mod regex_util;
/// This module exists because the code has many boilerplate
/// fragments that can be replaced with macros whose names
/// describe what is happening more clearly than
/// the few lines of code they stand for.
mod macros;

21
src/macros/mod.rs Normal file
View File

@@ -0,0 +1,21 @@
// std::borrow::Cow
// std::option::Option
/// Evaluates to the owned payload of a `std::borrow::Cow`, or to the given
/// fallback expression when the `Cow` is borrowed.
///
/// Useful when a function returns `Cow<'_, T>` and uses `Cow::Borrowed` to
/// signal "input was not modified": in that case the caller can supply the
/// original owned value as the fallback instead of copying the borrowed data.
///
/// The fallback expression is only evaluated in the borrowed case.
macro_rules! owned_from_cow_or {
    ($cow:expr, $fallback:expr) => {{
        match $cow {
            std::borrow::Cow::Owned(owned) => owned,
            std::borrow::Cow::Borrowed(_) => $fallback,
        }
    }};
}
pub(crate) use owned_from_cow_or;

View File

@@ -0,0 +1,9 @@
use thiserror::Error;
use crate::regexp_cache::ErrorInvalidRegex;
/// Error enum named after `PhoneNumberUtil`; at present it has a single
/// variant wrapping regex-construction failures.
#[derive(Debug, PartialEq, Error)]
pub enum PhoneNumberUtilError {
/// A regular expression could not be created. The message is forwarded
/// unchanged from the wrapped `ErrorInvalidRegex` (`"{0}"`), and `#[from]`
/// provides the `From<ErrorInvalidRegex>` conversion used by `?`.
#[error("{0}")]
InvalidRegexError(#[from] ErrorInvalidRegex)
}

View File

@@ -253,11 +253,13 @@ pub(super) fn create_extn_pattern(for_parsing: bool) -> String {
/// * `remove_non_matches` - indicates whether characters that are not able to be /// * `remove_non_matches` - indicates whether characters that are not able to be
/// replaced should be stripped from the number. If this is false, they will be /// replaced should be stripped from the number. If this is false, they will be
/// left unchanged in the number. /// left unchanged in the number.
///
/// Returns: normalized_string
pub(super) fn normalize_helper( pub(super) fn normalize_helper(
normalization_replacements: &HashMap<char, char>, normalization_replacements: &HashMap<char, char>,
remove_non_matches: bool, remove_non_matches: bool,
phone_number: &mut String, phone_number: &str
) { ) -> String {
let mut normalized_number = String::with_capacity(phone_number.len()); let mut normalized_number = String::with_capacity(phone_number.len());
// Skip UTF checking because strings in rust are valid UTF-8 already // Skip UTF checking because strings in rust are valid UTF-8 already
for phone_char in phone_number.chars() { for phone_char in phone_number.chars() {
@@ -269,7 +271,7 @@ pub(super) fn normalize_helper(
// If neither of the above are true, we remove this character. // If neither of the above are true, we remove this character.
} }
*phone_number = normalized_number; normalized_number
} }
/// Returns `true` if there is any possible number data set for a particular /// Returns `true` if there is any possible number data set for a particular
@@ -307,7 +309,7 @@ pub(super) fn desc_has_data(desc: &PhoneNumberDesc) -> bool {
/// Returns the types we have metadata for based on the PhoneMetadata object /// Returns the types we have metadata for based on the PhoneMetadata object
/// passed in. /// passed in.
pub(super) fn get_supported_types_for_metadata( pub(super) fn populate_supported_types_for_metadata(
metadata: &PhoneMetadata, metadata: &PhoneMetadata,
types: &mut HashSet<PhoneNumberType>, types: &mut HashSet<PhoneNumberType>,
) { ) {
@@ -327,6 +329,13 @@ pub(super) fn get_supported_types_for_metadata(
}); });
} }
/// Returns, as a newly allocated set, the number types that
/// `populate_supported_types_for_metadata` reports for `metadata`.
///
/// This is a convenience wrapper for callers that do not already have a set
/// to fill.
pub(super) fn get_supported_types_for_metadata(metadata: &PhoneMetadata) -> HashSet<PhoneNumberType> {
    // Pre-size the set: 11 (count) minus 2 (filter type or unknown).
    const EXPECTED_TYPE_COUNT: usize = 11 - 2;

    let mut supported = HashSet::with_capacity(EXPECTED_TYPE_COUNT);
    populate_supported_types_for_metadata(metadata, &mut supported);
    supported
}
/// Helper method to check a number against possible lengths for this number /// Helper method to check a number against possible lengths for this number
/// type, and determine whether it matches, or is too short or too long. /// type, and determine whether it matches, or is too short or too long.
pub(super) fn test_number_length( pub(super) fn test_number_length(
@@ -444,7 +453,7 @@ pub(crate) fn copy_core_fields_only(from_number: &PhoneNumber, to_number: &mut P
/// Determines whether the given number is a national number match for the given /// Determines whether the given number is a national number match for the given
/// PhoneNumberDesc. Does not check against possible lengths! /// PhoneNumberDesc. Does not check against possible lengths!
pub(super) fn is_match( pub(super) fn is_match(
matcher_api: Box<dyn MatcherApi>, matcher_api: &Box<dyn MatcherApi>,
number: &str, number: &str,
number_desc: &PhoneNumberDesc, number_desc: &PhoneNumberDesc,
) -> bool { ) -> bool {

View File

@@ -1,13 +1,17 @@
mod helper_constants; mod helper_constants;
pub mod helper_functions; pub mod helper_functions;
mod errors;
mod enums; mod enums;
mod phonenumberutil; mod phonenumberutil;
mod regex_and_mappings;
mod phone_number_regexps_and_mappings; mod phone_number_regexps_and_mappings;
use std::sync::LazyLock;
pub use enums::{MatchType, PhoneNumberFormat, PhoneNumberType, ValidationResultErr, ValidNumberLenType}; pub use enums::{MatchType, PhoneNumberFormat, PhoneNumberType, ValidationResultErr, ValidNumberLenType};
use thiserror::Error; use thiserror::Error;
use crate::phonenumberutil::phonenumberutil::PhoneNumberUtil;
#[derive(Debug, Error)] #[derive(Debug, Error)]
pub enum ErrorType { pub enum ErrorType {
#[error("No parsing")] #[error("No parsing")]
@@ -23,3 +27,7 @@ pub enum ErrorType {
#[error("Too long nsn")] #[error("Too long nsn")]
TooLongNsn, // TOO_LONG in the java version. TooLongNsn, // TOO_LONG in the java version.
} }
/// Shared `PhoneNumberUtil` instance, constructed with `PhoneNumberUtil::new`
/// the first time the static is dereferenced.
static PHONE_NUMBER_UTIL: LazyLock<PhoneNumberUtil> = LazyLock::new(PhoneNumberUtil::new);

View File

@@ -2,7 +2,11 @@ use std::collections::{HashMap, HashSet};
use regex::Regex; use regex::Regex;
use crate::{phonenumberutil::{helper_constants::{self, CAPTURE_UP_TO_SECOND_NUMBER_START, DIGITS, MIN_LENGTH_FOR_NSN, PLUS_CHARS, PLUS_SIGN, RFC3966_VISUAL_SEPARATOR, STAR_SIGN, VALID_ALPHA, VALID_ALPHA_INCL_UPPERCASE, VALID_PUNCTUATION}, helper_functions::create_extn_pattern}, regexp_cache::RegexCache}; use crate::{phonenumberutil::{helper_constants::{
CAPTURE_UP_TO_SECOND_NUMBER_START, DIGITS, MIN_LENGTH_FOR_NSN, PLUS_CHARS,
PLUS_SIGN, RFC3966_VISUAL_SEPARATOR, STAR_SIGN, VALID_ALPHA, VALID_ALPHA_INCL_UPPERCASE,
VALID_PUNCTUATION
}, helper_functions::create_extn_pattern}, regexp_cache::RegexCache};
pub(super) struct PhoneNumberRegExpsAndMappings { pub(super) struct PhoneNumberRegExpsAndMappings {
/// Regular expression of viable phone numbers. This is location independent. /// Regular expression of viable phone numbers. This is location independent.
@@ -150,6 +154,17 @@ pub(super) struct PhoneNumberRegExpsAndMappings {
/// Regular expression of valid domainname for the phone-context parameter, /// Regular expression of valid domainname for the phone-context parameter,
/// following the syntax defined in RFC3966. /// following the syntax defined in RFC3966.
pub rfc3966_domainname_pattern: Regex, pub rfc3966_domainname_pattern: Regex,
/// *Rust note*: in the C++ version this regex is, for some reason, built
/// inside a function, so it has been moved here.
///
/// A pattern that is used to determine if a numberFormat under
/// availableFormats is eligible to be used by the AYTF. It is eligible when
/// the format element under numberFormat contains groups of the dollar sign
/// followed by a single digit, separated by valid phone number punctuation.
/// This prevents invalid punctuation (such as the star sign in Israeli star
/// numbers) getting into the output of the AYTF.
pub is_format_eligible_as_you_type_formatting_regex: Regex
} }
impl PhoneNumberRegExpsAndMappings { impl PhoneNumberRegExpsAndMappings {
@@ -200,6 +215,8 @@ impl PhoneNumberRegExpsAndMappings {
alpha_map.insert('X', '9'); alpha_map.insert('X', '9');
alpha_map.insert('Y', '9'); alpha_map.insert('Y', '9');
alpha_map.insert('Z', '9'); alpha_map.insert('Z', '9');
// IMPORTANT: only uppercase letters like in Java version
self.alpha_mappings = alpha_map; self.alpha_mappings = alpha_map;
let mut combined_map = HashMap::with_capacity(100); let mut combined_map = HashMap::with_capacity(100);
@@ -241,7 +258,7 @@ impl PhoneNumberRegExpsAndMappings {
self.all_plus_number_grouping_symbols = all_plus_number_groupings; self.all_plus_number_grouping_symbols = all_plus_number_groupings;
} }
fn new() -> Self { pub fn new() -> Self {
let alphanum = fast_cat::concat_str!(VALID_ALPHA_INCL_UPPERCASE, DIGITS); let alphanum = fast_cat::concat_str!(VALID_ALPHA_INCL_UPPERCASE, DIGITS);
let extn_patterns_for_parsing = create_extn_pattern(true); let extn_patterns_for_parsing = create_extn_pattern(true);
let valid_phone_number = format!( let valid_phone_number = format!(
@@ -303,6 +320,9 @@ impl PhoneNumberRegExpsAndMappings {
rfc3966_domainname_pattern: Regex::new( rfc3966_domainname_pattern: Regex::new(
&format!("^({}\\.)*{}\\.?$", rfc3966_domainlabel, rfc3966_toplabel) &format!("^({}\\.)*{}\\.?$", rfc3966_domainlabel, rfc3966_toplabel)
).unwrap(), ).unwrap(),
is_format_eligible_as_you_type_formatting_regex: Regex::new(
&format!("[{}]*\\$1[{}]*(\\$\\d[{}]*)*",VALID_PUNCTUATION, VALID_PUNCTUATION, VALID_PUNCTUATION)
).unwrap(),
}; };
instance.initialize_regexp_mappings(); instance.initialize_regexp_mappings();
instance instance

File diff suppressed because it is too large Load Diff

View File

@@ -1,3 +0,0 @@
pub struct PhoneNumberRegExpsAndMappings{
}

View File

@@ -1,6 +1,7 @@
use log::{error}; use log::{error};
use super::regex_util::{RegexFullMatch, RegexConsume};
use crate::{interfaces, proto_gen::phonemetadata::PhoneNumberDesc, regexp_cache::{self, RegexCache}}; use crate::{interfaces, proto_gen::phonemetadata::PhoneNumberDesc, regexp_cache::{ErrorInvalidRegex, RegexCache}};
pub struct RegexBasedMatcher { pub struct RegexBasedMatcher {
cache: RegexCache, cache: RegexCache,
@@ -15,23 +16,15 @@ impl RegexBasedMatcher {
&self, phone_number: &str, &self, phone_number: &str,
number_pattern: &str, number_pattern: &str,
allow_prefix_match: bool allow_prefix_match: bool
) -> Result<bool, regexp_cache::ErrorInvalidRegex> { ) -> Result<bool, ErrorInvalidRegex> {
let regexp = self.cache.get_regex(number_pattern)?; let regexp = self.cache.get_regex(number_pattern)?;
// find first occurrence // find first occurrence
if let Some(mat) = regexp.find(phone_number) { if allow_prefix_match {
// if first position is not matched none of scenarios are possible Ok(regexp.consume_start(phone_number).is_some())
if mat.start() != 0 { } else {
return Ok(false); Ok(regexp.full_match(phone_number))
}
// full match
if mat.end() == phone_number.len() {
return Ok(true);
} else if allow_prefix_match {
return Ok(true);
}
} }
Ok(false)
} }
} }

91
src/regex_util.rs Normal file
View File

@@ -0,0 +1,91 @@
use std::borrow::Cow;
use regex::Regex;
/// Whole-input matching for regular expressions.
pub trait RegexFullMatch {
/// Equivalent of `FullMatch` from the original (C/C++) implementation:
/// returns `true` when the pattern matches `s` from its first byte to its
/// last byte, and `false` otherwise.
fn full_match(&self, s: &str) -> bool;
}
/// Regex operations that "consume" input. In the `Regex` implementation in
/// this file, a successful call returns the part of `s` that follows the
/// matched text, borrowed from `s`.
pub trait RegexConsume {
/// Equivalent of `Consume` from the original (C/C++) implementation, without
/// capture groups: delegates to `consume_start_capturing` with an empty
/// `groups` slice.
fn consume_start<'a>(&self, s: &'a str) -> Option<Cow<'a, str>> {
self.consume_start_capturing(s, &mut [])
}
/// Like `consume_start`, but also stores captured groups into `groups`
/// (slot 0 receives capture group 1, and so on). The `'a: 'b` bound is what
/// allows slices of `s` to be stored in `groups`.
fn consume_start_capturing<'a, 'b>(&self, s: &'a str, groups: &mut [&'b str]) -> Option<Cow<'a, str>>
where 'a: 'b;
/// Convenience form of `find_and_consume_capturing` with an empty `groups`
/// slice.
fn find_and_consume<'a>(&self, s: &'a str) -> Option<Cow<'a, str>> {
self.find_and_consume_capturing(s, &mut [])
}
/// Capturing variant of `find_and_consume`. In the `Regex` implementation in
/// this file it differs from `consume_start_capturing` only in that the
/// match is not required to begin at the start of `s`.
fn find_and_consume_capturing<'a, 'b>(&self, s: &'a str, groups: &mut [&'b str]) -> Option<Cow<'a, str>>
where 'a: 'b;
}
/// Prefix matching for regular expressions (module-private).
trait RegexMatchStart {
// Equivalent of `looking_at`: returns `true` when the pattern matches
// starting at the first byte of `s`; the match does not have to reach the
// end of `s`.
fn match_start(&self, s: &str) -> bool;
}
impl RegexFullMatch for Regex {
fn full_match(&self, s: &str) -> bool {
let found = self.find(s);
if let Some(matched) = found {
return matched.start() == 0 && matched.end() == s.len();
}
false
}
}
impl RegexMatchStart for Regex {
    /// Returns `true` when the first match `find` reports in `s` begins at
    /// byte 0, and `false` when there is no match or it begins later.
    fn match_start(&self, s: &str) -> bool {
        matches!(self.find(s), Some(hit) if hit.start() == 0)
    }
}
/// `Regex` implementation of `RegexConsume`. Both methods delegate to
/// `_consume` and differ only in the final `anchor_at_start` argument.
impl RegexConsume for Regex {
// `true`: the match must begin at byte 0 of `s`.
fn consume_start_capturing<'a, 'b>(&self, s: &'a str, groups: &mut [&'b str]) -> Option<Cow<'a, str>>
where 'a: 'b {
_consume(self, s, groups, true)
}
// `false`: the match may begin anywhere in `s`.
fn find_and_consume_capturing<'a, 'b>(&self, s: &'a str, groups: &mut [&'b str]) -> Option<Cow<'a, str>>
where 'a: 'b {
_consume(self, s, groups, false)
}
}
/// Shared implementation behind the `RegexConsume` methods for `Regex`.
///
/// Runs `r` against `input`, copies capture groups `1..=groups.len()` into
/// `groups`, and returns the part of `input` that follows the overall match.
///
/// * `r` - the pattern to apply.
/// * `input` - the text to match against.
/// * `groups` - output slots; slot `i` receives capture group `i + 1`.
/// * `anchor_at_start` - when `true`, the match must begin at byte 0 of
///   `input`; when `false`, it may begin anywhere.
///
/// Returns `None` when the pattern does not match, when `anchor_at_start` is
/// set and the match begins later than byte 0, or when `groups` has more
/// slots than the pattern has capture groups. `groups` is left untouched in
/// all of these cases.
///
/// An optional capture group that did not participate in the match (for
/// example `(x)?` against input without an `x`) is reported as an empty
/// string rather than failing the whole call. This follows RE2's
/// `Consume`/`FindAndConsume`, which this function is meant to mirror.
fn _consume<'a, 'b>(
    r: &Regex, input: &'a str,
    groups: &mut [&'b str], anchor_at_start: bool
) -> Option<Cow<'a, str>>
    where 'a: 'b {
    let captures = r.captures(input)?;
    let whole_match = captures.get(0)?;
    if anchor_at_start && whole_match.start() != 0 {
        return None;
    }

    // `captures.len()` counts the implicit group 0 as well, so the number of
    // groups the caller can ask for is one less.
    if groups.len() > captures.len() - 1 {
        return None;
    }

    // Group 0 is the whole match; caller-visible groups start at 1.
    for (slot, capture) in groups.iter_mut().zip(captures.iter().skip(1)) {
        *slot = capture.map_or("", |m| m.as_str());
    }

    Some(Cow::Borrowed(&input[whole_match.end()..]))
}

View File

@@ -3,7 +3,7 @@ use std::sync::Arc;
use dashmap::DashMap; use dashmap::DashMap;
use thiserror::Error; use thiserror::Error;
#[derive(Debug, Error)] #[derive(Debug, PartialEq, Error)]
#[error("An error occurred while trying to create regex: {0}")] #[error("An error occurred while trying to create regex: {0}")]
pub struct ErrorInvalidRegex(#[from] regex::Error); pub struct ErrorInvalidRegex(#[from] regex::Error);