added more functions from original code, added macro

This commit is contained in:
Vlasislav Kashin
2025-07-02 18:02:47 +03:00
parent 929fdbae8a
commit d0fb47705a
14 changed files with 1177 additions and 41 deletions

3
src/i18n/mod.rs Normal file
View File

@@ -0,0 +1,3 @@
mod region_code;
pub use region_code::RegionCode;

13
src/i18n/region_code.rs Normal file
View File

@@ -0,0 +1,13 @@
/// Namespace for well-known region code strings.
pub struct RegionCode {}

impl RegionCode {
    /// Returns the region code string that stands for the "unknown" region.
    ///
    /// This is the same value as [`RegionCode::zz`].
    pub fn get_unknown() -> &'static str {
        Self::zz()
    }

    /// Returns the literal region code `"ZZ"`.
    pub fn zz() -> &'static str {
        "ZZ"
    }
}

View File

@@ -4,7 +4,7 @@ use crate::proto_gen::phonemetadata::PhoneNumberDesc;
/// implementation of the matcher and allow different implementations to be
/// swapped in easily.
pub(crate) trait MatcherApi {
pub(crate) trait MatcherApi: Send + Sync {
/// Returns whether the given national number (a string containing only decimal
/// digits) matches the national number pattern defined in the given
/// PhoneNumberDesc message.

View File

@@ -5,3 +5,11 @@ mod proto_gen;
mod phonenumberutil;
mod regexp_cache;
mod regex_based_matcher;
pub mod i18n;
pub mod regex_util;
/// Macros that replace recurring boilerplate in this crate.
/// Each macro is named after the operation it performs, so a call site
/// describes what is happening more clearly than the few lines of code
/// it stands in for.
mod macros;

21
src/macros/mod.rs Normal file
View File

@@ -0,0 +1,21 @@
// std::borrow::Cow
// std::option::Option
/// Unwraps the owned value of a `std::borrow::Cow`, or evaluates a fallback.
///
/// * `$getcow` - expression producing the `Cow` to inspect.
/// * `$default` - expression evaluated only when the `Cow` is borrowed; it
///   must have the same type as the owned value.
///
/// Useful when a function returns `Cow<'_, T>` and uses `Cow::Borrowed` to
/// signal "input was not modified": the caller can then keep the owned
/// original it already has instead of copying the borrowed data.
macro_rules! owned_from_cow_or {
    ($getcow:expr, $default:expr) => {{
        match $getcow {
            std::borrow::Cow::Owned(owned) => owned,
            std::borrow::Cow::Borrowed(_) => $default,
        }
    }};
}
pub(crate) use owned_from_cow_or;

View File

@@ -0,0 +1,9 @@
use thiserror::Error;
use crate::regexp_cache::ErrorInvalidRegex;
/// Errors reported by the phone number utility.
#[derive(Debug, PartialEq, Error)]
pub enum PhoneNumberUtilError {
/// A regular expression could not be created.
///
/// Wraps [`ErrorInvalidRegex`]; `#[from]` lets `?` convert that error into
/// this variant, and the `"{0}"` format forwards the wrapped error's message.
#[error("{0}")]
InvalidRegexError(#[from] ErrorInvalidRegex)
}

View File

@@ -253,11 +253,13 @@ pub(super) fn create_extn_pattern(for_parsing: bool) -> String {
/// * `remove_non_matches` - indicates whether characters that are not able to be
/// replaced should be stripped from the number. If this is false, they will be
/// left unchanged in the number.
///
/// Returns: normalized_string
pub(super) fn normalize_helper(
normalization_replacements: &HashMap<char, char>,
remove_non_matches: bool,
phone_number: &mut String,
) {
phone_number: &str
) -> String {
let mut normalized_number = String::with_capacity(phone_number.len());
// Skip UTF checking because strings in rust are valid UTF-8 already
for phone_char in phone_number.chars() {
@@ -269,7 +271,7 @@ pub(super) fn normalize_helper(
// If neither of the above are true, we remove this character.
}
*phone_number = normalized_number;
normalized_number
}
/// Returns `true` if there is any possible number data set for a particular
@@ -307,7 +309,7 @@ pub(super) fn desc_has_data(desc: &PhoneNumberDesc) -> bool {
/// Returns the types we have metadata for based on the PhoneMetadata object
/// passed in.
pub(super) fn get_supported_types_for_metadata(
pub(super) fn populate_supported_types_for_metadata(
metadata: &PhoneMetadata,
types: &mut HashSet<PhoneNumberType>,
) {
@@ -327,6 +329,13 @@ pub(super) fn get_supported_types_for_metadata(
});
}
/// Returns the set of number types filled in by
/// `populate_supported_types_for_metadata` for the given metadata.
///
/// Convenience wrapper for callers that want a fresh set instead of passing
/// their own.
pub(super) fn get_supported_types_for_metadata(metadata: &PhoneMetadata) -> HashSet<PhoneNumberType> {
    // Capacity hint only: 11 number types in total, minus the 2 that are
    // filtered out (per the original note: "filter type or unknown").
    const NUMBER_TYPE_COUNT: usize = 11;
    const FILTERED_TYPE_COUNT: usize = 2;

    let mut supported = HashSet::with_capacity(NUMBER_TYPE_COUNT - FILTERED_TYPE_COUNT);
    populate_supported_types_for_metadata(metadata, &mut supported);
    supported
}
/// Helper method to check a number against possible lengths for this number
/// type, and determine whether it matches, or is too short or too long.
pub(super) fn test_number_length(
@@ -444,7 +453,7 @@ pub(crate) fn copy_core_fields_only(from_number: &PhoneNumber, to_number: &mut P
/// Determines whether the given number is a national number match for the given
/// PhoneNumberDesc. Does not check against possible lengths!
pub(super) fn is_match(
matcher_api: Box<dyn MatcherApi>,
matcher_api: &Box<dyn MatcherApi>,
number: &str,
number_desc: &PhoneNumberDesc,
) -> bool {

View File

@@ -1,13 +1,17 @@
mod helper_constants;
pub mod helper_functions;
mod errors;
mod enums;
mod phonenumberutil;
mod regex_and_mappings;
mod phone_number_regexps_and_mappings;
use std::sync::LazyLock;
pub use enums::{MatchType, PhoneNumberFormat, PhoneNumberType, ValidationResultErr, ValidNumberLenType};
use thiserror::Error;
use crate::phonenumberutil::phonenumberutil::PhoneNumberUtil;
#[derive(Debug, Error)]
pub enum ErrorType {
#[error("No parsing")]
@@ -23,3 +27,7 @@ pub enum ErrorType {
#[error("Too long nsn")]
TooLongNsn, // TOO_LONG in the java version.
}
/// Shared `PhoneNumberUtil` instance, constructed with `PhoneNumberUtil::new`
/// the first time the static is dereferenced.
static PHONE_NUMBER_UTIL: LazyLock<PhoneNumberUtil> = LazyLock::new(PhoneNumberUtil::new);

View File

@@ -2,7 +2,11 @@ use std::collections::{HashMap, HashSet};
use regex::Regex;
use crate::{phonenumberutil::{helper_constants::{self, CAPTURE_UP_TO_SECOND_NUMBER_START, DIGITS, MIN_LENGTH_FOR_NSN, PLUS_CHARS, PLUS_SIGN, RFC3966_VISUAL_SEPARATOR, STAR_SIGN, VALID_ALPHA, VALID_ALPHA_INCL_UPPERCASE, VALID_PUNCTUATION}, helper_functions::create_extn_pattern}, regexp_cache::RegexCache};
use crate::{phonenumberutil::{helper_constants::{
CAPTURE_UP_TO_SECOND_NUMBER_START, DIGITS, MIN_LENGTH_FOR_NSN, PLUS_CHARS,
PLUS_SIGN, RFC3966_VISUAL_SEPARATOR, STAR_SIGN, VALID_ALPHA, VALID_ALPHA_INCL_UPPERCASE,
VALID_PUNCTUATION
}, helper_functions::create_extn_pattern}, regexp_cache::RegexCache};
pub(super) struct PhoneNumberRegExpsAndMappings {
/// Regular expression of viable phone numbers. This is location independent.
@@ -150,6 +154,17 @@ pub(super) struct PhoneNumberRegExpsAndMappings {
/// Regular expression of valid domainname for the phone-context parameter,
/// following the syntax defined in RFC3966.
pub rfc3966_domainname_pattern: Regex,
/// *Rust note*: the C++ version computes this pattern inside a function;
/// here it is moved into this struct instead.
///
/// A pattern that is used to determine if a numberFormat under
/// availableFormats is eligible to be used by the AYTF. It is eligible when
/// the format element under numberFormat contains groups of the dollar sign
/// followed by a single digit, separated by valid phone number punctuation.
/// This prevents invalid punctuation (such as the star sign in Israeli star
/// numbers) getting into the output of the AYTF.
pub is_format_eligible_as_you_type_formatting_regex: Regex
}
impl PhoneNumberRegExpsAndMappings {
@@ -200,6 +215,8 @@ impl PhoneNumberRegExpsAndMappings {
alpha_map.insert('X', '9');
alpha_map.insert('Y', '9');
alpha_map.insert('Z', '9');
// IMPORTANT: only uppercase letters like in Java version
self.alpha_mappings = alpha_map;
let mut combined_map = HashMap::with_capacity(100);
@@ -241,7 +258,7 @@ impl PhoneNumberRegExpsAndMappings {
self.all_plus_number_grouping_symbols = all_plus_number_groupings;
}
fn new() -> Self {
pub fn new() -> Self {
let alphanum = fast_cat::concat_str!(VALID_ALPHA_INCL_UPPERCASE, DIGITS);
let extn_patterns_for_parsing = create_extn_pattern(true);
let valid_phone_number = format!(
@@ -303,6 +320,9 @@ impl PhoneNumberRegExpsAndMappings {
rfc3966_domainname_pattern: Regex::new(
&format!("^({}\\.)*{}\\.?$", rfc3966_domainlabel, rfc3966_toplabel)
).unwrap(),
is_format_eligible_as_you_type_formatting_regex: Regex::new(
&format!("[{}]*\\$1[{}]*(\\$\\d[{}]*)*",VALID_PUNCTUATION, VALID_PUNCTUATION, VALID_PUNCTUATION)
).unwrap(),
};
instance.initialize_regexp_mappings();
instance

File diff suppressed because it is too large Load Diff

View File

@@ -1,3 +0,0 @@
pub struct PhoneNumberRegExpsAndMappings{
}

View File

@@ -1,6 +1,7 @@
use log::{error};
use super::regex_util::{RegexFullMatch, RegexConsume};
use crate::{interfaces, proto_gen::phonemetadata::PhoneNumberDesc, regexp_cache::{self, RegexCache}};
use crate::{interfaces, proto_gen::phonemetadata::PhoneNumberDesc, regexp_cache::{ErrorInvalidRegex, RegexCache}};
pub struct RegexBasedMatcher {
cache: RegexCache,
@@ -15,23 +16,15 @@ impl RegexBasedMatcher {
&self, phone_number: &str,
number_pattern: &str,
allow_prefix_match: bool
) -> Result<bool, regexp_cache::ErrorInvalidRegex> {
) -> Result<bool, ErrorInvalidRegex> {
let regexp = self.cache.get_regex(number_pattern)?;
// find first occurrence
if let Some(mat) = regexp.find(phone_number) {
// if first position is not matched none of scenarios are possible
if mat.start() != 0 {
return Ok(false);
}
// full match
if mat.end() == phone_number.len() {
return Ok(true);
} else if allow_prefix_match {
return Ok(true);
}
if allow_prefix_match {
Ok(regexp.consume_start(phone_number).is_some())
} else {
Ok(regexp.full_match(phone_number))
}
Ok(false)
}
}

91
src/regex_util.rs Normal file
View File

@@ -0,0 +1,91 @@
use std::borrow::Cow;
use regex::Regex;
/// Whole-string matching for a regular expression.
pub trait RegexFullMatch {
/// Equivalent of the C++ `FullMatch`.
///
/// Returns `true` when the pattern matches `s` from its first byte through
/// its last, and `false` otherwise.
fn full_match(&self, s: &str) -> bool;
}
/// Matching that "consumes" input: on success the caller gets back the part
/// of the input that follows the match.
///
/// The `*_capturing` variants additionally fill `groups` with the text of the
/// pattern's capture groups; the bound `'a: 'b` allows those slices to borrow
/// from the input `s`.
pub trait RegexConsume {
/// Equivalent of the C++ `Consume`, without capture groups.
///
/// Forwards to [`RegexConsume::consume_start_capturing`] with an empty
/// `groups` slice.
fn consume_start<'a>(&self, s: &'a str) -> Option<Cow<'a, str>> {
self.consume_start_capturing(s, &mut [])
}
/// Matches the pattern at the beginning of `s`, stores capture groups into
/// `groups`, and returns what is left of `s` after the match, or `None` when
/// there is no such match.
fn consume_start_capturing<'a, 'b>(&self, s: &'a str, groups: &mut [&'b str]) -> Option<Cow<'a, str>>
where 'a: 'b;
/// Like [`RegexConsume::consume_start`], but the match may begin anywhere
/// in `s`.
///
/// Forwards to [`RegexConsume::find_and_consume_capturing`] with an empty
/// `groups` slice.
fn find_and_consume<'a>(&self, s: &'a str) -> Option<Cow<'a, str>> {
self.find_and_consume_capturing(s, &mut [])
}
/// Finds the pattern anywhere in `s`, stores capture groups into `groups`,
/// and returns what is left of `s` after the match, or `None` when there is
/// no match.
fn find_and_consume_capturing<'a, 'b>(&self, s: &'a str, groups: &mut [&'b str]) -> Option<Cow<'a, str>>
where 'a: 'b;
}
/// Prefix matching for a regular expression (module-private).
trait RegexMatchStart {
// Equivalent of `looking_at`: returns `true` when the pattern matches a
// prefix of `s`, i.e. a match that begins at the first byte of `s`.
fn match_start(&self, s: &str) -> bool;
}
impl RegexFullMatch for Regex {
    /// Returns `true` when the first match reported by [`Regex::find`] spans
    /// `s` from its first byte to its last.
    ///
    /// NOTE(review): `find` uses leftmost-first semantics, so for an
    /// alternation such as `a|ab` on the input `"ab"` it reports `a` and this
    /// method returns `false` even though the whole input could match.
    /// Confirm that callers only pass patterns for which this cannot happen.
    fn full_match(&self, s: &str) -> bool {
        self.find(s)
            .is_some_and(|m| m.start() == 0 && m.end() == s.len())
    }
}
impl RegexMatchStart for Regex {
    /// Returns `true` when the first match reported by [`Regex::find`] begins
    /// at byte offset 0 of `s`; `false` when it begins later or when there is
    /// no match at all.
    fn match_start(&self, s: &str) -> bool {
        match self.find(s) {
            Some(m) => m.start() == 0,
            None => false,
        }
    }
}
impl RegexConsume for Regex {
    /// Delegates to `_consume`, requiring the match to begin at the start
    /// of `s`.
    fn consume_start_capturing<'a, 'b>(
        &self,
        s: &'a str,
        groups: &mut [&'b str],
    ) -> Option<Cow<'a, str>>
    where
        'a: 'b,
    {
        let anchor_at_start = true;
        _consume(self, s, groups, anchor_at_start)
    }

    /// Delegates to `_consume`, accepting a match at any position in `s`.
    fn find_and_consume_capturing<'a, 'b>(
        &self,
        s: &'a str,
        groups: &mut [&'b str],
    ) -> Option<Cow<'a, str>>
    where
        'a: 'b,
    {
        let anchor_at_start = false;
        _consume(self, s, groups, anchor_at_start)
    }
}
/// Shared implementation behind the `RegexConsume` methods for `Regex`.
///
/// * `r` - pattern to run.
/// * `input` - text to search.
/// * `groups` - output slots; slot `i` receives the text of capture group
///   `i + 1`.
/// * `anchor_at_start` - when `true`, a match that does not begin at byte 0
///   of `input` is rejected.
///
/// Returns the part of `input` after the end of the match (always as
/// `Cow::Borrowed`), or `None` when:
/// * the pattern does not match,
/// * `anchor_at_start` is set and the match begins after byte 0,
/// * `groups` has more slots than the pattern has capture groups, or
/// * one of the requested groups did not take part in the match.
///
/// In the last case, slots before the missing group have already been
/// overwritten when `None` is returned.
fn _consume<'a, 'b>(
    r: &Regex, input: &'a str,
    groups: &mut [&'b str], anchor_at_start: bool
) -> Option<Cow<'a, str>>
    where 'a: 'b {
    let caps = r.captures(input)?;
    let whole_match = caps.get(0)?;

    if anchor_at_start && whole_match.start() != 0 {
        return None;
    }

    // `caps.len()` also counts the implicit group 0, so the pattern offers
    // `caps.len() - 1` explicit groups; asking for more than that fails.
    if groups.len() >= caps.len() {
        return None;
    }

    // Explicit groups are numbered from 1, hence the `1..` counter.
    for (slot, group_index) in groups.iter_mut().zip(1..) {
        *slot = caps.get(group_index)?.as_str();
    }

    Some(Cow::Borrowed(&input[whole_match.end()..]))
}

View File

@@ -3,7 +3,7 @@ use std::sync::Arc;
use dashmap::DashMap;
use thiserror::Error;
#[derive(Debug, Error)]
#[derive(Debug, PartialEq, Error)]
#[error("An error occurred while trying to create regex: {0}")]
pub struct ErrorInvalidRegex(#[from] regex::Error);