added more functions from original code, added macro
This commit is contained in:
3
src/i18n/mod.rs
Normal file
3
src/i18n/mod.rs
Normal file
@@ -0,0 +1,3 @@
|
||||
mod region_code;
|
||||
|
||||
pub use region_code::RegionCode;
|
||||
13
src/i18n/region_code.rs
Normal file
13
src/i18n/region_code.rs
Normal file
@@ -0,0 +1,13 @@
|
||||
/// Holder for region code string constants; it carries no data and only
/// exposes associated functions.
pub struct RegionCode {}

impl RegionCode {
    /// Returns a region code string representing the "unknown" region.
    pub fn get_unknown() -> &'static str {
        Self::zz()
    }

    /// Returns the region code `"ZZ"`, the value that
    /// [`RegionCode::get_unknown`] reports for the "unknown" region.
    pub fn zz() -> &'static str {
        "ZZ"
    }
}
|
||||
@@ -4,7 +4,7 @@ use crate::proto_gen::phonemetadata::PhoneNumberDesc;
|
||||
/// implementation of the matcher and allow different implementations to be
|
||||
/// swapped in easily.
|
||||
|
||||
pub(crate) trait MatcherApi {
|
||||
pub(crate) trait MatcherApi: Send + Sync {
|
||||
/// Returns whether the given national number (a string containing only decimal
|
||||
/// digits) matches the national number pattern defined in the given
|
||||
/// PhoneNumberDesc message.
|
||||
|
||||
@@ -5,3 +5,11 @@ mod proto_gen;
|
||||
mod phonenumberutil;
|
||||
mod regexp_cache;
|
||||
mod regex_based_matcher;
|
||||
pub mod i18n;
|
||||
pub mod regex_util;
|
||||
|
||||
/// I decided to create this module because there are many
|
||||
/// boilerplate places in the code that can be replaced with macros,
|
||||
/// the name of which will describe what is happening more
|
||||
/// clearly than a few lines of code.
|
||||
mod macros;
|
||||
21
src/macros/mod.rs
Normal file
21
src/macros/mod.rs
Normal file
@@ -0,0 +1,21 @@
|
||||
// std::borrow::Cow
|
||||
// std::option::Option
|
||||
|
||||
/// Extracts the owned value from a [`Cow`](std::borrow::Cow), falling back
/// to a caller-supplied default when the `Cow` is not owned.
///
/// This is helpful when a function returns `Cow<'_, T>` and uses
/// `Cow::Borrowed` to signal that the value was not modified: the caller can
/// then reuse the original owned value instead of copying it.
///
/// `$default` is evaluated only when `$getcow` is not `Cow::Owned`.
macro_rules! owned_from_cow_or {
    ($getcow:expr, $default:expr) => {{
        // The leading `::` pins the path to the real standard library, so an
        // item named `std` at the call site cannot shadow it.
        if let ::std::borrow::Cow::Owned(owned) = $getcow {
            owned
        } else {
            $default
        }
    }};
}
|
||||
|
||||
pub(crate) use owned_from_cow_or;
|
||||
|
||||
9
src/phonenumberutil/errors.rs
Normal file
9
src/phonenumberutil/errors.rs
Normal file
@@ -0,0 +1,9 @@
|
||||
use thiserror::Error;
|
||||
|
||||
use crate::regexp_cache::ErrorInvalidRegex;
|
||||
|
||||
#[derive(Debug, PartialEq, Error)]
|
||||
pub enum PhoneNumberUtilError {
|
||||
#[error("{0}")]
|
||||
InvalidRegexError(#[from] ErrorInvalidRegex)
|
||||
}
|
||||
@@ -253,11 +253,13 @@ pub(super) fn create_extn_pattern(for_parsing: bool) -> String {
|
||||
/// * `remove_non_matches` - indicates whether characters that are not able to be
|
||||
/// replaced should be stripped from the number. If this is false, they will be
|
||||
/// left unchanged in the number.
|
||||
///
|
||||
/// Returns: normalized_string
|
||||
pub(super) fn normalize_helper(
|
||||
normalization_replacements: &HashMap<char, char>,
|
||||
remove_non_matches: bool,
|
||||
phone_number: &mut String,
|
||||
) {
|
||||
phone_number: &str
|
||||
) -> String {
|
||||
let mut normalized_number = String::with_capacity(phone_number.len());
|
||||
// Skip UTF-8 validation because Rust strings are already valid UTF-8
|
||||
for phone_char in phone_number.chars() {
|
||||
@@ -269,7 +271,7 @@ pub(super) fn normalize_helper(
|
||||
// If neither of the above are true, we remove this character.
|
||||
}
|
||||
|
||||
*phone_number = normalized_number;
|
||||
normalized_number
|
||||
}
|
||||
|
||||
/// Returns `true` if there is any possible number data set for a particular
|
||||
@@ -307,7 +309,7 @@ pub(super) fn desc_has_data(desc: &PhoneNumberDesc) -> bool {
|
||||
|
||||
/// Returns the types we have metadata for based on the PhoneMetadata object
|
||||
/// passed in.
|
||||
pub(super) fn get_supported_types_for_metadata(
|
||||
pub(super) fn populate_supported_types_for_metadata(
|
||||
metadata: &PhoneMetadata,
|
||||
types: &mut HashSet<PhoneNumberType>,
|
||||
) {
|
||||
@@ -327,6 +329,13 @@ pub(super) fn get_supported_types_for_metadata(
|
||||
});
|
||||
}
|
||||
|
||||
pub(super) fn get_supported_types_for_metadata(metadata: &PhoneMetadata) -> HashSet<PhoneNumberType> {
|
||||
const EFFECTIVE_NUMBER_TYPES: usize = 11 /* count */ - 2 /* filter type or unknown */;
|
||||
let mut types = HashSet::with_capacity(EFFECTIVE_NUMBER_TYPES);
|
||||
populate_supported_types_for_metadata(metadata, &mut types);
|
||||
types
|
||||
}
|
||||
|
||||
/// Helper method to check a number against possible lengths for this number
|
||||
/// type, and determine whether it matches, or is too short or too long.
|
||||
pub(super) fn test_number_length(
|
||||
@@ -444,7 +453,7 @@ pub(crate) fn copy_core_fields_only(from_number: &PhoneNumber, to_number: &mut P
|
||||
/// Determines whether the given number is a national number match for the given
|
||||
/// PhoneNumberDesc. Does not check against possible lengths!
|
||||
pub(super) fn is_match(
|
||||
matcher_api: Box<dyn MatcherApi>,
|
||||
matcher_api: &Box<dyn MatcherApi>,
|
||||
number: &str,
|
||||
number_desc: &PhoneNumberDesc,
|
||||
) -> bool {
|
||||
|
||||
@@ -1,13 +1,17 @@
|
||||
mod helper_constants;
|
||||
pub mod helper_functions;
|
||||
mod errors;
|
||||
mod enums;
|
||||
mod phonenumberutil;
|
||||
mod regex_and_mappings;
|
||||
mod phone_number_regexps_and_mappings;
|
||||
|
||||
use std::sync::LazyLock;
|
||||
|
||||
pub use enums::{MatchType, PhoneNumberFormat, PhoneNumberType, ValidationResultErr, ValidNumberLenType};
|
||||
use thiserror::Error;
|
||||
|
||||
use crate::phonenumberutil::phonenumberutil::PhoneNumberUtil;
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum ErrorType {
|
||||
#[error("No parsing")]
|
||||
@@ -23,3 +27,7 @@ pub enum ErrorType {
|
||||
#[error("Too long nsn")]
|
||||
TooLongNsn, // TOO_LONG in the java version.
|
||||
}
|
||||
|
||||
static PHONE_NUMBER_UTIL: LazyLock<PhoneNumberUtil> = LazyLock::new(|| {
|
||||
PhoneNumberUtil::new()
|
||||
});
|
||||
@@ -2,7 +2,11 @@ use std::collections::{HashMap, HashSet};
|
||||
|
||||
use regex::Regex;
|
||||
|
||||
use crate::{phonenumberutil::{helper_constants::{self, CAPTURE_UP_TO_SECOND_NUMBER_START, DIGITS, MIN_LENGTH_FOR_NSN, PLUS_CHARS, PLUS_SIGN, RFC3966_VISUAL_SEPARATOR, STAR_SIGN, VALID_ALPHA, VALID_ALPHA_INCL_UPPERCASE, VALID_PUNCTUATION}, helper_functions::create_extn_pattern}, regexp_cache::RegexCache};
|
||||
use crate::{phonenumberutil::{helper_constants::{
|
||||
CAPTURE_UP_TO_SECOND_NUMBER_START, DIGITS, MIN_LENGTH_FOR_NSN, PLUS_CHARS,
|
||||
PLUS_SIGN, RFC3966_VISUAL_SEPARATOR, STAR_SIGN, VALID_ALPHA, VALID_ALPHA_INCL_UPPERCASE,
|
||||
VALID_PUNCTUATION
|
||||
}, helper_functions::create_extn_pattern}, regexp_cache::RegexCache};
|
||||
|
||||
pub(super) struct PhoneNumberRegExpsAndMappings {
|
||||
/// Regular expression of viable phone numbers. This is location independent.
|
||||
@@ -150,6 +154,17 @@ pub(super) struct PhoneNumberRegExpsAndMappings {
|
||||
/// Regular expression of valid domainname for the phone-context parameter,
|
||||
/// following the syntax defined in RFC3966.
|
||||
pub rfc3966_domainname_pattern: Regex,
|
||||
|
||||
/// *Rust note*: in C++ this is, for some reason, computed inside a function,
|
||||
/// so we moved it here.
|
||||
///
|
||||
/// A pattern that is used to determine if a numberFormat under
|
||||
/// availableFormats is eligible to be used by the AYTF. It is eligible when
|
||||
/// the format element under numberFormat contains groups of the dollar sign
|
||||
/// followed by a single digit, separated by valid phone number punctuation.
|
||||
/// This prevents invalid punctuation (such as the star sign in Israeli star
|
||||
/// numbers) getting into the output of the AYTF.
|
||||
pub is_format_eligible_as_you_type_formatting_regex: Regex
|
||||
}
|
||||
|
||||
impl PhoneNumberRegExpsAndMappings {
|
||||
@@ -200,6 +215,8 @@ impl PhoneNumberRegExpsAndMappings {
|
||||
alpha_map.insert('X', '9');
|
||||
alpha_map.insert('Y', '9');
|
||||
alpha_map.insert('Z', '9');
|
||||
// IMPORTANT: only uppercase letters are mapped, as in the Java version
|
||||
|
||||
self.alpha_mappings = alpha_map;
|
||||
|
||||
let mut combined_map = HashMap::with_capacity(100);
|
||||
@@ -241,7 +258,7 @@ impl PhoneNumberRegExpsAndMappings {
|
||||
self.all_plus_number_grouping_symbols = all_plus_number_groupings;
|
||||
}
|
||||
|
||||
fn new() -> Self {
|
||||
pub fn new() -> Self {
|
||||
let alphanum = fast_cat::concat_str!(VALID_ALPHA_INCL_UPPERCASE, DIGITS);
|
||||
let extn_patterns_for_parsing = create_extn_pattern(true);
|
||||
let valid_phone_number = format!(
|
||||
@@ -303,6 +320,9 @@ impl PhoneNumberRegExpsAndMappings {
|
||||
rfc3966_domainname_pattern: Regex::new(
|
||||
&format!("^({}\\.)*{}\\.?$", rfc3966_domainlabel, rfc3966_toplabel)
|
||||
).unwrap(),
|
||||
is_format_eligible_as_you_type_formatting_regex: Regex::new(
|
||||
&format!("[{}]*\\$1[{}]*(\\$\\d[{}]*)*",VALID_PUNCTUATION, VALID_PUNCTUATION, VALID_PUNCTUATION)
|
||||
).unwrap(),
|
||||
};
|
||||
instance.initialize_regexp_mappings();
|
||||
instance
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,3 +0,0 @@
|
||||
pub struct PhoneNumberRegExpsAndMappings{
|
||||
|
||||
}
|
||||
@@ -1,6 +1,7 @@
|
||||
use log::{error};
|
||||
use super::regex_util::{RegexFullMatch, RegexConsume};
|
||||
|
||||
use crate::{interfaces, proto_gen::phonemetadata::PhoneNumberDesc, regexp_cache::{self, RegexCache}};
|
||||
use crate::{interfaces, proto_gen::phonemetadata::PhoneNumberDesc, regexp_cache::{ErrorInvalidRegex, RegexCache}};
|
||||
|
||||
pub struct RegexBasedMatcher {
|
||||
cache: RegexCache,
|
||||
@@ -15,23 +16,15 @@ impl RegexBasedMatcher {
|
||||
&self, phone_number: &str,
|
||||
number_pattern: &str,
|
||||
allow_prefix_match: bool
|
||||
) -> Result<bool, regexp_cache::ErrorInvalidRegex> {
|
||||
) -> Result<bool, ErrorInvalidRegex> {
|
||||
let regexp = self.cache.get_regex(number_pattern)?;
|
||||
|
||||
// find first occurrence
|
||||
if let Some(mat) = regexp.find(phone_number) {
|
||||
// if first position is not matched none of scenarios are possible
|
||||
if mat.start() != 0 {
|
||||
return Ok(false);
|
||||
}
|
||||
// full match
|
||||
if mat.end() == phone_number.len() {
|
||||
return Ok(true);
|
||||
} else if allow_prefix_match {
|
||||
return Ok(true);
|
||||
}
|
||||
if allow_prefix_match {
|
||||
Ok(regexp.consume_start(phone_number).is_some())
|
||||
} else {
|
||||
Ok(regexp.full_match(phone_number))
|
||||
}
|
||||
Ok(false)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
91
src/regex_util.rs
Normal file
91
src/regex_util.rs
Normal file
@@ -0,0 +1,91 @@
|
||||
use std::borrow::Cow;
|
||||
|
||||
use regex::Regex;
|
||||
|
||||
/// Whole-input matching for pattern types.
pub trait RegexFullMatch {
    /// Equivalent of the C++ `FullMatch`: reports whether the pattern
    /// matches `s` in its entirety.
    fn full_match(&self, s: &str) -> bool;
}
|
||||
|
||||
/// Consume-style matching: a successful match yields the part of the input
/// that follows it.
pub trait RegexConsume {
    /// Equivalent of the C++ `Consume`, without capture groups.
    ///
    /// Forwards to [`RegexConsume::consume_start_capturing`] with an empty
    /// group slice.
    fn consume_start<'a>(&self, s: &'a str) -> Option<Cow<'a, str>> {
        let mut no_groups: [&str; 0] = [];
        self.consume_start_capturing(s, &mut no_groups)
    }

    /// Like [`RegexConsume::consume_start`], but also writes captured group
    /// text into `groups`. Returns `None` when nothing is consumed.
    fn consume_start_capturing<'a, 'b>(&self, s: &'a str, groups: &mut [&'b str]) -> Option<Cow<'a, str>>
        where 'a: 'b;

    /// Group-less form of [`RegexConsume::find_and_consume_capturing`];
    /// forwards to it with an empty group slice.
    fn find_and_consume<'a>(&self, s: &'a str) -> Option<Cow<'a, str>> {
        let mut no_groups: [&str; 0] = [];
        self.find_and_consume_capturing(s, &mut no_groups)
    }

    /// Searches `s` for a match, writes captured group text into `groups`
    /// and returns the input that follows the match, or `None` when nothing
    /// is consumed.
    fn find_and_consume_capturing<'a, 'b>(&self, s: &'a str, groups: &mut [&'b str]) -> Option<Cow<'a, str>>
        where 'a: 'b;
}
|
||||
|
||||
/// Start-anchored matching for pattern types.
trait RegexMatchStart {
    /// Equivalent of `looking_at`: reports whether a match begins at the
    /// very start of `s`.
    fn match_start(&self, s: &str) -> bool;
}
|
||||
|
||||
impl RegexFullMatch for Regex {
|
||||
fn full_match(&self, s: &str) -> bool {
|
||||
let found = self.find(s);
|
||||
if let Some(matched) = found {
|
||||
return matched.start() == 0 && matched.end() == s.len();
|
||||
}
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
impl RegexMatchStart for Regex {
    /// Reports whether the leftmost match of the pattern in `s` begins at
    /// byte offset 0.
    fn match_start(&self, s: &str) -> bool {
        self.find(s).is_some_and(|leftmost| leftmost.start() == 0)
    }
}
|
||||
|
||||
impl RegexConsume for Regex {
|
||||
fn consume_start_capturing<'a, 'b>(&self, s: &'a str, groups: &mut [&'b str]) -> Option<Cow<'a, str>>
|
||||
where 'a: 'b {
|
||||
_consume(self, s, groups, true)
|
||||
}
|
||||
|
||||
fn find_and_consume_capturing<'a, 'b>(&self, s: &'a str, groups: &mut [&'b str]) -> Option<Cow<'a, str>>
|
||||
where 'a: 'b {
|
||||
_consume(self, s, groups, false)
|
||||
}
|
||||
}
|
||||
|
||||
fn _consume<'a, 'b>(
|
||||
r: &Regex, input: &'a str,
|
||||
groups: &mut [&'b str], anchor_at_start: bool
|
||||
) -> Option<Cow<'a, str>>
|
||||
where 'a: 'b {
|
||||
let captures = r.captures(input)?;
|
||||
let full_capture = captures.get(0)?;
|
||||
if anchor_at_start && full_capture.start() != 0 {
|
||||
return None
|
||||
}
|
||||
// Check if expected groups count is leq
|
||||
// captures.len includes full group (0), so take captures.len() - 1
|
||||
if groups.len() > captures.len() - 1 {
|
||||
return None;
|
||||
}
|
||||
|
||||
// If less matches than expected - fail.
|
||||
for i in 1..=groups.len() {
|
||||
// Groups are counted from 1 rather than 0.
|
||||
if let Some(capture) = captures.get(i) {
|
||||
groups[i-1] = capture.as_str();
|
||||
} else {
|
||||
// should never happen
|
||||
return None
|
||||
}
|
||||
}
|
||||
Some(Cow::Borrowed(&input[full_capture.end()..]))
|
||||
}
|
||||
@@ -3,7 +3,7 @@ use std::sync::Arc;
|
||||
use dashmap::DashMap;
|
||||
use thiserror::Error;
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
#[derive(Debug, PartialEq, Error)]
|
||||
#[error("An error occurred while trying to create regex: {0}")]
|
||||
pub struct ErrorInvalidRegex(#[from] regex::Error);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user