finish helper functions

This commit is contained in:
Vlasislav Kashin
2025-06-29 18:56:22 +03:00
parent 5f8bdd4b39
commit aa4220ed2d
16 changed files with 16910 additions and 6 deletions

View File

@@ -1,9 +1,12 @@
use crate::proto_gen::phonemetadata::PhoneNumberDesc;
/// Internal phonenumber matching API used to isolate the underlying
/// implementation of the matcher and allow different implementations to be
/// swapped in easily.
pub(crate) trait MatcherApi {
/// Returns whether the given national number (a string containing only decimal
/// digits) matches the national number pattern defined in the given
/// PhoneNumberDesc message.
fn match_national_number(number: &str, number_desc: &PhoneNumberDesc, allow_prefix_match: bool) -> bool;
fn match_national_number(&self, number: &str, number_desc: &PhoneNumberDesc, allow_prefix_match: bool) -> bool;
}

View File

@@ -1,2 +1,6 @@
mod shortnumberinfo;
mod interfaces;
/// This module is automatically generated from /resources/*.proto
mod proto_gen;
mod phonenumberutil;
mod regexp_cache;

View File

@@ -0,0 +1,103 @@
use strum::EnumIter;
use thiserror::Error;
/// INTERNATIONAL and NATIONAL formats are consistent with the definition
/// in ITU-T Recommendation E.123. However we follow local conventions such as
/// using '-' instead of whitespace as separators. For example, the number of
/// the Google Switzerland office will be written as "+41 44 668 1800" in
/// INTERNATIONAL format, and as "044 668 1800" in NATIONAL format. E164
/// format is as per INTERNATIONAL format but with no formatting applied e.g.
/// "+41446681800". RFC3966 is as per INTERNATIONAL format, but with all spaces
/// and other separating symbols replaced with a hyphen, and with any phone
/// number extension appended with ";ext=". It also will have a prefix of
/// "tel:" added, e.g. "tel:+41-44-668-1800".
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PhoneNumberFormat {
/// INTERNATIONAL format with no formatting applied, e.g. "+41446681800".
E164,
/// International format, e.g. "+41 44 668 1800".
International,
/// National format, e.g. "044 668 1800".
National,
/// INTERNATIONAL format with separators replaced by hyphens and a "tel:"
/// prefix, e.g. "tel:+41-44-668-1800".
RFC3966,
}
/// Type of phone numbers.
///
/// Derives `EnumIter`, so all variants can be enumerated with
/// `PhoneNumberType::iter()`.
#[derive(Debug, EnumIter, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PhoneNumberType {
/// Fixed-line numbers.
FixedLine,
/// Mobile numbers.
Mobile,
/// In some regions (e.g. the USA), it is impossible to distinguish between
/// fixed-line and mobile numbers by looking at the phone number itself.
FixedLineOrMobile,
/// Freephone lines
TollFree,
/// Premium-rate numbers.
PremiumRate,
/// The cost of this call is shared between the caller and the recipient, and
/// is hence typically less than PREMIUM_RATE calls. See
/// http://en.wikipedia.org/wiki/Shared_Cost_Service for more information.
SharedCost,
/// Voice over IP numbers. This includes TSoIP (Telephony Service over IP).
VoIP,
/// A personal number is associated with a particular person, and may be
/// routed to either a MOBILE or FIXED_LINE number. Some more information can
/// be found here: http://en.wikipedia.org/wiki/Personal_Numbers
PersonalNumber,
/// Pager numbers.
Pager,
/// Used for "Universal Access Numbers" or "Company Numbers". They may be
/// further routed to specific offices, but allow one number to be used for a
/// company.
UAN,
/// Used for "Voice Mail Access Numbers".
VoiceMail,
/// A phone number is of type UNKNOWN when it does not fit any of the known
/// patterns for a specific region.
Unknown,
}
/// Kinds of match between phone numbers.
///
/// NOTE(review): presumably the result of comparing two phone numbers, as in
/// the Java `MatchType` this mirrors; the consumer is not visible here, so
/// confirm the exact meaning of each variant against it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum MatchType {
InvalidNumber, // NOT_A_NUMBER in the java version.
NoMatch,
ShortNsnMatch,
NsnMatch,
ExactMatch,
}
// Separated enum ValidationResult into ValidationResult err and
// ValidationResultOk for using Result<Ok, Err>
/// Possible outcomes when testing if a PhoneNumber is possible.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Error)]
pub enum ValidationResultErr {
/// The number has an invalid country calling code.
#[error("The number has an invalid country calling code")]
InvalidCountryCode,
/// The number is shorter than all valid numbers for this region.
#[error("The number is shorter than all valid numbers for this region")]
TooShort,
/// The number is longer than the shortest valid numbers for this region,
/// shorter than the longest valid numbers for this region, and does not
/// itself have a number length that matches valid numbers for this region.
/// This can also be returned in the case where
/// IsPossibleNumberForTypeWithReason was called, and there are no numbers of
/// this type at all for this region.
#[error("\
The number is longer than the shortest valid numbers for this region,\
shorter than the longest valid numbers for this region, and does not\
itself have a number length that matches valid numbers for this region\
")]
InvalidLength,
/// The number is longer than all valid numbers for this region.
#[error("The number is longer than all valid numbers for this region")]
TooLong,
}
/// Successful outcomes when testing if a PhoneNumber is possible; the failure
/// outcomes live in `ValidationResultErr`, so the pair can be used as
/// `Result<ValidNumberLenType, ValidationResultErr>`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ValidNumberLenType {
/// The number length matches that of valid numbers for this region.
IsPossible,
/// The number length matches that of local numbers for this region only
/// (i.e. numbers that may be able to be dialled within an area, but do not
/// have all the information to be dialled from anywhere inside or outside
/// the country).
IsPossibleLocalOnly,
}

View File

@@ -0,0 +1,60 @@
/// The minimum length of the national significant number.
pub const MIN_LENGTH_FOR_NSN: usize = 2;
/// The maximum length of the national significant number. The ITU says the
/// maximum length should be 15, but we have found longer numbers in Germany.
pub const MAX_LENGTH_FOR_NSN: usize = 17;
/// The maximum length of the country calling code.
pub const MAX_LENGTH_COUNTRY_CODE: usize = 3;
/// The ASCII plus sign and its full-width variant (U+FF0B).
pub const PLUS_CHARS: &str = "+\u{FF0B}";
/// Regular expression of acceptable punctuation found in phone numbers. This
/// excludes punctuation found as a leading character only. This consists of
/// dash characters, white space characters, full stops, slashes, square
/// brackets, parentheses and tildes. It also includes the letter 'x' as that
/// is found as a placeholder for carrier information in some phone numbers.
/// Full-width variants are also present.
///
/// The value is the *content* of a character class, so the literal square
/// brackets are backslash-escaped; left bare they would open or close a class
/// in whatever pattern this constant is embedded in.
pub const VALID_PUNCTUATION: &str = "-x\
    \u{2010}-\u{2015}\u{2212}\u{30FC}\u{FF0D}-\u{FF0F} \u{00A0}\
    \u{00AD}\u{200B}\u{2060}\u{3000}()\u{FF08}\u{FF09}\u{FF3B}\
    \u{FF3D}.\\[\\]/~\u{2053}\u{223C}";
/// Regular expression of characters typically used to start a second phone
/// number for the purposes of parsing. This allows us to strip off parts of
/// the number that are actually the start of another number, such as for:
/// (530) 583-6985 x302/x2303 -> the second extension here makes this actually
/// two phone numbers, (530) 583-6985 x302 and (530) 583-6985 x2303. We remove
/// the second extension so that the first number is parsed correctly. The
/// string preceding this is captured.
/// This corresponds to SECOND_NUMBER_START in the java version.
pub const CAPTURE_UP_TO_SECOND_NUMBER_START: &str = r"(.*)[\\/] *x";
/// Region code used for non-geographical entities. Upstream libphonenumber
/// uses "001" for this value.
pub const REGION_CODE_FOR_NON_GEO_ENTITY: &str = "001";
pub const PLUS_SIGN: &str = "+";
pub const STAR_SIGN: &str = "*";
pub const RFC3966_EXTN_PREFIX: &str = ";ext=";
pub const RFC3966_PREFIX: &str = "tel:";
pub const RFC3966_PHONE_CONTEXT: &str = ";phone-context=";
pub const RFC3966_ISDN_SUBADDRESS: &str = ";isub=";
/// Pattern matching at most one of `-`, `.`, `(` or `)`.
pub const RFC3966_VISUAL_SEPARATOR: &str = r"[\-\.\(\)]?";
/// Character-class fragment for the Unicode "decimal digit" category.
pub const DIGITS: &str = r"\p{Nd}";
pub const VALID_ALPHA: &str = "a-z";
pub const VALID_ALPHA_INCL_UPPERCASE: &str = "A-Za-z";
/// Default extension prefix to use when formatting. This will be put in front of
/// any extension component of the number, after the main national number is
/// formatted. For example, if you wish the default extension formatting to be "
/// extn: 3456", then you should specify " extn: " here as the default extension
/// prefix. This can be overridden by region-specific preferences.
pub const DEFAULT_EXTN_PREFIX: &str = " ext. ";
/// Zero or more spaces, non-breaking spaces, tabs or commas that may sit
/// between the number and the extension label.
pub const POSSIBLE_SEPARATORS_BETWEEN_NUMBER_AND_EXT_LABEL: &str = "[ \u{00A0}\\t,]*";
/// Optional full stop (.) or colon, followed by zero or more
/// spaces/tabs/commas.
pub const POSSIBLE_CHARS_AFTER_EXT_LABEL: &str = "[:\\.\u{FF0E}]?[ \u{00A0}\\t,-]*";
/// Optional trailing hash after the extension digits.
pub const OPTIONAL_EXT_SUFFIX: &str = "#?";
/// Country calling code shared by the NANPA regions.
pub const NANPA_COUNTRY_CODE: i32 = 1;

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,5 @@
mod helper_constants;
mod metadata;
pub(super) use helper_constants::{*};
pub(super) use metadata::METADATA;

View File

@@ -0,0 +1,452 @@
use std::collections::{HashMap, HashSet};
use protobuf::Message;
use strum::IntoEnumIterator;
use crate::{
interfaces::MatcherApi,
proto_gen::{
phonemetadata::{PhoneMetadata, PhoneMetadataCollection, PhoneNumberDesc},
phonenumber::PhoneNumber,
},
};
use super::{
PhoneNumberFormat, PhoneNumberType, ValidNumberLenType, ValidationResultErr,
helper_constants::{
METADATA, OPTIONAL_EXT_SUFFIX, PLUS_SIGN, POSSIBLE_CHARS_AFTER_EXT_LABEL,
POSSIBLE_SEPARATORS_BETWEEN_NUMBER_AND_EXT_LABEL, RFC3966_EXTN_PREFIX, RFC3966_PREFIX,
},
};
/// Decodes the embedded `METADATA` byte blob into a `PhoneMetadataCollection`.
///
/// Any protobuf decoding failure is handed back to the caller unchanged.
pub(super) fn load_compiled_metadata() -> Result<PhoneMetadataCollection, protobuf::Error> {
    PhoneMetadataCollection::parse_from_bytes(&METADATA)
}
/// Picks the `PhoneNumberDesc` inside `metadata` that describes numbers of
/// `phone_number_type`.
///
/// `FixedLineOrMobile` resolves to the fixed-line description and `Unknown`
/// resolves to the general description.
pub(super) fn get_number_desc_by_type(
    metadata: &PhoneMetadata,
    phone_number_type: PhoneNumberType,
) -> &PhoneNumberDesc {
    // The match is deliberately exhaustive (no `_` arm), so adding a variant
    // to `PhoneNumberType` forces a decision here.
    match phone_number_type {
        PhoneNumberType::FixedLine | PhoneNumberType::FixedLineOrMobile => &metadata.fixed_line,
        PhoneNumberType::Mobile => &metadata.mobile,
        PhoneNumberType::TollFree => &metadata.toll_free,
        PhoneNumberType::PremiumRate => &metadata.premium_rate,
        PhoneNumberType::SharedCost => &metadata.shared_cost,
        PhoneNumberType::VoIP => &metadata.voip,
        PhoneNumberType::PersonalNumber => &metadata.personal_number,
        PhoneNumberType::Pager => &metadata.pager,
        PhoneNumberType::UAN => &metadata.uan,
        PhoneNumberType::VoiceMail => &metadata.voicemail,
        PhoneNumberType::Unknown => &metadata.general_desc,
    }
}
/// Prepends the country calling code to `formatted_number` in the shape
/// required by `number_format`; used by Format and FormatByPattern.
///
/// * `E164` -> `+<code><number>`
/// * `International` -> `+<code> <number>`
/// * `RFC3966` -> `tel:+<code>-<number>`
/// * `National` -> the number is left untouched.
pub(super) fn prefix_number_with_country_calling_code(
    country_calling_code: i32,
    number_format: PhoneNumberFormat,
    formatted_number: &mut String,
) {
    // National numbers carry no calling code: bail out before rendering it.
    if number_format == PhoneNumberFormat::National {
        return;
    }

    let mut code_digits = itoa::Buffer::new();
    let calling_code = code_digits.format(country_calling_code);

    // A prefix cannot be added in place without shifting the existing
    // contents, so the result is built as a single new string and then
    // swapped in.
    let prefixed = match number_format {
        PhoneNumberFormat::E164 => {
            fast_cat::concat_str!(PLUS_SIGN, calling_code, &formatted_number)
        }
        PhoneNumberFormat::International => {
            fast_cat::concat_str!(PLUS_SIGN, calling_code, " ", &formatted_number)
        }
        PhoneNumberFormat::RFC3966 => fast_cat::concat_str!(
            RFC3966_PREFIX,
            PLUS_SIGN,
            calling_code,
            "-",
            &formatted_number
        ),
        // Already handled by the guard above.
        PhoneNumberFormat::National => return,
    };
    *formatted_number = prefixed;
}
/// Returns `true` when the decimal rendering of one national number ends with
/// the decimal rendering of the other. Equal numbers therefore also yield
/// `true`.
pub(super) fn is_national_number_suffix_of_the_other(
    first_number: &PhoneNumber,
    second_number: &PhoneNumber,
) -> bool {
    // Each rendered number borrows from its own buffer, so two are needed.
    let mut first_buf = itoa::Buffer::new();
    let mut second_buf = itoa::Buffer::new();
    let first_digits = first_buf.format(first_number.national_number());
    let second_digits = second_buf.format(second_number.national_number());

    first_digits.ends_with(second_digits) || second_digits.ends_with(first_digits)
}
/// Builds the regular-expression fragment `([<DIGITS>]{1,<max_length>})`: a
/// single capturing group accepting between one and `max_length` digits. Used
/// when assembling the extension-parsing patterns.
pub(super) fn extn_digits(max_length: u32) -> String {
    let mut length_buf = itoa::Buffer::new();
    let upper_bound = length_buf.format(max_length);

    // `DIGITS` is spelled with its full path because the name is so generic.
    let pieces = [
        "([",
        super::helper_constants::DIGITS,
        "]{1,",
        upper_bound,
        "})",
    ];

    // Reserve the exact final size up front so the pushes never reallocate.
    let total_len = pieces.iter().map(|piece| piece.len()).sum();
    let mut pattern = String::with_capacity(total_len);
    for piece in &pieces {
        pattern.push_str(piece);
    }
    pattern
}
/// Assembles the regular-expression pattern used to match phone-number
/// extensions.
///
/// The result is an alternation. Every alternative contains exactly one
/// capturing group (the one produced by `extn_digits`); all other groups are
/// non-capturing. With `for_parsing == false` there are four alternatives, and
/// with `for_parsing == true` two more are appended, giving six capturing
/// groups. If that number changes, MaybeStripExtension needs to be updated.
/// Only the extension digits may be captured, or else parsing will fail.
pub(super) fn create_extn_pattern(for_parsing: bool) -> String {
    // The maximum extension length is capped according to how ambiguous the
    // extension prefix is. ITU officially allows extensions of up to 40
    // digits, but no real examples have been seen and, because extension
    // labels are not standardized, long limits cause many false
    // interpretations.
    const LIMIT_AFTER_EXPLICIT_LABEL: u32 = 20;
    const LIMIT_AFTER_LIKELY_LABEL: u32 = 15;
    const LIMIT_AFTER_AMBIGUOUS_CHAR: u32 = 9;
    const LIMIT_WHEN_NOT_SURE: u32 = 6;

    // Explicit labels such as "ext.". Canonical equivalence does not seem to
    // be available with RE2, so a non-ASCII character like ó is accepted both
    // as the precomposed character and as the base letter followed by the
    // combining acute accent.
    const EXPLICIT_EXT_LABELS: &str = "(?:e?xt(?:ensi(?:o\u{0301}?|\u{00F3}))?n?|(?:\u{FF45})?\u{FF58}\u{FF54}(?:\u{FF4E})?|\u{0434}\u{043E}\u{0431}|anexo)";
    // Single-character markers plus rarer or more ambiguous labels.
    const AMBIGUOUS_EXT_LABELS: &str =
        "(?:[x\u{FF58}#\u{FF03}~\u{FF5E}]|int|\u{FF49}\u{FF4E}\u{FF54})";
    // Used when the extension is not clearly separated from the number.
    const AMBIGUOUS_SEPARATOR: &str = "[- ]+";

    // 1. RFC 3966 style: ";ext=" followed by the digits.
    let rfc_alternative = fast_cat::concat_str!(
        RFC3966_EXTN_PREFIX,
        &extn_digits(LIMIT_AFTER_EXPLICIT_LABEL)
    );
    // 2. Explicit labels like "ext:". As with RFC 3966, more digits are
    //    allowed here than after any other kind of label.
    let explicit_alternative = fast_cat::concat_str!(
        POSSIBLE_SEPARATORS_BETWEEN_NUMBER_AND_EXT_LABEL,
        EXPLICIT_EXT_LABELS,
        POSSIBLE_CHARS_AFTER_EXT_LABEL,
        &extn_digits(LIMIT_AFTER_EXPLICIT_LABEL),
        OPTIONAL_EXT_SUFFIX
    );
    // 3. Single-character or uncommon labels: fewer digits are captured to
    //    reduce the chance of reading two adjacent numbers as number +
    //    extension.
    let ambiguous_alternative = fast_cat::concat_str!(
        POSSIBLE_SEPARATORS_BETWEEN_NUMBER_AND_EXT_LABEL,
        AMBIGUOUS_EXT_LABELS,
        POSSIBLE_CHARS_AFTER_EXT_LABEL,
        &extn_digits(LIMIT_AFTER_AMBIGUOUS_CHAR),
        OPTIONAL_EXT_SUFFIX
    );
    // 4. American style, where the extension ends with a hash: "- 503#".
    let american_alternative = fast_cat::concat_str!(
        AMBIGUOUS_SEPARATOR,
        &extn_digits(LIMIT_WHEN_NOT_SURE),
        "#"
    );

    let matching_pattern = fast_cat::concat_str!(
        &rfc_alternative,
        "|",
        &explicit_alternative,
        "|",
        &ambiguous_alternative,
        "|",
        &american_alternative
    );

    // The remaining alternatives are only supported when parsing extensions,
    // not when matching.
    if !for_parsing {
        return matching_pattern;
    }

    // ",," is commonly used for auto dialling the extension when connected.
    // Semi-colon works in Iphone and also in Android to pop up a button with
    // the extension number following.
    const AUTO_DIALLING_AND_EXT_LABELS: &str = "(?:,{2}|;)";
    // Same as kPossibleSeparatorsBetweenNumberAndExtLabel, but without the
    // comma, since the extension label itself may be made of commas.
    const SEPARATORS_WITHOUT_COMMA: &str = "[ \u{00A0}\t]*";

    // 5. Auto-dialling formats: exclusive to dialling, so longer extensions
    //    are accepted.
    let auto_dialling_alternative = fast_cat::concat_str!(
        SEPARATORS_WITHOUT_COMMA,
        AUTO_DIALLING_AND_EXT_LABELS,
        POSSIBLE_CHARS_AFTER_EXT_LABEL,
        &extn_digits(LIMIT_AFTER_LIKELY_LABEL),
        OPTIONAL_EXT_SUFFIX
    );
    // 6. Any run of commas as the label: more liberal about the label, hence
    //    a strict cap on the number of extension digits.
    let only_commas_alternative = fast_cat::concat_str!(
        SEPARATORS_WITHOUT_COMMA,
        "(?:,)+",
        POSSIBLE_CHARS_AFTER_EXT_LABEL,
        &extn_digits(LIMIT_AFTER_AMBIGUOUS_CHAR),
        OPTIONAL_EXT_SUFFIX
    );

    fast_cat::concat_str!(
        &matching_pattern,
        "|",
        &auto_dialling_alternative,
        "|",
        &only_commas_alternative
    )
}
/// Rewrites `phone_number` in place, character by character, according to
/// `normalization_replacements`.
///
/// Parameters:
/// * `normalization_replacements` - maps a character to the character that
///   should stand in for it in the normalized number.
/// * `remove_non_matches` - when `true`, characters missing from the map are
///   dropped; when `false`, they are copied through unchanged.
/// * `phone_number` - the string to normalize; it is overwritten with the
///   result.
pub(super) fn normalize_helper(
    normalization_replacements: &HashMap<char, char>,
    remove_non_matches: bool,
    phone_number: &mut String,
) {
    // Iterating `chars()` needs no extra validation: a Rust `String` is
    // always valid UTF-8.
    let mut rewritten = String::with_capacity(phone_number.len());
    rewritten.extend(phone_number.chars().filter_map(|original| {
        match normalization_replacements.get(&original) {
            Some(&mapped) => Some(mapped),
            None if remove_non_matches => None,
            None => Some(original),
        }
    }));
    *phone_number = rewritten;
}
/// Returns `true` if there is any possible number data set for a particular
/// PhoneNumberDesc.
///
/// Only one shape counts as "no data": a possible-length list holding the
/// single value `-1`, which marks a type for which no numbers exist. An empty
/// list still counts as data, because it means the lengths are inherited from
/// the "general desc".
pub(super) fn desc_has_possible_number_data(desc: &PhoneNumberDesc) -> bool {
    !matches!(desc.possible_length.as_slice(), [-1])
}
/// Returns `true` if there is any data set for a particular PhoneNumberDesc.
///
/// Note: `DescHasData` must account for any of MetadataFilter's
/// excludableChildFields potentially being absent from the metadata, so it has
/// to check them all. For any change here, make sure every
/// excludableChildField is still being checked.
///
/// If your change is safe simply mention why during a review without needing
/// to change MetadataFilter.
pub(super) fn desc_has_data(desc: &PhoneNumberDesc) -> bool {
    // Most properties are probed because a custom build may have stripped
    // just one of them (e.g. USE_METADATA_LITE strips exampleNumber).
    // PossibleLengthsLocalOnly is intentionally ignored: if it is the only
    // thing present, the type is not really supported, since no type-specific
    // method works with that data alone.
    desc.has_example_number()
        || desc_has_possible_number_data(desc)
        || desc.has_national_number_pattern()
}
/// Adds to `types` every phone number type for which `metadata` carries data.
///
/// FIXED_LINE_OR_MOBILE (a convenience type meaning the exact type cannot be
/// determined) and UNKNOWN (the non-type) are never reported.
pub(super) fn get_supported_types_for_metadata(
    metadata: &PhoneMetadata,
    types: &mut HashSet<PhoneNumberType>,
) {
    types.extend(PhoneNumberType::iter().filter(|&candidate| {
        candidate != PhoneNumberType::FixedLineOrMobile
            && candidate != PhoneNumberType::Unknown
            && desc_has_data(get_number_desc_by_type(metadata, candidate))
    }));
}
/// Checks the length of `phone_number` against the lengths that are possible
/// for `phone_number_type` in `phone_metadata`.
///
/// Returns:
/// * `Ok(IsPossibleLocalOnly)` when the length is one of the local-only
///   lengths,
/// * `Ok(IsPossible)` when the length is one of the possible lengths,
/// * `Err(TooShort)` / `Err(TooLong)` when the length lies below the smallest
///   or above the largest possible length,
/// * `Err(InvalidLength)` when it lies in between without matching, or when
///   the type is not supported at all.
pub(super) fn test_number_length(
    phone_number: &str,
    phone_metadata: &PhoneMetadata,
    phone_number_type: PhoneNumberType,
) -> Result<ValidNumberLenType, ValidationResultErr> {
    let general_lengths = &phone_metadata.general_desc.possible_length;
    let type_desc = get_number_desc_by_type(phone_metadata, phone_number_type);

    // "possibleLengths" should always be set for every element; this is
    // declared in the XML schema and verified by
    // PhoneNumberMetadataSchemaTest. For size efficiency a sub-description
    // (e.g. fixed-line) with the same lengths as its parent omits them, in
    // which case the general description is used. Where no numbers of the
    // type exist at all there is a single possible length (-1), which can
    // never equal the length of a real number.
    let mut lengths = if type_desc.possible_length.is_empty() {
        general_lengths.clone()
    } else {
        type_desc.possible_length.clone()
    };
    let mut local_only_lengths = type_desc.possible_length_local_only.clone();

    if phone_number_type == PhoneNumberType::FixedLineOrMobile {
        let fixed_line = get_number_desc_by_type(phone_metadata, PhoneNumberType::FixedLine);
        if !desc_has_possible_number_data(fixed_line) {
            // Rare case (seen for some non-geographical entities): there is
            // no fixed-line data at all, so only mobile is checked.
            return test_number_length(phone_number, phone_metadata, PhoneNumberType::Mobile);
        }

        let mobile = get_number_desc_by_type(phone_metadata, PhoneNumberType::Mobile);
        if desc_has_possible_number_data(mobile) {
            // Merge the mobile lengths in. An empty mobile list again means
            // "same as the general description", so take them from there.
            let mobile_lengths = if mobile.possible_length.is_empty() {
                general_lengths
            } else {
                &mobile.possible_length
            };
            lengths.extend_from_slice(mobile_lengths);
            lengths.sort_unstable();

            if local_only_lengths.is_empty() {
                local_only_lengths = mobile.possible_length_local_only.clone();
            } else {
                local_only_lengths.extend_from_slice(&mobile.possible_length_local_only);
                local_only_lengths.sort_unstable();
            }
        }
    }

    // A leading -1 (or no lengths at all) means the type is not supported.
    let (shortest, longest) = match (lengths.first(), lengths.last()) {
        (Some(&shortest), Some(&longest)) if shortest != -1 => (shortest, longest),
        _ => return Err(ValidationResultErr::InvalidLength),
    };

    let actual_length = phone_number.len() as i32;

    // Checking the local-only lengths first is safe because they never
    // overlap with the possible lengths; this is checked at build time.
    if local_only_lengths.contains(&actual_length) {
        return Ok(ValidNumberLenType::IsPossibleLocalOnly);
    }
    if actual_length == shortest {
        return Ok(ValidNumberLenType::IsPossible);
    }
    if actual_length < shortest {
        return Err(ValidationResultErr::TooShort);
    }
    if actual_length > longest {
        return Err(ValidationResultErr::TooLong);
    }

    // The first element has already been compared above, so skip it.
    if lengths[1..].contains(&actual_length) {
        Ok(ValidNumberLenType::IsPossible)
    } else {
        Err(ValidationResultErr::InvalidLength)
    }
}
/// Checks the length of `phone_number` against the possible lengths for the
/// whole region described by `phone_metadata`, i.e. runs
/// `test_number_length` with `PhoneNumberType::Unknown`.
pub(super) fn test_number_length_with_unknown_type(
    phone_number: &str,
    phone_metadata: &PhoneMetadata,
) -> Result<ValidNumberLenType, ValidationResultErr> {
    test_number_length(phone_number, phone_metadata, PhoneNumberType::Unknown)
}
/// Copies into `to_number` only the fields of `from_number` needed to uniquely
/// identify a phone number, leaving out the fields that capture the context
/// in which the number was created.
///
/// These fields correspond to those set in `parse()` rather than
/// `parse_and_keep_raw_input()`. Fields of `to_number` that are not written
/// here keep whatever value they already had.
pub(crate) fn copy_core_fields_only(from_number: &PhoneNumber, to_number: &mut PhoneNumber) {
    to_number.set_country_code(from_number.country_code());
    to_number.set_national_number(from_number.national_number());

    if let Some(extension) = from_number.extension.as_ref() {
        to_number.set_extension(extension.clone());
    }

    if !from_number.italian_leading_zero() {
        return;
    }
    to_number.set_italian_leading_zero(true);
    // The zero count only matters when there are leading zeros at all.
    to_number.set_number_of_leading_zeros(from_number.number_of_leading_zeros());
}
/// Determines whether the given number is a national number match for the given
/// PhoneNumberDesc. Does not check against possible lengths!
///
/// Delegates to `MatcherApi::match_national_number` with `allow_prefix_match`
/// set to `false`.
///
/// NOTE(review): `matcher_api` is taken by value, so every call consumes the
/// boxed matcher; borrowing (`&dyn MatcherApi`) looks more appropriate —
/// confirm against the callers.
pub(super) fn is_match(
matcher_api: Box<dyn MatcherApi>,
number: &str,
number_desc: &PhoneNumberDesc,
) -> bool {
matcher_api.match_national_number(number, number_desc, false)
}

View File

@@ -0,0 +1,24 @@
mod helper_constants;
pub mod helper_functions;
mod enums;
mod phonenumberutil;
mod regex_and_mappings;
pub use enums::{MatchType, PhoneNumberFormat, PhoneNumberType, ValidationResultErr, ValidNumberLenType};
use thiserror::Error;
/// Error kinds of the phone number utility; the `Display` text of each variant
/// comes from its `#[error(...)]` attribute.
///
/// NOTE(review): the variant names suggest these are parsing failures
/// mirroring the Java `ErrorType`; the code producing them is not visible
/// here, so confirm before relying on that.
#[derive(Debug, Error)]
pub enum ErrorType {
#[error("No parsing")]
NoParsingError,
#[error("Invalid country code")]
InvalidCountryCodeError, // INVALID_COUNTRY_CODE in the java version.
#[error("Not a number")]
NotANumber,
#[error("Too short after idd")]
TooShortAfterIdd,
#[error("Too short Nsn")]
TooShortNsn,
#[error("Too long nsn")]
TooLongNsn, // TOO_LONG in the java version.
}

View File

@@ -0,0 +1,34 @@
use std::sync::Arc;
use super::regex_and_mappings::PhoneNumberRegExpsAndMappings;
use crate::{interfaces::MatcherApi, proto_gen::phonemetadata::PhoneMetadata};
use dashmap::{DashMap, DashSet};
/// State of the phone number utility: the matcher, the shared regular
/// expressions and the region / calling-code lookup tables.
pub struct PhoneNumberUtil {
/// An API for validation checking.
matcher_api: Box<dyn MatcherApi>,
/// Helper class holding useful regular expressions and character mappings.
reg_exps: Arc<PhoneNumberRegExpsAndMappings>,
/// A mapping from a country calling code to a RegionCode object which denotes
/// the region represented by that country calling code. Note regions under
/// NANPA share the country calling code 1 and Russia and Kazakhstan share the
/// country calling code 7. Under this map, 1 is mapped to region code "US" and
/// 7 is mapped to region code "RU". This is implemented as a sorted vector to
/// achieve better performance.
country_calling_code_to_region_code_map: Vec<(i32, String)>,
/// The set of regions that share country calling code 1.
nanpa_regions: DashSet<String>,
/// A mapping from a region code to a PhoneMetadata for that region.
region_to_metadata_map: DashMap<String, PhoneMetadata>,
/// A mapping from a country calling code for a non-geographical entity to the
/// PhoneMetadata for that country calling code. Examples of the country
/// calling codes include 800 (International Toll Free Service) and 808
/// (International Shared Cost Service).
///
/// NOTE(review): the key is `u32`, while
/// `country_calling_code_to_region_code_map` stores calling codes as `i32` —
/// confirm which type is intended and align the two.
country_code_to_non_geographical_metadata_map: DashMap<u32, PhoneMetadata>,
}

View File

@@ -0,0 +1,3 @@
/// Holder for the regular expressions and character mappings used by the
/// phone number utility. It has no fields yet.
pub struct PhoneNumberRegExpsAndMappings{
}

1
src/proto_gen/mod.rs Normal file
View File

@@ -0,0 +1 @@
include!(concat!(env!("OUT_DIR"), "/proto_gen/mod.rs"));

37
src/regexp_cache.rs Normal file
View File

@@ -0,0 +1,37 @@
use std::sync::Arc;
use dashmap::DashMap;
use thiserror::Error;
/// Error returned when a pattern cannot be compiled into a regular
/// expression. Wraps the underlying `regex::Error`; the `#[from]` attribute
/// lets `?` convert a `regex::Error` into this type.
#[derive(Debug, Error)]
#[error("An error occurred while trying to create regex: {0}")]
pub struct ErrorInvalidRegex(#[from] regex::Error);
/// Cache of compiled regular expressions keyed by their pattern text.
///
/// Lookups and insertions go through `&self`; compiled expressions are handed
/// out as `Arc<regex::Regex>` so callers share one compiled instance.
pub struct RegexCache {
    cache: DashMap<String, Arc<regex::Regex>>,
}

impl Default for RegexCache {
    /// Equivalent to [`RegexCache::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl RegexCache {
    /// Creates an empty cache.
    pub fn new() -> Self {
        Self {
            cache: DashMap::new(),
        }
    }

    /// Creates an empty cache with room pre-allocated for `capacity` patterns.
    pub fn with_capacity(capacity: usize) -> Self {
        Self {
            cache: DashMap::with_capacity(capacity),
        }
    }

    /// Returns the compiled regular expression for `pattern`, compiling and
    /// caching it on first use.
    ///
    /// # Errors
    ///
    /// Returns [`ErrorInvalidRegex`] when `pattern` is not a valid regular
    /// expression; nothing is cached in that case.
    pub fn get_regex(&self, pattern: &str) -> Result<Arc<regex::Regex>, ErrorInvalidRegex> {
        // Fast path: a hit needs no owned copy of the pattern.
        if let Some(cached) = self.cache.get(pattern) {
            return Ok(Arc::clone(cached.value()));
        }

        // Slow path: the entry API compiles the pattern only if the key is
        // still absent.
        let inserted = self
            .cache
            .entry(pattern.to_owned())
            .or_try_insert_with(|| regex::Regex::new(pattern).map(Arc::new))?;
        Ok(Arc::clone(inserted.value()))
    }
}