Compare commits

...

9 Commits

Author SHA1 Message Date
Vlasislav Kashin
cb5f0d8fcc Refactor build script 2025-07-13 15:01:40 +03:00
Vlasislav Kashin
3a2e8e6c0f Move helper constants out of folder 2025-07-13 14:59:39 +03:00
Vlasislav Kashin
467416e3ef Update generated location 2025-07-13 14:58:49 +03:00
Vlasislav Kashin
1464119ff8 Better error naming 2025-07-13 14:52:06 +03:00
Vlasislav Kashin
beae04dee8 Add more tests, better error handling 2025-07-13 14:49:56 +03:00
Vlasislav Kashin
ebe7d236e9 feat: update regex, bug fixes, add tests 2025-07-12 23:30:44 +03:00
Vlasislav Kashin
2fea8f1e20 Phonenumberutil: add is_alpha_number 2025-07-12 21:21:32 +03:00
Vlasislav Kashin
392c793d5c Update phonenumberutil get_national_significant_number - &self receiver 2025-07-12 20:59:17 +03:00
Vlasislav Kashin
e7daffa6f7 helper_constants: fix const REGION_CODE_FOR_NON_GEO_ENTITY 2025-07-12 20:35:33 +03:00
24 changed files with 2938 additions and 957 deletions

3
.vscode/settings.json vendored Normal file
View File

@@ -0,0 +1,3 @@
{
"java.configuration.updateBuildConfiguration": "interactive"
}

View File

@@ -10,7 +10,7 @@ build = "build/rust_build.rs"
log = "0.4.27" log = "0.4.27"
# helpful error package # helpful error package
thiserror = "2.0.12" thiserror = "2.0.12"
# google protobuf lib required to use .proto files from assets # protobuf lib required to use .proto files from assets
protobuf = "3.7.2" protobuf = "3.7.2"
# optimized concurrent map # optimized concurrent map
dashmap = "6.1.0" dashmap = "6.1.0"

View File

@@ -1,8 +1,9 @@
/** TODO: uncomment, move to macros and refactor
/** /**
* This file represents content of https://github.com/google/libphonenumber/tree/master/tools/cpp * This file represents content of https://github.com/google/libphonenumber/tree/master/tools/cpp
*/ */
use std::{collections::BTreeMap, fs::File, io::{BufRead, BufReader}, num::ParseIntError, path::Path};
use thiserror::Error; use thiserror::Error;
@@ -68,9 +69,9 @@ fn parse_prefixes(path: &str, prefixes: &mut BTreeMap<i32, String>) -> Result<()
Ok(()) Ok(())
} }
*/
fn main() {
fn main() -> Result<(), BuildError> {
protobuf_codegen::Codegen::new() protobuf_codegen::Codegen::new()
.pure() .pure()
.includes(["resources"]) .includes(["resources"])
@@ -78,5 +79,4 @@ fn main() -> Result<(), BuildError> {
.input("resources/phonenumber.proto") .input("resources/phonenumber.proto")
.cargo_out_dir("proto_gen") .cargo_out_dir("proto_gen")
.run_from_script(); .run_from_script();
Ok(())
} }

View File

@@ -0,0 +1,6 @@
mod metadata;
mod test_metadata;
pub use metadata::METADATA;
pub use test_metadata::TEST_METADATA;

2
src/generated/mod.rs Normal file
View File

@@ -0,0 +1,2 @@
pub mod proto;
pub mod metadata;

View File

@@ -1,5 +1,5 @@
use crate::proto_gen::phonemetadata::PhoneNumberDesc; use crate::phonemetadata::PhoneNumberDesc;
/// Internal phonenumber matching API used to isolate the underlying /// Internal phonenumber matching API used to isolate the underlying
/// implementation of the matcher and allow different implementations to be /// implementation of the matcher and allow different implementations to be
/// swapped in easily. /// swapped in easily.

View File

@@ -1,7 +1,7 @@
mod shortnumberinfo; mod shortnumberinfo;
mod interfaces; mod interfaces;
/// This module is automatically generated from /resources/*.proto /// This module is automatically generated from /resources/*.proto
mod proto_gen; mod generated;
mod phonenumberutil; mod phonenumberutil;
mod regexp_cache; mod regexp_cache;
mod regex_based_matcher; mod regex_based_matcher;
@@ -30,6 +30,6 @@ pub use phonenumberutil::{
errors, errors,
enums, enums,
}; };
pub use proto_gen::phonemetadata; pub use generated::proto::phonemetadata;
pub use proto_gen::phonenumber; pub use generated::proto::phonenumber;
mod tests; mod tests;

View File

@@ -7,10 +7,10 @@ use crate::regexp_cache::ErrorInvalidRegex;
#[derive(Debug, PartialEq, Error)] #[derive(Debug, PartialEq, Error)]
pub enum InternalLogicError { pub enum InternalLogicError {
#[error("{0}")] #[error("{0}")]
InvalidRegexError(#[from] ErrorInvalidRegex), InvalidRegex(#[from] ErrorInvalidRegex),
#[error("{0}")] #[error("{0}")]
InvalidMetadataForValidRegionError(#[from] InvalidMetadataForValidRegionError) InvalidMetadataForValidRegion(#[from] InvalidMetadataForValidRegionError)
} }
#[derive(Debug, PartialEq, Error)] #[derive(Debug, PartialEq, Error)]
@@ -18,9 +18,9 @@ pub enum ParseError {
// Removed as OK variant // Removed as OK variant
// NoParsingError, // NoParsingError,
#[error("Invalid country code")] #[error("Invalid country code")]
InvalidCountryCodeError, // INVALID_COUNTRY_CODE in the java version. InvalidCountryCode, // INVALID_COUNTRY_CODE in the java version.
#[error("Not a number")] #[error("Not a number: {0}")]
NotANumber, NotANumber(#[from] NotANumberError),
#[error("Too short after idd")] #[error("Too short after idd")]
TooShortAfterIdd, TooShortAfterIdd,
#[error("Too short Nsn")] #[error("Too short Nsn")]
@@ -28,11 +28,19 @@ pub enum ParseError {
#[error("Too long nsn")] #[error("Too long nsn")]
TooLongNsn, // TOO_LONG in the java version. TooLongNsn, // TOO_LONG in the java version.
#[error("{0}")] #[error("{0}")]
InvalidRegexError(#[from] ErrorInvalidRegex), InvalidRegex(#[from] ErrorInvalidRegex),
}
#[derive(Debug, PartialEq, Error)]
pub enum NotANumberError {
#[error("Number not matched a valid number pattern")]
NotMatchedValidNumberPattern,
#[error("Invalid phone context")]
InvalidPhoneContext,
#[error("{0}")] #[error("{0}")]
ParseNumberAsIntError(#[from] ParseIntError), FailedToParseNumberAsInt(#[from] ParseIntError),
#[error("{0}")] #[error("{0}")]
ExtractNumberError(#[from] ExtractNumberError), FailedToExtractNumber(#[from] ExtractNumberError),
} }
#[derive(Debug, PartialEq, Error)] #[derive(Debug, PartialEq, Error)]
@@ -43,18 +51,24 @@ pub enum ExtractNumberError {
NotANumber, NotANumber,
} }
impl From<ExtractNumberError> for ParseError {
fn from(value: ExtractNumberError) -> Self {
NotANumberError::FailedToExtractNumber(value).into()
}
}
#[derive(Debug, PartialEq, Error)] #[derive(Debug, PartialEq, Error)]
pub enum GetExampleNumberError { pub enum GetExampleNumberError {
#[error("Parse error: {0}")] #[error("Parse error: {0}")]
ParseError(#[from] ParseError), FailedToParse(#[from] ParseError),
#[error("{0}")] #[error("{0}")]
InternalLogicError(#[from] InternalLogicError), Internal(#[from] InternalLogicError),
#[error("No example number")] #[error("No example number")]
NoExampleNumberError, NoExampleNumber,
#[error("Could not get number")] #[error("Could not get number")]
CouldNotGetNumberError, CouldNotGetNumber,
#[error("Invalid metadata")] #[error("Invalid metadata")]
InvalidMetadataError InvalidMetadata
} }

View File

@@ -1,3 +0,0 @@
pub mod metadata;
pub mod test_metadata;

View File

@@ -28,7 +28,7 @@ pub const VALID_PUNCTUATION: &'static str = "-x\
pub const CAPTURE_UP_TO_SECOND_NUMBER_START: &'static str = r"(.*)[\\/] *x"; pub const CAPTURE_UP_TO_SECOND_NUMBER_START: &'static str = r"(.*)[\\/] *x";
pub const REGION_CODE_FOR_NON_GEO_ENTITY: &'static str = "0001"; pub const REGION_CODE_FOR_NON_GEO_ENTITY: &'static str = "001";
pub const PLUS_SIGN: &'static str = "+"; pub const PLUS_SIGN: &'static str = "+";
pub const STAR_SIGN: &'static str = "*"; pub const STAR_SIGN: &'static str = "*";
@@ -50,11 +50,11 @@ pub const VALID_ALPHA_INCL_UPPERCASE: &'static str = "A-Za-z";
// prefix. This can be overridden by region-specific preferences. // prefix. This can be overridden by region-specific preferences.
pub const DEFAULT_EXTN_PREFIX: &'static str = " ext. "; pub const DEFAULT_EXTN_PREFIX: &'static str = " ext. ";
pub const POSSIBLE_SEPARATORS_BETWEEN_NUMBER_AND_EXT_LABEL: &'static str = "0001"; pub const POSSIBLE_SEPARATORS_BETWEEN_NUMBER_AND_EXT_LABEL: &'static str = "[ \u{00A0}\\t,]*";
// Optional full stop (.) or colon, followed by zero or more // Optional full stop (.) or colon, followed by zero or more
// spaces/tabs/commas. // spaces/tabs/commas.
pub const POSSIBLE_CHARS_AFTER_EXT_LABEL: &'static str = "[ \u{00A0}\\t,]*"; pub const POSSIBLE_CHARS_AFTER_EXT_LABEL: &'static str = "[:\\.\u{FF0E}]?[ \u{00A0}\\t,-]*";
pub const OPTIONAL_EXT_SUFFIX: &'static str = "[:\\.\u{FF0E}]?[ \u{00A0}\\t,-]*"; pub const OPTIONAL_EXT_SUFFIX: &'static str = "#?";
pub const NANPA_COUNTRY_CODE: i32 = 1; pub const NANPA_COUNTRY_CODE: i32 = 1;

View File

@@ -1,3 +0,0 @@
mod helper_constants;
pub(super) use helper_constants::{*};

View File

@@ -4,10 +4,10 @@ use protobuf::Message;
use strum::IntoEnumIterator; use strum::IntoEnumIterator;
use crate::{ use crate::{
interfaces::MatcherApi, phonenumberutil::generated::metadata::METADATA, proto_gen::{ interfaces::MatcherApi, generated::metadata::METADATA,
phonemetadata::{PhoneMetadata, PhoneMetadataCollection, PhoneNumberDesc}, phonemetadata::{PhoneMetadata, PhoneMetadataCollection, PhoneNumberDesc},
phonenumber::PhoneNumber, phonenumber::PhoneNumber,
}
}; };
use super::{ use super::{

View File

@@ -1,6 +1,6 @@
use std::borrow::Cow; use std::borrow::Cow;
use crate::proto_gen::phonenumber::phone_number::CountryCodeSource; use crate::phonenumber::phone_number::CountryCodeSource;
#[derive(Debug)] #[derive(Debug)]
pub struct PhoneNumberWithCountryCodeSource<'a> { pub struct PhoneNumberWithCountryCodeSource<'a> {

View File

@@ -6,7 +6,6 @@ pub mod phonenumberutil;
mod phone_number_regexps_and_mappings; mod phone_number_regexps_and_mappings;
pub(self) mod helper_types; pub(self) mod helper_types;
pub(self) mod comparisons; pub(self) mod comparisons;
pub(crate) mod generated;
use std::sync::LazyLock; use std::sync::LazyLock;

View File

@@ -307,13 +307,12 @@ impl PhoneNumberRegExpsAndMappings {
separator_pattern: Regex::new(&format!("[{}]+", VALID_PUNCTUATION)).unwrap(), separator_pattern: Regex::new(&format!("[{}]+", VALID_PUNCTUATION)).unwrap(),
extn_patterns_for_matching: create_extn_pattern(false), extn_patterns_for_matching: create_extn_pattern(false),
extn_pattern: Regex::new(&format!("(?i)(?:{})$", &extn_patterns_for_parsing)).unwrap(), extn_pattern: Regex::new(&format!("(?i)(?:{})$", &extn_patterns_for_parsing)).unwrap(),
valid_phone_number_pattern: Regex::new(&format!("(?i){}(?:{})?", valid_phone_number_pattern: Regex::new(&format!("(?i)^(?:{})(?:{})?$",
&valid_phone_number, &valid_phone_number,
extn_patterns_for_parsing &extn_patterns_for_parsing
)).unwrap(),
valid_alpha_phone_pattern: Regex::new(&format!("(?i)(?:.*?[{}]){{3}}",
VALID_ALPHA
)).unwrap(), )).unwrap(),
// from java
valid_alpha_phone_pattern: Regex::new("(?:.*?[A-Za-z]){3}.*").unwrap(),
// The first_group_capturing_pattern was originally set to $1 but there // The first_group_capturing_pattern was originally set to $1 but there
// are some countries for which the first group is not used in the // are some countries for which the first group is not used in the
// national pattern (e.g. Argentina) so the $1 group does not match // national pattern (e.g. Argentina) so the $1 group does not match

File diff suppressed because it is too large Load Diff

View File

@@ -1,7 +1,7 @@
use log::{error}; use log::{error};
use super::regex_util::{RegexFullMatch, RegexConsume}; use super::regex_util::{RegexFullMatch, RegexConsume};
use crate::{interfaces, proto_gen::phonemetadata::PhoneNumberDesc, regexp_cache::{ErrorInvalidRegex, RegexCache}}; use crate::{interfaces, phonemetadata::PhoneNumberDesc, regexp_cache::{ErrorInvalidRegex, RegexCache}};
pub struct RegexBasedMatcher { pub struct RegexBasedMatcher {
cache: RegexCache, cache: RegexCache,

View File

@@ -1 +1,2 @@
mod tests; mod tests;
pub(self) mod region_code;

160
src/tests/region_code.rs Normal file
View File

@@ -0,0 +1,160 @@
/// Namespace for the region code strings used by the test suite.
///
/// The type carries no data; every accessor is an associated function that
/// returns a `&'static str` literal. All accessors are `const fn`, so they can
/// also be used to initialise `const` and `static` items.
pub struct RegionCode {}

impl RegionCode {
    /// Region code `"AD"`.
    pub const fn ad() -> &'static str {
        "AD"
    }

    /// Region code `"AE"`.
    pub const fn ae() -> &'static str {
        "AE"
    }

    /// Region code `"AM"`.
    pub const fn am() -> &'static str {
        "AM"
    }

    /// Region code `"AO"`.
    pub const fn ao() -> &'static str {
        "AO"
    }

    /// Region code `"AQ"`.
    pub const fn aq() -> &'static str {
        "AQ"
    }

    /// Region code `"AR"`.
    pub const fn ar() -> &'static str {
        "AR"
    }

    /// Region code `"AU"`.
    pub const fn au() -> &'static str {
        "AU"
    }

    /// Region code `"BB"`.
    pub const fn bb() -> &'static str {
        "BB"
    }

    /// Region code `"BR"`.
    pub const fn br() -> &'static str {
        "BR"
    }

    /// Region code `"BS"`.
    pub const fn bs() -> &'static str {
        "BS"
    }

    /// Region code `"BY"`.
    pub const fn by() -> &'static str {
        "BY"
    }

    /// Region code `"CA"`.
    pub const fn ca() -> &'static str {
        "CA"
    }

    /// Region code `"CH"`.
    pub const fn ch() -> &'static str {
        "CH"
    }

    /// Region code `"CL"`.
    pub const fn cl() -> &'static str {
        "CL"
    }

    /// Region code `"CN"`.
    pub const fn cn() -> &'static str {
        "CN"
    }

    /// Region code `"CO"`.
    pub const fn co() -> &'static str {
        "CO"
    }

    /// Region code `"CS"`.
    pub const fn cs() -> &'static str {
        "CS"
    }

    /// Region code `"CX"`.
    pub const fn cx() -> &'static str {
        "CX"
    }

    /// Region code `"DE"`.
    pub const fn de() -> &'static str {
        "DE"
    }

    /// Region code `"FR"`.
    pub const fn fr() -> &'static str {
        "FR"
    }

    /// Region code `"GB"`.
    pub const fn gb() -> &'static str {
        "GB"
    }

    /// Region code `"HU"`.
    pub const fn hu() -> &'static str {
        "HU"
    }

    /// Region code `"IT"`.
    pub const fn it() -> &'static str {
        "IT"
    }

    /// Region code `"JP"`.
    pub const fn jp() -> &'static str {
        "JP"
    }

    /// Region code `"KR"`.
    pub const fn kr() -> &'static str {
        "KR"
    }

    /// Region code `"MX"`.
    pub const fn mx() -> &'static str {
        "MX"
    }

    /// Region code `"NZ"`.
    pub const fn nz() -> &'static str {
        "NZ"
    }

    /// Region code `"PL"`.
    pub const fn pl() -> &'static str {
        "PL"
    }

    /// Region code `"RE"`.
    pub const fn re() -> &'static str {
        "RE"
    }

    /// Region code `"RU"`.
    pub const fn ru() -> &'static str {
        "RU"
    }

    /// Region code `"SE"`.
    pub const fn se() -> &'static str {
        "SE"
    }

    /// Region code `"SG"`.
    pub const fn sg() -> &'static str {
        "SG"
    }

    /// Region code `"001"`.
    ///
    /// Unlike the other accessors this one is numeric rather than two letters.
    pub const fn un001() -> &'static str {
        "001"
    }

    /// Region code `"US"`.
    pub const fn us() -> &'static str {
        "US"
    }

    /// Region code `"UZ"`.
    pub const fn uz() -> &'static str {
        "UZ"
    }

    /// Region code `"YT"`.
    pub const fn yt() -> &'static str {
        "YT"
    }

    /// Region code `"ZW"`.
    pub const fn zw() -> &'static str {
        "ZW"
    }

    /// Returns a region code string representing the "unknown" region.
    ///
    /// This is an alias for [`RegionCode::zz`].
    pub const fn get_unknown() -> &'static str {
        Self::zz()
    }

    /// Region code `"ZZ"`, the value [`RegionCode::get_unknown`] returns.
    pub const fn zz() -> &'static str {
        "ZZ"
    }
}

File diff suppressed because it is too large Load Diff

View File

@@ -3,7 +3,7 @@
filedir="./$(dirname "$0")" filedir="./$(dirname "$0")"
javadir="$filedir/../java" javadir="$filedir/../java"
project_home="$filedir/../.." project_home="$filedir/../.."
generated_dir="$project_home/src/phonenumberutil/generated" generated_dir="$project_home/src/generated/metadata"
echo $generated_dir echo $generated_dir
resources_dir="$project_home/resources" resources_dir="$project_home/resources"
@@ -28,6 +28,9 @@ generate "PhoneNumberMetadata.xml" "metadata" "metadata" "METADATA"
generate "PhoneNumberMetadataForTesting.xml" "test_metadata" "metadata" "TEST_METADATA" generate "PhoneNumberMetadataForTesting.xml" "test_metadata" "metadata" "TEST_METADATA"
echo "\ echo "\
pub mod metadata; mod metadata;
pub mod test_metadata; mod test_metadata;
pub use metadata::METADATA;
pub use test_metadata::TEST_METADATA;
" > "$generated_dir/mod.rs" " > "$generated_dir/mod.rs"