Compare commits

..

9 Commits

Author SHA1 Message Date
Vlasislav Kashin
cb5f0d8fcc Refactor build script 2025-07-13 15:01:40 +03:00
Vlasislav Kashin
3a2e8e6c0f Move helper constants out of folder 2025-07-13 14:59:39 +03:00
Vlasislav Kashin
467416e3ef Update generated location 2025-07-13 14:58:49 +03:00
Vlasislav Kashin
1464119ff8 Better error naming 2025-07-13 14:52:06 +03:00
Vlasislav Kashin
beae04dee8 Add more tests, better error handling 2025-07-13 14:49:56 +03:00
Vlasislav Kashin
ebe7d236e9 feat: update regex, bug fixes, add tests 2025-07-12 23:30:44 +03:00
Vlasislav Kashin
2fea8f1e20 Phonenumberutil: add is_alpha_number 2025-07-12 21:21:32 +03:00
Vlasislav Kashin
392c793d5c Update phonenumberutil get_national_significant_number - &self reciever 2025-07-12 20:59:17 +03:00
Vlasislav Kashin
e7daffa6f7 helper_constants: fix const REGION_CODE_FOR_NON_GEO_ENTITY 2025-07-12 20:35:33 +03:00
24 changed files with 2938 additions and 957 deletions

3
.vscode/settings.json vendored Normal file
View File

@@ -0,0 +1,3 @@
{
"java.configuration.updateBuildConfiguration": "interactive"
}

View File

@@ -10,7 +10,7 @@ build = "build/rust_build.rs"
log = "0.4.27"
# helpful error package
thiserror = "2.0.12"
# google protobuf lib required to use .proto files from assets
# protobuf lib required to use .proto files from assets
protobuf = "3.7.2"
# optimized concurrent map
dashmap = "6.1.0"

View File

@@ -1,8 +1,9 @@
/** TODO: uncomment, move to macros and refactor
/**
* This file represents content of https://github.com/google/libphonenumber/tree/master/tools/cpp
*/
use std::{collections::BTreeMap, fs::File, io::{BufRead, BufReader}, num::ParseIntError, path::Path};
use thiserror::Error;
@@ -68,9 +69,9 @@ fn parse_prefixes(path: &str, prefixes: &mut BTreeMap<i32, String>) -> Result<()
Ok(())
}
*/
fn main() -> Result<(), BuildError> {
fn main() {
protobuf_codegen::Codegen::new()
.pure()
.includes(["resources"])
@@ -78,5 +79,4 @@ fn main() -> Result<(), BuildError> {
.input("resources/phonenumber.proto")
.cargo_out_dir("proto_gen")
.run_from_script();
Ok(())
}

View File

@@ -0,0 +1,6 @@
mod metadata;
mod test_metadata;
pub use metadata::METADATA;
pub use test_metadata::TEST_METADATA;

2
src/generated/mod.rs Normal file
View File

@@ -0,0 +1,2 @@
pub mod proto;
pub mod metadata;

View File

@@ -1,5 +1,5 @@
use crate::proto_gen::phonemetadata::PhoneNumberDesc;
use crate::phonemetadata::PhoneNumberDesc;
/// Internal phonenumber matching API used to isolate the underlying
/// implementation of the matcher and allow different implementations to be
/// swapped in easily.

View File

@@ -1,7 +1,7 @@
mod shortnumberinfo;
mod interfaces;
/// This module is automatically generated from /resources/*.proto
mod proto_gen;
mod generated;
mod phonenumberutil;
mod regexp_cache;
mod regex_based_matcher;
@@ -30,6 +30,6 @@ pub use phonenumberutil::{
errors,
enums,
};
pub use proto_gen::phonemetadata;
pub use proto_gen::phonenumber;
pub use generated::proto::phonemetadata;
pub use generated::proto::phonenumber;
mod tests;

View File

@@ -7,10 +7,10 @@ use crate::regexp_cache::ErrorInvalidRegex;
#[derive(Debug, PartialEq, Error)]
pub enum InternalLogicError {
#[error("{0}")]
InvalidRegexError(#[from] ErrorInvalidRegex),
InvalidRegex(#[from] ErrorInvalidRegex),
#[error("{0}")]
InvalidMetadataForValidRegionError(#[from] InvalidMetadataForValidRegionError)
InvalidMetadataForValidRegion(#[from] InvalidMetadataForValidRegionError)
}
#[derive(Debug, PartialEq, Error)]
@@ -18,9 +18,9 @@ pub enum ParseError {
// Removed as OK variant
// NoParsingError,
#[error("Invalid country code")]
InvalidCountryCodeError, // INVALID_COUNTRY_CODE in the java version.
#[error("Not a number")]
NotANumber,
InvalidCountryCode, // INVALID_COUNTRY_CODE in the java version.
#[error("Not a number: {0}")]
NotANumber(#[from] NotANumberError),
#[error("Too short after idd")]
TooShortAfterIdd,
#[error("Too short Nsn")]
@@ -28,11 +28,19 @@ pub enum ParseError {
#[error("Too long nsn")]
TooLongNsn, // TOO_LONG in the java version.
#[error("{0}")]
InvalidRegexError(#[from] ErrorInvalidRegex),
InvalidRegex(#[from] ErrorInvalidRegex),
}
#[derive(Debug, PartialEq, Error)]
pub enum NotANumberError {
#[error("Number not matched a valid number pattern")]
NotMatchedValidNumberPattern,
#[error("Invalid phone context")]
InvalidPhoneContext,
#[error("{0}")]
ParseNumberAsIntError(#[from] ParseIntError),
FailedToParseNumberAsInt(#[from] ParseIntError),
#[error("{0}")]
ExtractNumberError(#[from] ExtractNumberError),
FailedToExtractNumber(#[from] ExtractNumberError),
}
#[derive(Debug, PartialEq, Error)]
@@ -43,18 +51,24 @@ pub enum ExtractNumberError {
NotANumber,
}
/// Lets `?` lift an `ExtractNumberError` straight into a `ParseError`.
///
/// The extraction failure is first wrapped in
/// `NotANumberError::FailedToExtractNumber`, which is then stored in the
/// `ParseError::NotANumber` variant.
impl From<ExtractNumberError> for ParseError {
    fn from(value: ExtractNumberError) -> Self {
        let not_a_number = NotANumberError::FailedToExtractNumber(value);
        Self::NotANumber(not_a_number)
    }
}
#[derive(Debug, PartialEq, Error)]
pub enum GetExampleNumberError {
#[error("Parse error: {0}")]
ParseError(#[from] ParseError),
FailedToParse(#[from] ParseError),
#[error("{0}")]
InternalLogicError(#[from] InternalLogicError),
Internal(#[from] InternalLogicError),
#[error("No example number")]
NoExampleNumberError,
NoExampleNumber,
#[error("Could not get number")]
CouldNotGetNumberError,
CouldNotGetNumber,
#[error("Invalid metadata")]
InvalidMetadataError
InvalidMetadata
}

View File

@@ -1,3 +0,0 @@
pub mod metadata;
pub mod test_metadata;

View File

@@ -28,7 +28,7 @@ pub const VALID_PUNCTUATION: &'static str = "-x\
pub const CAPTURE_UP_TO_SECOND_NUMBER_START: &'static str = r"(.*)[\\/] *x";
pub const REGION_CODE_FOR_NON_GEO_ENTITY: &'static str = "0001";
pub const REGION_CODE_FOR_NON_GEO_ENTITY: &'static str = "001";
pub const PLUS_SIGN: &'static str = "+";
pub const STAR_SIGN: &'static str = "*";
@@ -50,11 +50,11 @@ pub const VALID_ALPHA_INCL_UPPERCASE: &'static str = "A-Za-z";
// prefix. This can be overridden by region-specific preferences.
pub const DEFAULT_EXTN_PREFIX: &'static str = " ext. ";
pub const POSSIBLE_SEPARATORS_BETWEEN_NUMBER_AND_EXT_LABEL: &'static str = "0001";
pub const POSSIBLE_SEPARATORS_BETWEEN_NUMBER_AND_EXT_LABEL: &'static str = "[ \u{00A0}\\t,]*";
// Optional full stop (.) or colon, followed by zero or more
// spaces/tabs/commas.
pub const POSSIBLE_CHARS_AFTER_EXT_LABEL: &'static str = "[ \u{00A0}\\t,]*";
pub const OPTIONAL_EXT_SUFFIX: &'static str = "[:\\.\u{FF0E}]?[ \u{00A0}\\t,-]*";
pub const POSSIBLE_CHARS_AFTER_EXT_LABEL: &'static str = "[:\\.\u{FF0E}]?[ \u{00A0}\\t,-]*";
pub const OPTIONAL_EXT_SUFFIX: &'static str = "#?";
pub const NANPA_COUNTRY_CODE: i32 = 1;

View File

@@ -1,3 +0,0 @@
mod helper_constants;
pub(super) use helper_constants::{*};

View File

@@ -4,10 +4,10 @@ use protobuf::Message;
use strum::IntoEnumIterator;
use crate::{
interfaces::MatcherApi, phonenumberutil::generated::metadata::METADATA, proto_gen::{
interfaces::MatcherApi, generated::metadata::METADATA,
phonemetadata::{PhoneMetadata, PhoneMetadataCollection, PhoneNumberDesc},
phonenumber::PhoneNumber,
}
};
use super::{

View File

@@ -1,6 +1,6 @@
use std::borrow::Cow;
use crate::proto_gen::phonenumber::phone_number::CountryCodeSource;
use crate::phonenumber::phone_number::CountryCodeSource;
#[derive(Debug)]
pub struct PhoneNumberWithCountryCodeSource<'a> {

View File

@@ -6,7 +6,6 @@ pub mod phonenumberutil;
mod phone_number_regexps_and_mappings;
pub(self) mod helper_types;
pub(self) mod comparisons;
pub(crate) mod generated;
use std::sync::LazyLock;

View File

@@ -307,13 +307,12 @@ impl PhoneNumberRegExpsAndMappings {
separator_pattern: Regex::new(&format!("[{}]+", VALID_PUNCTUATION)).unwrap(),
extn_patterns_for_matching: create_extn_pattern(false),
extn_pattern: Regex::new(&format!("(?i)(?:{})$", &extn_patterns_for_parsing)).unwrap(),
valid_phone_number_pattern: Regex::new(&format!("(?i){}(?:{})?",
valid_phone_number_pattern: Regex::new(&format!("(?i)^(?:{})(?:{})?$",
&valid_phone_number,
extn_patterns_for_parsing
)).unwrap(),
valid_alpha_phone_pattern: Regex::new(&format!("(?i)(?:.*?[{}]){{3}}",
VALID_ALPHA
&extn_patterns_for_parsing
)).unwrap(),
// from java
valid_alpha_phone_pattern: Regex::new("(?:.*?[A-Za-z]){3}.*").unwrap(),
// The first_group_capturing_pattern was originally set to $1 but there
// are some countries for which the first group is not used in the
// national pattern (e.g. Argentina) so the $1 group does not match

File diff suppressed because it is too large Load Diff

View File

@@ -1,7 +1,7 @@
use log::{error};
use super::regex_util::{RegexFullMatch, RegexConsume};
use crate::{interfaces, proto_gen::phonemetadata::PhoneNumberDesc, regexp_cache::{ErrorInvalidRegex, RegexCache}};
use crate::{interfaces, phonemetadata::PhoneNumberDesc, regexp_cache::{ErrorInvalidRegex, RegexCache}};
pub struct RegexBasedMatcher {
cache: RegexCache,

View File

@@ -1 +1,2 @@
mod tests;
pub(self) mod region_code;

160
src/tests/region_code.rs Normal file
View File

@@ -0,0 +1,160 @@
/// Namespace for the region-code string constants used by the tests.
///
/// Every associated function returns a fixed `&'static str`; the type carries
/// no state and is never instantiated by these accessors.
pub struct RegionCode {}

/// Expands each `name => "CODE"` pair into
/// `pub fn name() -> &'static str { "CODE" }`.
macro_rules! region_code_accessors {
    ($($name:ident => $code:literal),* $(,)?) => {
        $(
            pub fn $name() -> &'static str {
                $code
            }
        )*
    };
}

impl RegionCode {
    region_code_accessors! {
        ad => "AD",
        ae => "AE",
        am => "AM",
        ao => "AO",
        aq => "AQ",
        ar => "AR",
        au => "AU",
        bb => "BB",
        br => "BR",
        bs => "BS",
        by => "BY",
        ca => "CA",
        ch => "CH",
        cl => "CL",
        cn => "CN",
        co => "CO",
        cs => "CS",
        cx => "CX",
        de => "DE",
        fr => "FR",
        gb => "GB",
        hu => "HU",
        it => "IT",
        jp => "JP",
        kr => "KR",
        mx => "MX",
        nz => "NZ",
        pl => "PL",
        re => "RE",
        ru => "RU",
        se => "SE",
        sg => "SG",
        un001 => "001",
        us => "US",
        uz => "UZ",
        yt => "YT",
        zw => "ZW",
        zz => "ZZ",
    }

    /// Returns the region code string representing the "unknown" region.
    pub fn get_unknown() -> &'static str {
        Self::zz()
    }
}

File diff suppressed because it is too large Load Diff

View File

@@ -3,7 +3,7 @@
filedir="./$(dirname "$0")"
javadir="$filedir/../java"
project_home="$filedir/../.."
generated_dir="$project_home/src/phonenumberutil/generated"
generated_dir="$project_home/src/generated/metadata"
echo $generated_dir
resources_dir="$project_home/resources"
@@ -28,6 +28,9 @@ generate "PhoneNumberMetadata.xml" "metadata" "metadata" "METADATA"
generate "PhoneNumberMetadataForTesting.xml" "test_metadata" "metadata" "TEST_METADATA"
echo "\
pub mod metadata;
pub mod test_metadata;
mod metadata;
mod test_metadata;
pub use metadata::METADATA;
pub use test_metadata::TEST_METADATA;
" > "$generated_dir/mod.rs"