add phone_number_mappings

- **important** Uppercase in helper_functions::normalize_helper
This commit is contained in:
Vlasislav Kashin
2025-06-29 22:41:51 +03:00
parent aa4220ed2d
commit 929fdbae8a
7 changed files with 379 additions and 489 deletions

495
Cargo.lock generated
View File

@@ -17,64 +17,24 @@ version = "1.0.98"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487"
[[package]]
name = "autocfg"
version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0dde43e75fd43e8a1bf86103336bc699aa8d17ad1be60c76c0bdfd4828e19b78"
dependencies = [
"autocfg 1.5.0",
]
[[package]]
name = "autocfg"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
[[package]]
name = "base64"
version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "489d6c0ed21b11d038c31b6ceccca973e65d73ba3bd8ecb9a2babf5546164643"
dependencies = [
"byteorder",
"safemem",
]
[[package]]
name = "bitflags"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "bitflags"
version = "2.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967"
[[package]]
name = "byteorder"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
[[package]]
name = "cfg-if"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268"
[[package]]
name = "cloudabi"
version = "0.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f"
dependencies = [
"bitflags 1.3.2",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.21"
@@ -129,12 +89,6 @@ version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
[[package]]
name = "fuchsia-cprng"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba"
[[package]]
name = "getrandom"
version = "0.3.3"
@@ -144,7 +98,7 @@ dependencies = [
"cfg-if",
"libc",
"r-efi",
"wasi 0.14.2+wasi-0.2.4",
"wasi",
]
[[package]]
@@ -165,12 +119,6 @@ version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]]
name = "hermit-abi"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"
[[package]]
name = "home"
version = "0.5.11"
@@ -180,42 +128,6 @@ dependencies = [
"windows-sys 0.59.0",
]
[[package]]
name = "httparse"
version = "1.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87"
[[package]]
name = "hyper"
version = "0.10.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0a0652d9a2609a968c14be1a9ea00bf4b1d64e2e1f53a1b51b6fff3a6e829273"
dependencies = [
"base64",
"httparse",
"language-tags",
"log 0.3.9",
"mime",
"num_cpus",
"time",
"traitobject",
"typeable",
"unicase",
"url",
]
[[package]]
name = "idna"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38f09e0f0b1fb55fdee1f17470ad800da77af5186a1a76c026b679358b7e844e"
dependencies = [
"matches",
"unicode-bidi",
"unicode-normalization",
]
[[package]]
name = "indexmap"
version = "2.10.0"
@@ -226,34 +138,12 @@ dependencies = [
"hashbrown 0.15.4",
]
[[package]]
name = "iron"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c6d308ca2d884650a8bf9ed2ff4cb13fbb2207b71f64cda11dc9b892067295e8"
dependencies = [
"hyper",
"log 0.3.9",
"mime_guess",
"modifier",
"num_cpus",
"plugin",
"typemap",
"url",
]
[[package]]
name = "itoa"
version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
[[package]]
name = "language-tags"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a91d884b6667cd606bb5a69aa0c99ba811a115fc68915e7056ec08a46e93199a"
[[package]]
name = "libc"
version = "0.2.174"
@@ -278,85 +168,22 @@ version = "0.4.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765"
dependencies = [
"autocfg 1.5.0",
"autocfg",
"scopeguard",
]
[[package]]
name = "log"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e19e8d5c34a3e0e2223db8e060f9e8264aeeb5c5fc64a4ee9965c062211c024b"
dependencies = [
"log 0.4.27",
]
[[package]]
name = "log"
version = "0.4.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94"
[[package]]
name = "logger"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c9172cb4c2f6c52117e25570983edcbb322f130b1031ae5d5d6b1abe7eeb493"
dependencies = [
"iron",
"log 0.3.9",
"time",
]
[[package]]
name = "matches"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2532096657941c2fea9c289d370a250971c689d4f143798ff67113ec042024a5"
[[package]]
name = "memchr"
version = "2.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0"
[[package]]
name = "mime"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba626b8a6de5da682e1caa06bdb42a335aee5a84db8e5046a3e8ab17ba0a3ae0"
dependencies = [
"log 0.3.9",
]
[[package]]
name = "mime_guess"
version = "1.8.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "216929a5ee4dd316b1702eedf5e74548c123d370f47841ceaac38ca154690ca3"
dependencies = [
"mime",
"phf",
"phf_codegen",
"unicase",
]
[[package]]
name = "modifier"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41f5c9112cb662acd3b204077e0de5bc66305fa8df65c8019d5adb10e9ab6e58"
[[package]]
name = "num_cpus"
version = "1.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b"
dependencies = [
"hermit-abi",
"libc",
]
[[package]]
name = "once_cell"
version = "1.21.3"
@@ -376,60 +203,6 @@ dependencies = [
"windows-targets 0.52.6",
]
[[package]]
name = "percent-encoding"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "31010dd2e1ac33d5b46a5b413495239882813e0369f8ed8a5e266f173602f831"
[[package]]
name = "phf"
version = "0.7.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b3da44b85f8e8dfaec21adae67f95d93244b2ecf6ad2a692320598dcc8e6dd18"
dependencies = [
"phf_shared",
]
[[package]]
name = "phf_codegen"
version = "0.7.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b03e85129e324ad4166b06b2c7491ae27fe3ec353af72e72cd1654c7225d517e"
dependencies = [
"phf_generator",
"phf_shared",
]
[[package]]
name = "phf_generator"
version = "0.7.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09364cc93c159b8b06b1f4dd8a4398984503483891b0c26b867cf431fb132662"
dependencies = [
"phf_shared",
"rand",
]
[[package]]
name = "phf_shared"
version = "0.7.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "234f71a15de2288bcb7e3b6515828d22af7ec8598ee6d24c3b526fa0a80b67a0"
dependencies = [
"siphasher",
"unicase",
]
[[package]]
name = "plugin"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a6a0dc3910bc8db877ffed8e457763b317cf880df4ae19109b9f77d277cf6e0"
dependencies = [
"typemap",
]
[[package]]
name = "proc-macro2"
version = "1.0.95"
@@ -473,7 +246,7 @@ checksum = "b4aeaa1f2460f1d348eeaeed86aea999ce98c1bded6f089ff8514c9d9dbdc973"
dependencies = [
"anyhow",
"indexmap",
"log 0.4.27",
"log",
"protobuf",
"protobuf-support",
"tempfile",
@@ -505,128 +278,13 @@ version = "5.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
[[package]]
name = "rand"
version = "0.6.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d71dacdc3c88c1fde3885a3be3fbab9f35724e6ce99467f7d9c5026132184ca"
dependencies = [
"autocfg 0.1.8",
"libc",
"rand_chacha",
"rand_core 0.4.2",
"rand_hc",
"rand_isaac",
"rand_jitter",
"rand_os",
"rand_pcg",
"rand_xorshift",
"winapi",
]
[[package]]
name = "rand_chacha"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "556d3a1ca6600bfcbab7c7c91ccb085ac7fbbcd70e008a98742e7847f4f7bcef"
dependencies = [
"autocfg 0.1.8",
"rand_core 0.3.1",
]
[[package]]
name = "rand_core"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b"
dependencies = [
"rand_core 0.4.2",
]
[[package]]
name = "rand_core"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c33a3c44ca05fa6f1807d8e6743f3824e8509beca625669633be0acbdf509dc"
[[package]]
name = "rand_hc"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b40677c7be09ae76218dc623efbf7b18e34bced3f38883af07bb75630a21bc4"
dependencies = [
"rand_core 0.3.1",
]
[[package]]
name = "rand_isaac"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ded997c9d5f13925be2a6fd7e66bf1872597f759fd9dd93513dd7e92e5a5ee08"
dependencies = [
"rand_core 0.3.1",
]
[[package]]
name = "rand_jitter"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1166d5c91dc97b88d1decc3285bb0a99ed84b05cfd0bc2341bdf2d43fc41e39b"
dependencies = [
"libc",
"rand_core 0.4.2",
"winapi",
]
[[package]]
name = "rand_os"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b75f676a1e053fc562eafbb47838d67c84801e38fc1ba459e8f180deabd5071"
dependencies = [
"cloudabi",
"fuchsia-cprng",
"libc",
"rand_core 0.4.2",
"rdrand",
"winapi",
]
[[package]]
name = "rand_pcg"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "abf9b09b01790cfe0364f52bf32995ea3c39f4d2dd011eac241d2914146d0b44"
dependencies = [
"autocfg 0.1.8",
"rand_core 0.4.2",
]
[[package]]
name = "rand_xorshift"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cbf7e9e623549b0e21f6e97cf8ecf247c1a8fd2e8a992ae265314300b2455d5c"
dependencies = [
"rand_core 0.3.1",
]
[[package]]
name = "rdrand"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2"
dependencies = [
"rand_core 0.3.1",
]
[[package]]
name = "redox_syscall"
version = "0.5.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0d04b7d0ee6b4a0207a0a7adb104d23ecb0b47d6beae7152d0fa34b692b29fd6"
dependencies = [
"bitflags 2.9.1",
"bitflags",
]
[[package]]
@@ -665,7 +323,7 @@ dependencies = [
"dashmap",
"fast-cat",
"itoa",
"logger",
"log",
"protobuf",
"protobuf-codegen",
"regex",
@@ -679,7 +337,7 @@ version = "0.38.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154"
dependencies = [
"bitflags 2.9.1",
"bitflags",
"errno",
"libc",
"linux-raw-sys 0.4.15",
@@ -692,7 +350,7 @@ version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266"
dependencies = [
"bitflags 2.9.1",
"bitflags",
"errno",
"libc",
"linux-raw-sys 0.9.4",
@@ -705,24 +363,12 @@ version = "1.0.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d"
[[package]]
name = "safemem"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef703b7cb59335eae2eb93ceb664c0eb7ea6bf567079d843e09420219668e072"
[[package]]
name = "scopeguard"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "siphasher"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b8de496cf83d4ed58b6be86c3a275b8602f6ffe98d3024a869e124147a9a3ac"
[[package]]
name = "smallvec"
version = "1.15.1"
@@ -815,115 +461,12 @@ dependencies = [
"syn",
]
[[package]]
name = "time"
version = "0.1.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b797afad3f312d1c66a56d11d0316f916356d11bd158fbc6ca6389ff6bf805a"
dependencies = [
"libc",
"wasi 0.10.0+wasi-snapshot-preview1",
"winapi",
]
[[package]]
name = "tinyvec"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09b3661f17e86524eccd4371ab0429194e0d7c008abb45f7a7495b1719463c71"
dependencies = [
"tinyvec_macros",
]
[[package]]
name = "tinyvec_macros"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
[[package]]
name = "traitobject"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04a79e25382e2e852e8da874249358d382ebaf259d0d34e75d8db16a7efabbc7"
[[package]]
name = "typeable"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1410f6f91f21d1612654e7cc69193b0334f909dcf2c790c4826254fbb86f8887"
[[package]]
name = "typemap"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "653be63c80a3296da5551e1bfd2cca35227e13cdd08c6668903ae2f4f77aa1f6"
dependencies = [
"unsafe-any",
]
[[package]]
name = "unicase"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f4765f83163b74f957c797ad9253caf97f103fb064d3999aea9568d09fc8a33"
dependencies = [
"version_check",
]
[[package]]
name = "unicode-bidi"
version = "0.3.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5"
[[package]]
name = "unicode-ident"
version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
[[package]]
name = "unicode-normalization"
version = "0.1.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956"
dependencies = [
"tinyvec",
]
[[package]]
name = "unsafe-any"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f30360d7979f5e9c6e6cea48af192ea8fab4afb3cf72597154b8f08935bc9c7f"
dependencies = [
"traitobject",
]
[[package]]
name = "url"
version = "1.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd4e7c0d531266369519a4aa4f399d748bd37043b00bde1e4ff1f60a120b355a"
dependencies = [
"idna",
"matches",
"percent-encoding",
]
[[package]]
name = "version_check"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "914b1a6776c4c929a602fafd8bc742e06365d4bcbe48c30f9cca5824f70dc9dd"
[[package]]
name = "wasi"
version = "0.10.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f"
[[package]]
name = "wasi"
version = "0.14.2+wasi-0.2.4"
@@ -945,28 +488,6 @@ dependencies = [
"rustix 0.38.44",
]
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows-sys"
version = "0.59.0"
@@ -1119,5 +640,5 @@ version = "0.39.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1"
dependencies = [
"bitflags 2.9.1",
"bitflags",
]

View File

@@ -7,7 +7,7 @@ build = "build/rust_build.rs"
[dependencies]
# logging standard in rust
logger = "0.4.0"
log = "0.4.27"
# helpful error package
thiserror = "2.0.12"
# google protobuf lib required to use .proto files from assets

View File

@@ -4,3 +4,4 @@ mod interfaces;
mod proto_gen;
mod phonenumberutil;
mod regexp_cache;
mod regex_based_matcher;

View File

@@ -261,7 +261,7 @@ pub(super) fn normalize_helper(
let mut normalized_number = String::with_capacity(phone_number.len());
// Skip UTF checking because strings in rust are valid UTF-8 already
for phone_char in phone_number.chars() {
if let Some(replacement) = normalization_replacements.get(&phone_char) {
if let Some(replacement) = normalization_replacements.get(&phone_char.to_ascii_uppercase()) {
normalized_number.push(*replacement);
} else if !remove_non_matches {
normalized_number.push(phone_char);

View File

@@ -3,6 +3,7 @@ pub mod helper_functions;
mod enums;
mod phonenumberutil;
mod regex_and_mappings;
mod phone_number_regexps_and_mappings;
pub use enums::{MatchType, PhoneNumberFormat, PhoneNumberType, ValidationResultErr, ValidNumberLenType};
use thiserror::Error;

View File

@@ -0,0 +1,310 @@
use std::collections::{HashMap, HashSet};
use regex::Regex;
use crate::{phonenumberutil::{helper_constants::{self, CAPTURE_UP_TO_SECOND_NUMBER_START, DIGITS, MIN_LENGTH_FOR_NSN, PLUS_CHARS, PLUS_SIGN, RFC3966_VISUAL_SEPARATOR, STAR_SIGN, VALID_ALPHA, VALID_ALPHA_INCL_UPPERCASE, VALID_PUNCTUATION}, helper_functions::create_extn_pattern}, regexp_cache::RegexCache};
/// Bundle of pattern strings, compiled regular expressions and lookup tables
/// (character mappings and country-calling-code sets).
///
/// `new` builds the pattern strings and compiles every regular expression,
/// then `initialize_regexp_mappings` fills the mapping tables and sets.
pub(super) struct PhoneNumberRegExpsAndMappings {
/// Regular expression of viable phone numbers. This is location independent.
/// Checks we have at least three leading digits, and only valid punctuation,
/// alpha characters and digits in the phone number. Does not include extension
/// data. The symbol 'x' is allowed here as valid punctuation since it is often
/// used as a placeholder for carrier codes, for example in Brazilian phone
/// numbers. We also allow multiple plus-signs at the start.
///
/// Corresponds to the following:
/// `[digits]{minLengthNsn}|
/// plus_sign*(([punctuation]|[star])*[digits]){3,}
/// ([punctuation]|[star]|[digits]|[alpha])*`
///
/// The first reg-ex is to allow short numbers (two digits long) to be parsed
/// if they are entered as "15" etc, but only if there is no punctuation in
/// them. The second expression restricts the number of digits to three or
/// more, but then allows them to be in international form, and to have
/// alpha-characters and punctuation.
///
/// Stored as pattern source text; the compiled form (with the optional
/// extension suffix) is `valid_phone_number_pattern`.
valid_phone_number: String,
/// Regexp of all possible ways to write extensions, for use when parsing. This
/// will be run as a case-insensitive regexp match. Wide character versions are
/// also provided after each ASCII version.
/// For parsing, we are slightly more lenient in our interpretation than for
/// matching. Here we allow "comma" and "semicolon" as possible extension
/// indicators. When matching, these are hardly ever used to indicate this.
///
/// Produced by `create_extn_pattern(true)`.
extn_patterns_for_parsing: String,
/// Regular expressions of different parts of the phone-context parameter,
/// following the syntax defined in RFC3966.
///
/// This one is the pattern source `(DIGITS|RFC3966_VISUAL_SEPARATOR)`.
rfc3966_phone_digit: String,
/// Concatenation of `VALID_ALPHA_INCL_UPPERCASE` and `DIGITS`; used as the
/// body of the character classes in the two label patterns below.
alphanum: String,
/// Pattern source `[alphanum]+((\-)*[alphanum])*`.
rfc3966_domainlabel: String,
/// Pattern source `[VALID_ALPHA_INCL_UPPERCASE]+((\-)*[alphanum])*`.
rfc3966_toplabel: String,
/// Regular-expression cache, created with an initial capacity of 128.
pub regexp_cache: RegexCache,
/// A map that contains characters that are essential when dialling. That means
/// any of the characters in this map must not be removed from a number when
/// dialing, otherwise the call will not reach the intended destination.
pub diallable_char_mappings: HashMap<char, char>,
/// These mappings map a character (key) to a specific digit that should
/// replace it for normalization purposes.
pub alpha_mappings: HashMap<char, char>,
/// For performance reasons, store a map of combining alpha_mappings with ASCII
/// digits.
pub alpha_phone_mappings: HashMap<char, char>,
/// Separate map of all symbols that we wish to retain when formatting alpha
/// numbers. This includes digits, ascii letters and number grouping symbols
/// such as "-" and " ".
pub all_plus_number_grouping_symbols: HashMap<char, char>,
/// Map of country calling codes that use a mobile token before the area code.
/// One example of when this is relevant is when determining the length of the
/// national destination code, which should be the length of the area code plus
/// the length of the mobile token.
pub mobile_token_mappings: HashMap<i32, char>,
/// Set of country codes that doesn't have national prefix, but it has area
/// codes.
pub countries_without_national_prefix_with_area_codes: HashSet<i32>,
/// Set of country codes that have geographically assigned mobile numbers (see
/// `geo_mobile_countries` below) which are not based on *area codes*. For
/// example, in China mobile numbers start with a carrier indicator, and beyond
/// that are geographically assigned: this carrier indicator is not considered
/// to be an area code.
pub geo_mobile_countries_without_mobile_area_codes: HashSet<i32>,
/// Set of country calling codes that have geographically assigned mobile
/// numbers. This may not be complete; we add calling codes case by case, as we
/// find geographical mobile numbers or hear from user reports.
pub geo_mobile_countries: HashSet<i32>,
/// Pattern that makes it easy to distinguish whether a region has a single
/// international dialing prefix or not. If a region has a single international
/// prefix (e.g. 011 in USA), it will be represented as a string that contains
/// a sequence of ASCII digits, and possibly a tilde, which signals waiting for
/// the tone. If there are multiple available international prefixes in a
/// region, they will be represented as a regex string that always contains one
/// or more characters that are not ASCII digits or a tilde.
pub single_international_prefix: Regex,
/// Compiled from `[DIGITS]*`: a possibly empty run of digit characters.
pub digits_pattern: Regex,
/// Compiled from `([DIGITS])`: captures a single digit character.
pub capturing_digit_pattern: Regex,
/// Compiled from `(\d+)`: captures a run of one or more digits.
pub capturing_ascii_digits_pattern: Regex,
/// Regular expression of acceptable characters that may start a phone number
/// for the purposes of parsing. This allows us to strip away meaningless
/// prefixes to phone numbers that may be mistakenly given to us. This consists
/// of digits, the plus symbol and arabic-indic digits. This does not contain
/// alpha characters, although they may be used later in the number. It also
/// does not include other punctuation, as this will be stripped later during
/// parsing and is of no information value when parsing a number. The string
/// starting with this valid character is captured.
/// This corresponds to VALID_START_CHAR in the java version.
pub valid_start_char_pattern: Regex,
/// Regular expression of valid characters before a marker that might indicate
/// a second number.
pub capture_up_to_second_number_start_pattern: Regex,
/// Regular expression of trailing characters that we want to remove. We remove
/// all characters that are not alpha or numerical characters. The hash
/// character is retained here, as it may signify the previous block was an
/// extension. Note the capturing block at the start to capture the rest of the
/// number if this was a match.
/// This corresponds to UNWANTED_END_CHAR_PATTERN in the java version.
pub unwanted_end_char_pattern: Regex,
/// Regular expression of groups of valid punctuation characters.
pub separator_pattern: Regex,
/// Regexp of all possible ways to write extensions, for use when finding phone
/// numbers in text. This will be run as a case-insensitive regexp match. Wide
/// character versions are also provided after each ASCII version.
///
/// Produced by `create_extn_pattern(false)`.
pub extn_patterns_for_matching: String,
/// Regexp of all known extension prefixes used by different regions followed
/// by 1 or more valid digits, for use when parsing.
pub extn_pattern: Regex,
/// We append optionally the extension pattern to the end here, as a valid
/// phone number may have an extension prefix appended, followed by 1 or more
/// digits.
pub valid_phone_number_pattern: Regex,
/// We use this pattern to check if the phone number has at least three letters
/// in it - if so, then we treat it as a number where some phone-number digits
/// are represented by letters.
pub valid_alpha_phone_pattern: Regex,
/// Compiled from `(\$\d)`: captures a `$` followed by one digit, i.e. a
/// group reference such as `$1`.
pub first_group_capturing_pattern: Regex,
/// Compiled from `\$CC`: matches the literal `$CC` placeholder.
pub carrier_code_pattern: Regex,
/// Compiled from `[PLUS_CHARS]+`: one or more plus characters.
pub plus_chars_pattern: Regex,
/// Regular expression of valid global-number-digits for the phone-context
/// parameter, following the syntax defined in RFC3966.
pub rfc3966_global_number_digits_pattern: Regex,
/// Regular expression of valid domainname for the phone-context parameter,
/// following the syntax defined in RFC3966.
pub rfc3966_domainname_pattern: Regex,
}
impl PhoneNumberRegExpsAndMappings {
fn initialize_regexp_mappings(&mut self) {
self.mobile_token_mappings.insert(54, '9');
self.geo_mobile_countries_without_mobile_area_codes.insert(86); // China
self.countries_without_national_prefix_with_area_codes.insert(52); // Mexico
self.geo_mobile_countries.insert(52); // Mexico
self.geo_mobile_countries.insert(54); // Argentina
self.geo_mobile_countries.insert(55); // Brazil
self.geo_mobile_countries.insert(62); // Indonesia: some prefixes only (fixed CMDA wireless)
self.geo_mobile_countries.extend(&self.geo_mobile_countries_without_mobile_area_codes);
// Simple ASCII digits map used to populate ALPHA_PHONE_MAPPINGS and
// ALL_PLUS_NUMBER_GROUPING_SYMBOLS.
let mut ascii_digit_mappings = HashMap::with_capacity(10);
for d in '0'..'9' {
ascii_digit_mappings.insert(d, d);
}
let mut alpha_map = HashMap::with_capacity(40);
alpha_map.insert('A', '2');
alpha_map.insert('B', '2');
alpha_map.insert('C', '2');
alpha_map.insert('D', '3');
alpha_map.insert('E', '3');
alpha_map.insert('F', '3');
alpha_map.insert('G', '4');
alpha_map.insert('H', '4');
alpha_map.insert('I', '4');
alpha_map.insert('J', '5');
alpha_map.insert('K', '5');
alpha_map.insert('L', '5');
alpha_map.insert('M', '6');
alpha_map.insert('N', '6');
alpha_map.insert('O', '6');
alpha_map.insert('P', '7');
alpha_map.insert('Q', '7');
alpha_map.insert('R', '7');
alpha_map.insert('S', '7');
alpha_map.insert('T', '8');
alpha_map.insert('U', '8');
alpha_map.insert('V', '8');
alpha_map.insert('W', '9');
alpha_map.insert('X', '9');
alpha_map.insert('Y', '9');
alpha_map.insert('Z', '9');
self.alpha_mappings = alpha_map;
let mut combined_map = HashMap::with_capacity(100);
combined_map.extend(self.alpha_mappings.iter());
combined_map.extend(ascii_digit_mappings.iter());
self.alpha_phone_mappings = combined_map;
let mut dilatable_char_map = HashMap::new();
dilatable_char_map.extend(ascii_digit_mappings.iter());
dilatable_char_map.insert('+', '+');
dilatable_char_map.insert('*', '*');
dilatable_char_map.insert('#', '#');
self.diallable_char_mappings = dilatable_char_map;
let mut all_plus_number_groupings = HashMap::new();
// insert (lower letter -> upper letter) and (upper letter -> upper letter) mappings.
for c in self.alpha_mappings.keys() {
all_plus_number_groupings.insert(c.to_ascii_lowercase(), *c);
all_plus_number_groupings.insert(*c, *c);
}
all_plus_number_groupings.extend(ascii_digit_mappings.iter());
// insert grouping symbols.
all_plus_number_groupings.insert('-', '-');
all_plus_number_groupings.insert('\u{FF0D}', '-');
all_plus_number_groupings.insert('\u{2010}', '-');
all_plus_number_groupings.insert('\u{2011}', '-');
all_plus_number_groupings.insert('\u{2012}', '-');
all_plus_number_groupings.insert('\u{2013}', '-');
all_plus_number_groupings.insert('\u{2014}', '-');
all_plus_number_groupings.insert('\u{2015}', '-');
all_plus_number_groupings.insert('\u{2212}', '-');
all_plus_number_groupings.insert('/', '/');
all_plus_number_groupings.insert('\u{FF0F}', '/');
all_plus_number_groupings.insert(' ', ' ');
all_plus_number_groupings.insert('\u{3000}', ' ');
all_plus_number_groupings.insert('\u{2060}', ' ');
all_plus_number_groupings.insert('.', '.');
all_plus_number_groupings.insert('\u{FF0E}', '.');
self.all_plus_number_grouping_symbols = all_plus_number_groupings;
}
fn new() -> Self {
let alphanum = fast_cat::concat_str!(VALID_ALPHA_INCL_UPPERCASE, DIGITS);
let extn_patterns_for_parsing = create_extn_pattern(true);
let valid_phone_number = format!(
"{}{{{}}}|[{}]*(?:[{}{}]*{}){{3,}}[{}{}{}{}]*",
DIGITS, MIN_LENGTH_FOR_NSN, PLUS_CHARS,
VALID_PUNCTUATION, STAR_SIGN, DIGITS,
VALID_PUNCTUATION, STAR_SIGN, VALID_ALPHA, DIGITS
);
let rfc3966_phone_digit = format!("({}|{})", DIGITS, RFC3966_VISUAL_SEPARATOR);
let rfc3966_domainlabel = format!("[{}]+((\\-)*[{}])*", alphanum, alphanum);
let rfc3966_toplabel = format!("[{}]+((\\-)*[{}])*", VALID_ALPHA_INCL_UPPERCASE, alphanum);
let mut instance = Self{
// it'll be initialized only once, so we can use slow format!
valid_phone_number: valid_phone_number.clone(),
extn_patterns_for_parsing: extn_patterns_for_parsing.clone(),
rfc3966_phone_digit: rfc3966_phone_digit.clone(),
alphanum: alphanum,
rfc3966_domainlabel: rfc3966_domainlabel.clone(),
rfc3966_toplabel: rfc3966_toplabel.clone(),
regexp_cache: RegexCache::with_capacity(128),
diallable_char_mappings: Default::default(),
alpha_mappings: Default::default(),
alpha_phone_mappings: Default::default(),
all_plus_number_grouping_symbols: Default::default(),
mobile_token_mappings: Default::default(),
countries_without_national_prefix_with_area_codes: Default::default(),
geo_mobile_countries: Default::default(),
geo_mobile_countries_without_mobile_area_codes: Default::default(),
single_international_prefix: Regex::new("[\\d]+(?:[~\u{2053}\u{223C}\u{FF5E}][\\d]+)?").unwrap(),
digits_pattern: Regex::new(&format!("[{}]*", DIGITS)).unwrap(),
capturing_digit_pattern: Regex::new(&format!("([{}])", DIGITS)).unwrap(),
capturing_ascii_digits_pattern: Regex::new("(\\d+)").unwrap(),
valid_start_char_pattern: Regex::new(&format!("[{}{}]", PLUS_CHARS, DIGITS)).unwrap(),
capture_up_to_second_number_start_pattern: Regex::new(CAPTURE_UP_TO_SECOND_NUMBER_START).unwrap(),
unwanted_end_char_pattern: Regex::new("[^\\p{N}\\p{L}#]").unwrap(),
separator_pattern: Regex::new(&format!("[{}]+", VALID_PUNCTUATION)).unwrap(),
extn_patterns_for_matching: create_extn_pattern(false),
extn_pattern: Regex::new(&format!("(?i)(?:{})$", &extn_patterns_for_parsing)).unwrap(),
valid_phone_number_pattern: Regex::new(&format!("(?i){}(?:{})?",
&valid_phone_number,
extn_patterns_for_parsing
)).unwrap(),
valid_alpha_phone_pattern: Regex::new(&format!("(?i)(?:.*?[{}]){{3}}",
VALID_ALPHA
)).unwrap(),
// The first_group_capturing_pattern was originally set to $1 but there
// are some countries for which the first group is not used in the
// national pattern (e.g. Argentina) so the $1 group does not match
// correctly. Therefore, we use \d, so that the first group actually
// used in the pattern will be matched.
first_group_capturing_pattern: Regex::new("(\\$\\d)").unwrap(),
carrier_code_pattern: Regex::new("\\$CC").unwrap(),
plus_chars_pattern: Regex::new(&format!("[{}]+", &PLUS_CHARS)).unwrap(),
rfc3966_global_number_digits_pattern: Regex::new(
&format!("^\\{}{}*{}{}*$", PLUS_SIGN, &rfc3966_phone_digit, DIGITS, rfc3966_phone_digit)
).unwrap(),
rfc3966_domainname_pattern: Regex::new(
&format!("^({}\\.)*{}\\.?$", rfc3966_domainlabel, rfc3966_toplabel)
).unwrap(),
};
instance.initialize_regexp_mappings();
instance
}
}

View File

@@ -0,0 +1,57 @@
use log::{error};
use crate::{interfaces, proto_gen::phonemetadata::PhoneNumberDesc, regexp_cache::{self, RegexCache}};
/// Matcher that tests phone numbers against regular-expression patterns,
/// obtaining each compiled pattern from its own `RegexCache`.
pub struct RegexBasedMatcher {
/// Source of compiled regular expressions, looked up by pattern text.
cache: RegexCache,
}
impl RegexBasedMatcher {
    /// Creates a matcher whose regex cache starts with room for 128 patterns.
    pub fn new() -> Self {
        Self { cache: RegexCache::with_capacity(128) }
    }

    /// Tests `phone_number` against `number_pattern`.
    ///
    /// * With `allow_prefix_match` the pattern only has to match some prefix
    ///   of the number (a match starting at offset 0).
    /// * Otherwise the pattern has to match the whole number.
    ///
    /// # Errors
    ///
    /// Returns the cache's error when the pattern cannot be compiled.
    fn match_number(
        &self,
        phone_number: &str,
        number_pattern: &str,
        allow_prefix_match: bool,
    ) -> Result<bool, regexp_cache::ErrorInvalidRegex> {
        if allow_prefix_match {
            let regexp = self.cache.get_regex(number_pattern)?;
            // `find` reports the leftmost match, so it starts at offset 0
            // exactly when some prefix of the number matches.
            return Ok(regexp
                .find(phone_number)
                .map_or(false, |found| found.start() == 0));
        }

        // A full match cannot be derived from an unanchored `find`: the regex
        // engine prefers earlier alternatives and lazy repetitions, so for
        // `\d{3}|\d{5}` on "12345" it reports "123" even though the whole
        // input matches. Anchoring the pattern makes the engine look for a
        // match that spans the entire number.
        let anchored_pattern = format!("^(?:{})$", number_pattern);
        let regexp = self.cache.get_regex(anchored_pattern.as_str())?;
        Ok(regexp.is_match(phone_number))
    }
}
impl interfaces::MatcherApi for RegexBasedMatcher {
fn match_national_number(
&self, number: &str,
number_desc: &PhoneNumberDesc,
allow_prefix_match: bool
) -> bool {
let national_number_pattern = number_desc.national_number_pattern();
// We don't want to consider it a prefix match when matching non-empty input
// against an empty pattern.
if national_number_pattern.is_empty() {
return false;
}
if let Ok(res) = self.match_number(number, national_number_pattern, allow_prefix_match) {
res
} else {
error!("Invalid regex! {}", national_number_pattern);
false
}
}
}