Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ All notable changes to this project will be documented in this file.
- Added `supported_entity` parameter to `PhoneRecognizer`. Previously, this recognizer hard-coded `["PHONE_NUMBER"]` as the only possible supported entity.

#### Fixed
- Fixed `PhoneRecognizer._get_recognizer_result` to use the constructor-provided `supported_entity` instead of the hard-coded `"PHONE_NUMBER"` string, making the `supported_entity` parameter from PR #2014 fully functional.
- Fixed incorrect Prüfziffer algorithm in `DeHealthInsuranceRecognizer` (KVNR); now uses alternating factors [1,2,…,1,2] per § 290 SGB V Anlage 1 (#1972).
- Fixed incorrect check-digit weights in `DeSocialSecurityRecognizer` (RVNR); now uses VKVV § 4 weights [2,1,2,5,7,1,2,1,2,1,2,1]. Previous weights diverged from the Deutsche Rentenversicherung specification and rejected the canonical DRV example 15070649C103.
- Fixed incorrect check-digit algorithm in `DeLanrRecognizer`; now uses KBV Arztnummern-Richtlinie weights [4,9,4,9,4,9] without the spurious Quersumme step, and the complement-to-10 formula `(10 − sum mod 10) mod 10`. Previous weights and formula were internally self-consistent only.
Expand All @@ -32,6 +33,7 @@ All notable changes to this project will be documented in this file.
- ICAO Doc 9303 MRZ checksum validation in `DePassportRecognizer` and `DeIdCardRecognizer` (weights 7, 3, 1 repeating; letters A=10…Z=35; sum mod 10).
- Structural validation improvements in `DeBsnrRecognizer` per KBV Arztnummern-Richtlinie Anlage 1; valid KV regional codes are defined for defense-in-depth/documentation purposes, but unknown prefixes are not currently rejected (no public checksum exists for BSNR).
- Turkish PII recognizer for `TR_NATIONAL_ID` (TCKN) to identify Turkish National Identification Numbers using pattern match, context, and NVI checksum validation. Disabled by default.
- Turkish phone number detection via configurable `PhoneRecognizer` with `supported_regions=["TR"]` and `supported_entity="TR_PHONE_NUMBER"`. Supports international (+90), national (0), and local formats using the `phonenumbers` library. Disabled by default; users enable it programmatically.
- Turkish PII recognizer for `TR_LICENSE_PLATE` (plaka) to identify Turkish vehicle license plates using pattern match, context, and province code validation (01-81). Disabled by default.

## [2.2.362] - 2026-03-15
Expand Down
1 change: 1 addition & 0 deletions docs/supported_entities.md
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ For more information, refer to the [adding new recognizers documentation](analyz
| FieldType | Description | Detection Method |
|------------|---------------------------------------------------------------------------------------------------------|------------------------------------------|
| TR_NATIONAL_ID | The Turkish National Identification Number (TCKN) is a unique 11-digit number issued to all Turkish citizens. | Pattern match, context and checksum. |
| TR_PHONE_NUMBER | Turkish phone numbers: 10-digit national numbers starting with 5 (mobile, MNP-compliant) or 2/3/4 (geographic), accepted in international (+90), national (0), and local formats. Reference: ITU-T E.164. Enabled programmatically via `PhoneRecognizer(supported_regions=["TR"], supported_entity="TR_PHONE_NUMBER")`. | `phonenumbers` library, context and format validation. |
| TR_LICENSE_PLATE | Turkish vehicle license plate (plaka): 2-digit province code (01–81), 1–3 letters (A–Z, excluding Q, W, X), and 2–4 digits. Standard civilian format only. Legal basis: KTK Madde 23. | Pattern match, context and province code validation. |

### Germany
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def analyze(
"""Analyzes text to detect phone numbers using python-phonenumbers.

Iterates over entities, fetching regions, then matching regional
phone numbers patterns against the text.
phone number patterns against the text.
:param text: Text to be analyzed
:param entities: Entities this recognizer can detect
:param nlp_artifacts: Additional metadata from the NLP engine
Expand All @@ -83,7 +83,7 @@ def analyze(

def _get_recognizer_result(self, match, text, region, nlp_artifacts):
result = RecognizerResult(
entity_type="PHONE_NUMBER",
entity_type=self.supported_entities[0],
start=match.start,
end=match.end,
score=self.SCORE,
Expand Down
176 changes: 176 additions & 0 deletions presidio-analyzer/tests/test_tr_phone_number_recognizer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
"""Tests for Turkish phone number (TR_PHONE_NUMBER) recognizer."""

import pytest
from presidio_analyzer.predefined_recognizers import PhoneRecognizer

from tests import assert_result_within_score_range

# Context words handed to the recognizer under test via ``context=``.
# Every entry is listed exactly once.
TURKISH_CONTEXT = [
    # Turkish phone-related terms
    "telefon",
    "telefon numarası",
    "cep telefonu",
    "cep no",
    "telefon no",
    "numara",
    "mobil telefon",
    "mobil no",
    "hücresel telefon",
    "ara",
    "ulaş",
    "iletişim",
    "bağlantı",
    "irtibat",
    "numaram",
    "telefonum",
    "cep telefonum",
    "mobil telefonum",
    "cep numarası",
    "mobil numarası",
    # English phone-related terms
    "phone",
    "mobile",
    "cell",
    "cellphone",
    "call",
    "contact",
    "number",
    "phone number",
    # Messaging channels commonly mentioned next to a phone number
    "sms",
    "mesaj",
    "whatsapp",
    "telegram",
    "signal",
    "viber",
]


@pytest.fixture(scope="module")
def recognizer():
    """Build the module-scoped PhoneRecognizer used by every test here.

    The instance is restricted to the ``TR`` region, reports matches as
    ``TR_PHONE_NUMBER``, uses ``TURKISH_CONTEXT`` as its context words and
    is registered for the ``en`` language.
    """
    tr_settings = {
        "supported_regions": ["TR"],
        "supported_entity": "TR_PHONE_NUMBER",
        "context": TURKISH_CONTEXT,
        "supported_language": "en",
    }
    return PhoneRecognizer(**tr_settings)


@pytest.fixture(scope="module")
def entities():
    """Provide the entity names passed to ``analyze`` in these tests."""
    tr_entities = ["TR_PHONE_NUMBER"]
    return tr_entities


@pytest.mark.parametrize(
    "text, expected_len, expected_positions, expected_score_ranges",
    [
        # International format (+90)
        ("+905321234567", 1, ((0, 13),), ((0.4, 1.0),)),
        ("+90 532 123 45 67", 1, ((0, 17),), ((0.4, 1.0),)),
        ("+90-532-123-45-67", 1, ((0, 17),), ((0.4, 1.0),)),
        ("+90 (532) 123 45 67", 1, ((0, 19),), ((0.4, 1.0),)),
        # National format (leading 0)
        ("05321234567", 1, ((0, 11),), ((0.3, 1.0),)),
        ("0 532 123 45 67", 1, ((0, 15),), ((0.3, 1.0),)),
        ("0-532-123-45-67", 1, ((0, 15),), ((0.3, 1.0),)),
        ("0 (532) 1234567", 1, ((0, 15),), ((0.3, 1.0),)),
        # Local format (just the 10-digit number)
        ("5321234567", 1, ((0, 10),), ((0.15, 1.0),)),
        ("532 123 45 67", 1, ((0, 13),), ((0.15, 1.0),)),
        ("532-123-45-67", 1, ((0, 13),), ((0.15, 1.0),)),
        # In a sentence with context words
        (
            "Telefon numaram +905321234567 olarak kayitli.",
            1,
            ((16, 29),),
            ((0.4, 1.0),),
        ),
        (
            "Cep no: 05321234567",
            1,
            ((8, 19),),
            ((0.3, 1.0),),
        ),
        (
            "Phone: 5321234567",
            1,
            ((7, 17),),
            ((0.15, 1.0),),
        ),
        # Multiple numbers in one text
        (
            "Birinci: +905321234567, Ikinci: 05359876543",
            2,
            ((9, 22), (32, 43)),
            ((0.4, 1.0), (0.3, 1.0)),
        ),
        # Geographic number in local format, starting with 4
        ("4321234567", 1, ((0, 10),), ((0.05, 1.0),)),
        # Invalid: too short (9 digits)
        ("532123456", 0, (), ()),
        # Invalid: too long (11 digits without a 0 / +90 prefix)
        ("53212345678", 0, (), ()),
        # Invalid: no digits at all, and a 10-digit number starting with 1
        ("hello world", 0, (), ()),
        ("1234567890", 0, (), ()),
        # False positive guard: 11-digit number starting with 1
        ("12345678901", 0, (), ()),
        # Geographic numbers in local format (starting with 2, 3, 4)
        ("2121234567", 1, ((0, 10),), ((0.05, 1.0),)),
        ("3121234567", 1, ((0, 10),), ((0.05, 1.0),)),
        ("4621234567", 1, ((0, 10),), ((0.05, 1.0),)),
        # Geographic numbers in national format (leading 0)
        ("02121234567", 1, ((0, 11),), ((0.1, 1.0),)),
        ("0216 123 45 67", 1, ((0, 14),), ((0.1, 1.0),)),
        ("0232 123 45 67", 1, ((0, 14),), ((0.05, 1.0),)),
        ("0312 123 45 67", 1, ((0, 14),), ((0.1, 1.0),)),
        ("0412 123 45 67", 1, ((0, 14),), ((0.1, 1.0),)),
        # False positive guard: phone-like digits embedded in a longer number
        ("15053212345678", 0, (), ()),
        # False positive guard: TCKN-like (11 digits starting with 1)
        ("10000000146", 0, (), ()),
        # Dot-separated groups are expected to be matched as a TR number
        ("532.123.45.67", 1, ((0, 13),), ((0.05, 1.0),)),
        # False positive guard: Turkish license-plate-like text
        ("34 ABC 1234", 0, (), ()),
        # Invalid: 10-digit numbers whose first digit is 1, 6, 7, 8 or 9
        ("1123456789", 0, (), ()),
        ("6123456789", 0, (), ()),
        ("7123456789", 0, (), ()),
        ("8123456789", 0, (), ()),
        ("9123456789", 0, (), ()),
    ],
)
def test_when_phone_in_text_then_all_phones_found(
    text,
    expected_len,
    expected_positions,
    expected_score_ranges,
    recognizer,
    entities,
):
    """Check match count, span and score range for each parametrized text.

    :param text: Input text handed to ``recognizer.analyze``
    :param expected_len: Number of results the recognizer must return
    :param expected_positions: One ``(start, end)`` pair per expected result
    :param expected_score_ranges: One ``(min, max)`` score pair per
        expected result
    :param recognizer: Recognizer fixture under test
    :param entities: Entity names passed to ``analyze``; the first one is
        the entity type each result is checked against
    """
    # Guard against a malformed parameter row: zip() below stops at the
    # shortest input, so a missing expectation would otherwise go unchecked.
    assert len(expected_positions) == expected_len
    assert len(expected_score_ranges) == expected_len

    results = recognizer.analyze(text, entities)
    assert len(results) == expected_len

    for res, (st_pos, fn_pos), (st_score, fn_score) in zip(
        results, expected_positions, expected_score_ranges
    ):
        assert_result_within_score_range(
            res, entities[0], st_pos, fn_pos, st_score, fn_score
        )


def test_supported_entity(recognizer):
    """Verify the recognizer exposes TR_PHONE_NUMBER as its only entity."""
    expected = ["TR_PHONE_NUMBER"]
    assert recognizer.supported_entities == expected


def test_supported_language(recognizer):
    """Verify the recognizer reports ``en`` as its supported language."""
    expected = "en"
    assert recognizer.supported_language == expected