Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ All notable changes to this project will be documented in this file.
- ICAO Doc 9303 MRZ checksum validation in `DePassportRecognizer` and `DeIdCardRecognizer` (weights 7, 3, 1 repeating; letters A=10…Z=35; sum mod 10).
- Structural validation improvements in `DeBsnrRecognizer` per KBV Arztnummern-Richtlinie Anlage 1; valid KV regional codes are defined for defense-in-depth/documentation purposes, but unknown prefixes are not currently rejected (no public checksum exists for BSNR).
- Turkish PII recognizer for `TR_NATIONAL_ID` (TCKN) to identify Turkish National Identification Numbers using pattern match, context, and NVI checksum validation. Disabled by default.
- Turkish PII recognizer for `TR_LICENSE_PLATE` (plaka) to identify Turkish vehicle license plates using pattern match, context, and province code validation (01-81). Disabled by default.

## [2.2.362] - 2026-03-15
### General
Expand Down
1 change: 1 addition & 0 deletions docs/supported_entities.md
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ For more information, refer to the [adding new recognizers documentation](analyz
| FieldType | Description | Detection Method |
|------------|---------------------------------------------------------------------------------------------------------|------------------------------------------|
| TR_NATIONAL_ID | The Turkish National Identification Number (TCKN) is a unique 11-digit number issued to all Turkish citizens. | Pattern match, context and checksum. |
| TR_LICENSE_PLATE | Turkish vehicle license plate (plaka): 2-digit province code (01–81), 1–3 letters (A–Z, excluding Q, W, X), and 2–4 digits. Standard civilian format only. Legal basis: KTK Madde 23. | Pattern match, context and province code validation. |

### Germany

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,12 @@ recognizers:
type: predefined
enabled: false

- name: TrLicensePlateRecognizer
supported_languages:
- tr
type: predefined
enabled: false

- name: HuggingFaceNerRecognizer
supported_languages:
- en
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,9 @@
from .country_specific.thai.th_tnin_recognizer import ThTninRecognizer

# Turkey recognizers
from .country_specific.turkey.tr_license_plate_recognizer import (
TrLicensePlateRecognizer,
)
from .country_specific.turkey.tr_national_id_recognizer import (
TrNationalIdRecognizer,
)
Expand Down Expand Up @@ -231,6 +234,7 @@
"KrFrnRecognizer",
"SeOrganisationsnummerRecognizer",
"ThTninRecognizer",
"TrLicensePlateRecognizer",
"TrNationalIdRecognizer",
"SePersonnummerRecognizer",
"LangExtractRecognizer",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
"""Turkey-specific recognizers."""

from .tr_license_plate_recognizer import TrLicensePlateRecognizer
from .tr_national_id_recognizer import TrNationalIdRecognizer

# Names re-exported as the public API of the Turkey-specific recognizers package.
__all__ = [
"TrLicensePlateRecognizer",
"TrNationalIdRecognizer",
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
from typing import List, Optional, Tuple, Union

from presidio_analyzer import EntityRecognizer, Pattern, PatternRecognizer


class TrLicensePlateRecognizer(PatternRecognizer):
    """
    Recognize Turkish vehicle license plates (plaka).

    Standard civilian format: [province_code 01-81] [1-3 letters] [2-4 digits].
    Province codes: 01-81 (81 Turkish provinces).
    Letters: A-Z excluding Q, W, X (not in Turkish alphabet).

    Examples: 34 ABC 1234 (Istanbul), 06 A 123 (Ankara), 35 JK 12 (Izmir).

    Legal basis: Karayolları Trafik Kanunu (KTK) Madde 23.
    Data protection: KVKK (Kişisel Verilerin Korunması Kanunu) — license plates
    constitute personal data when linked to an identifiable vehicle owner.

    :param patterns: List of patterns to be used by this recognizer
        (defaults to ``PATTERNS``)
    :param context: List of context words to increase confidence in detection
        (defaults to ``CONTEXT``)
    :param supported_language: Language this recognizer supports
    :param supported_entity: The entity this recognizer can detect
    :param replacement_pairs: List of tuples with potential replacement values
        for different strings to be used during pattern matching.
        Defaults to stripping hyphens and spaces.
    :param name: Optional recognizer name, forwarded to the base class
    """

    # Both patterns restrict the leading group to 01-81 and the letter group
    # to A-Z without Q, W and X; they differ only in the separator allowed
    # between the groups (optional whitespace vs. mandatory hyphens).
    PATTERNS = [
        Pattern(
            "TR License Plate (space)",
            r"\b(0[1-9]|[1-7][0-9]|8[0-1])\s?[A-PR-VY-Z]{1,3}\s?\d{2,4}\b",
            0.3,
        ),
        Pattern(
            "TR License Plate (hyphen)",
            r"\b(0[1-9]|[1-7][0-9]|8[0-1])-[A-PR-VY-Z]{1,3}-\d{2,4}\b",
            0.3,
        ),
    ]

    # Turkish and English words that hint the nearby match is a plate.
    CONTEXT = [
        "plaka",
        "araç plakası",
        "plaka numarası",
        "kayıt plakası",
        "tr plaka",
        "license plate",
        "number plate",
        "plate",
        "taşıt plakası",
        "kayıt",
    ]

    def __init__(
        self,
        patterns: Optional[List[Pattern]] = None,
        context: Optional[List[str]] = None,
        supported_language: str = "tr",
        supported_entity: str = "TR_LICENSE_PLATE",
        replacement_pairs: Optional[List[Tuple[str, str]]] = None,
        name: Optional[str] = None,
    ):
        # Falsy arguments (None or an empty list) fall back to the defaults.
        self.replacement_pairs = (
            replacement_pairs if replacement_pairs else [("-", ""), (" ", "")]
        )
        patterns = patterns if patterns else self.PATTERNS
        context = context if context else self.CONTEXT
        super().__init__(
            supported_entity=supported_entity,
            patterns=patterns,
            context=context,
            supported_language=supported_language,
            name=name,
        )

    def validate_result(self, pattern_text: str) -> Union[bool, None]:
        """
        Validate the matched pattern by checking province code is 01-81.

        :param pattern_text: The matched text to validate.
            Only the part in text that was detected by the regex engine
        :return: True if province code valid, False if invalid, None if not a plate
            (fewer than 3 characters after sanitizing, or a non-numeric prefix)
        """
        sanitized_value = EntityRecognizer.sanitize_value(
            pattern_text, self.replacement_pairs
        )

        # A plate needs the 2-character province code plus at least one more
        # character; anything shorter cannot be judged.
        if len(sanitized_value) < 3:
            return None

        province_code = sanitized_value[:2]
        # str.isdecimal() is used instead of str.isdigit(): isdigit() also
        # accepts characters such as superscript digits that int() rejects,
        # which made this method raise ValueError instead of returning None.
        if not province_code.isdecimal():
            return None

        return 1 <= int(province_code) <= 81
124 changes: 124 additions & 0 deletions presidio-analyzer/tests/test_tr_license_plate_recognizer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
"""Tests for Turkish license plate (TR_LICENSE_PLATE) recognizer."""

import pytest
from presidio_analyzer.predefined_recognizers import TrLicensePlateRecognizer

from tests import assert_result_within_score_range


@pytest.fixture(scope="module")
def recognizer():
    """Build the module-scoped TrLicensePlateRecognizer under test."""
    plate_recognizer = TrLicensePlateRecognizer()
    return plate_recognizer


@pytest.fixture(scope="module")
def entities():
    """Provide the entity list handed to ``analyze`` (TR_LICENSE_PLATE only)."""
    requested_entities = ["TR_LICENSE_PLATE"]
    return requested_entities


@pytest.mark.parametrize(
    "text, expected_len, expected_positions, expected_score_ranges",
    [
        # Bare plates, with and without separators, upper and lower case.
        ("34 ABC 1234", 1, ((0, 11),), ((0.5, 1.0),)),
        ("06 A 123", 1, ((0, 8),), ((0.5, 1.0),)),
        ("35 JK 12", 1, ((0, 8),), ((0.5, 1.0),)),
        ("16 B 1234", 1, ((0, 9),), ((0.5, 1.0),)),
        ("34ABC1234", 1, ((0, 9),), ((0.5, 1.0),)),
        ("34 abc 1234", 1, ((0, 11),), ((0.5, 1.0),)),
        # Plates embedded in Turkish sentences, including two in one text.
        (
            "Araç plakası 34 ABC 1234 olarak kayıtlıdır.",
            1,
            ((13, 24),),
            ((0.5, 1.0),),
        ),
        (
            "Plaka 34 ABC 1234 ve 06 JK 567",
            2,
            ((6, 17), (21, 30)),
            ((0.5, 1.0), (0.5, 1.0)),
        ),
        # Lowest and highest province codes, plus a mid-range one.
        ("01 A 12", 1, ((0, 7),), ((0.5, 1.0),)),
        ("81 A 12", 1, ((0, 7),), ((0.5, 1.0),)),
        ("07 AB 123", 1, ((0, 9),), ((0.5, 1.0),)),
        # Province codes outside 01-81 and plate-free text yield nothing.
        ("00 ABC 123", 0, (), ()),
        ("82 ABC 123", 0, (), ()),
        ("99 ABC 123", 0, (), ()),
        ("hello world", 0, (), ()),
        ("1234567890", 0, (), ()),
        # Plates preceded by English and Turkish context phrases.
        (
            "License plate 34 ABC 1234",
            1,
            ((14, 25),),
            ((0.5, 1.0),),
        ),
        (
            "Plaka numarası 06 A 123 olarak kayıtlı",
            1,
            ((15, 23),),
            ((0.5, 1.0),),
        ),
    ],
)
def test_when_license_plate_in_text_then_all_plates_found(
    text,
    expected_len,
    expected_positions,
    expected_score_ranges,
    recognizer,
    entities,
):
    """Check result count, span and score range for each parametrized text."""
    results = recognizer.analyze(text, entities)
    assert len(results) == expected_len

    # Pair every result with the span and score bounds expected for it.
    expectations = zip(results, expected_positions, expected_score_ranges)
    for found, span, score_bounds in expectations:
        start, end = span
        lowest, highest = score_bounds
        assert_result_within_score_range(
            found, entities[0], start, end, lowest, highest
        )


def test_validate_result_with_valid_province(recognizer):
    """validate_result returns True for plates whose province code is in 01-81."""
    valid_plates = ("34 ABC 1234", "06 A 123", "01 A 12", "81 A 12")
    for plate in valid_plates:
        assert recognizer.validate_result(plate) is True


def test_validate_result_with_invalid_province(recognizer):
    """validate_result returns False for province codes 00 and 82."""
    out_of_range_plates = ("00 ABC 123", "82 ABC 123")
    for plate in out_of_range_plates:
        assert recognizer.validate_result(plate) is False


def test_validate_result_with_short_input(recognizer):
    """validate_result returns None when the input has fewer than 3 characters."""
    too_short_inputs = ("12", "")
    for candidate in too_short_inputs:
        assert recognizer.validate_result(candidate) is None


def test_validate_result_with_non_numeric_province(recognizer):
    """validate_result returns None when the first two characters are letters."""
    letter_prefixed_inputs = ("AB ABC 123", "XY 123")
    for candidate in letter_prefixed_inputs:
        assert recognizer.validate_result(candidate) is None


def test_context_words(recognizer):
    """The default context contains the Turkish and English plate keywords."""
    configured_context = recognizer.context
    for keyword in ("plaka", "araç plakası", "license plate"):
        assert keyword in configured_context


def test_supported_entity(recognizer):
    """The recognizer reports TR_LICENSE_PLATE as its only supported entity."""
    expected_entities = ["TR_LICENSE_PLATE"]
    assert recognizer.supported_entities == expected_entities


def test_supported_language(recognizer):
    """The recognizer's default supported language is Turkish ("tr")."""
    expected_language = "tr"
    assert recognizer.supported_language == expected_language
Loading