microsoft · aaronaco · May 2, 2026 · May 5, 2026 · May 5, 2026 · May 5, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -9,6 +9,8 @@ All notable changes to this project will be documented in this file.
 #### Added
 - Canadian SIN (`CA_SIN`) recognizer for the Canadian Social Insurance Number, using regex pattern matching, context words (English and French), and Luhn checksum validation. Disabled by default.
 
+- Philippines TIN (`PH_TIN`) recognizer for the Philippines Taxpayer Identification Number, using regex pattern matching, context words, and weighted modulo 11 checksum validation. Disabled by default.
+
 - Swedish PII recognizers for `SE_PERSONNUMMER` to identify Swedish Personal ID Numbers using pattern match and checksum. The recognizer also supports Swedish coordination numbers (samordningsnummer), issued to individuals who are not registered residents in Sweden but require identification. All disabled by default.
 
 - German PII recognizers for `DE_TAX_ID` (Steueridentifikationsnummer, §§ 139a–139e AO, ISO 7064 Mod 11,10 checksum), `DE_TAX_NUMBER` (Steuernummer, § 139a AO, ELSTER and slash formats), `DE_PASSPORT` (Reisepassnummer, PassG § 4, ICAO Doc 9303), `DE_ID_CARD` (Personalausweisnummer, PAuswG), `DE_SOCIAL_SECURITY` (Rentenversicherungsnummer, § 147 SGB VI, DRV checksum), `DE_HEALTH_INSURANCE` (Krankenversicherungsnummer/KVNR, § 290 SGB V, GKV checksum), `DE_KFZ` (KFZ-Kennzeichen, FZV § 8), `DE_HANDELSREGISTER` (Handelsregisternummer HRA/HRB, §§ 9/14 HGB), and `DE_PLZ` (Postleitzahl, very low base confidence, context-only). All disabled by default.

diff --git a/docs/supported_entities.md b/docs/supported_entities.md
@@ -120,6 +120,11 @@ For more information, refer to the [adding new recognizers documentation](analyz
 | NG_NIN     | The Nigerian National Identification Number (NIN) is a unique 11-digit number issued by the National Identity Management Commission (NIMC). | Pattern match, context, and checksum |
 | NG_VEHICLE_REGISTRATION | Nigerian vehicle registration plate number in the current format (2011+): 3 letters (LGA code), 3 digits (serial), 2 letters (year/batch). | Pattern match and context |
 
+### Philippines
+| FieldType  | Description                                                                                             | Detection Method                         |
+|------------|---------------------------------------------------------------------------------------------------------|------------------------------------------|
+| PH_TIN     | Philippines Taxpayer Identification Number (TIN), issued by the Bureau of Internal Revenue (BIR): a 9-digit number whose last digit is a check digit, optionally followed by a 3-digit branch code (12 digits in total). | Pattern match, context, and checksum |
+
 ### Canada
 
 |FieldType|Description|Detection Method|

diff --git a/e2e-tests/resources/test_ollama_enabled_recognizers.yaml b/e2e-tests/resources/test_ollama_enabled_recognizers.yaml
@@ -123,6 +123,12 @@ recognizers:
     - es
     type: predefined
 
+  - name: PhTinRecognizer
+    supported_languages: 
+    - en
+    type: predefined
+    enabled: false
+
   - name: ItDriverLicenseRecognizer
     supported_languages: 
     - it

diff --git a/presidio-analyzer/presidio_analyzer/conf/default_analyzer_full.yaml b/presidio-analyzer/presidio_analyzer/conf/default_analyzer_full.yaml
@@ -104,6 +104,10 @@ recognizer_registry:
       type: predefined
       enabled: false
 
+    - name: PhTinRecognizer
+      type: predefined
+      enabled: false
+
     - name: InAadhaarRecognizer
       supported_languages:
       - en

diff --git a/presidio-analyzer/presidio_analyzer/conf/default_recognizers.yaml b/presidio-analyzer/presidio_analyzer/conf/default_recognizers.yaml
@@ -241,6 +241,12 @@ recognizers:
     - kr
     type: predefined
     enabled: false
+
+  - name: PhTinRecognizer
+    supported_languages: 
+    - en
+    type: predefined
+    enabled: false
 
   - name: SeOrganisationsnummerRecognizer
     supported_languages:

diff --git a/presidio-analyzer/presidio_analyzer/conf/slim.yaml b/presidio-analyzer/presidio_analyzer/conf/slim.yaml
@@ -73,6 +73,10 @@ recognizer_registry:
       type: predefined
       enabled: false
 
+    - name: PhTinRecognizer
+      type: predefined
+      enabled: false
+
     - name: InAadhaarRecognizer
       supported_languages:
       - en

diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/__init__.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/__init__.py
@@ -75,6 +75,9 @@
     NgVehicleRegistrationRecognizer,
 )
 
+# Philippines recognizers
+from .country_specific.philippines.ph_tin_recognizer import PhTinRecognizer
+
 # Poland recognizers
 from .country_specific.poland.pl_pesel_recognizer import PlPeselRecognizer
 
@@ -213,6 +216,7 @@
     "InPanRecognizer",
     "GLiNERRecognizer",
     "HuggingFaceNerRecognizer",
+    "PhTinRecognizer",
     "PlPeselRecognizer",
     "AzureAILanguageRecognizer",
     "InAadhaarRecognizer",

diff --git a/...nalyzer/presidio_analyzer/predefined_recognizers/country_specific/philippines/__init__.py b/...nalyzer/presidio_analyzer/predefined_recognizers/country_specific/philippines/__init__.py
@@ -0,0 +1,5 @@
+"""Philippines-specific recognizers package."""
+
+from .ph_tin_recognizer import PhTinRecognizer
+
+__all__ = ["PhTinRecognizer"]
diff --git a/...residio_analyzer/predefined_recognizers/country_specific/philippines/ph_tin_recognizer.py b/...residio_analyzer/predefined_recognizers/country_specific/philippines/ph_tin_recognizer.py
@@ -0,0 +1,102 @@
+from typing import List, Optional, Tuple
+
+from presidio_analyzer import Pattern, PatternRecognizer
+
+
+class PhTinRecognizer(PatternRecognizer):
+    """
+    Recognizes Philippines Taxpayer Identification Number (TIN).
+
+    The TIN is a 9 or 12-digit number issued by the Bureau of Internal Revenue (BIR).
+    The 9th digit is a check digit calculated using a weighted modulo 11 algorithm.
+    The last 3 digits (in the 12-digit version) represent the branch code (default 000).
+
+    Format: XXX-XXX-XXX[-XXX] with hyphens, or 9 / 12 consecutive digits
+    Reference: https://www.bir.gov.ph/
+
+    :param patterns: List of patterns to be used by this recognizer
+    :param context: List of context words to increase confidence in detection
+    :param supported_language: Language this recognizer supports
+    :param supported_entity: The entity this recognizer can detect
+    :param replacement_pairs: List of tuples with potential replacement values
+    :param name: Optional recognizer name forwarded to the base class
+    """
+
+    PATTERNS = [
+        Pattern(
+            "TIN (High)",
+            r"\b(\d{3}-\d{3}-\d{3}(-\d{3})?)\b",
+            0.6,
+        ),
+        Pattern(
+            "TIN (Medium)",
+            r"\b(\d{9}|\d{12})\b",
+            0.3,
+        ),
+    ]
+
+    CONTEXT = [
+        "tin",
+        "taxpayer identification number",
+        "bir",
+        "taxpayer id",
+        "tax id",
+        "rdo",
+        "revenue district office",
+    ]
+
+    def __init__(
+        self,
+        patterns: Optional[List[Pattern]] = None,
+        context: Optional[List[str]] = None,
+        supported_language: str = "en",
+        supported_entity: str = "PH_TIN",
+        replacement_pairs: Optional[List[Tuple[str, str]]] = None,
+        name: Optional[str] = None,
+    ):
+        self.replacement_pairs = (
+            replacement_pairs if replacement_pairs else [("-", ""), (" ", "")]
+        )
+        patterns = patterns if patterns else self.PATTERNS
+        context = context if context else self.CONTEXT
+        super().__init__(
+            supported_entity=supported_entity,
+            patterns=patterns,
+            context=context,
+            supported_language=supported_language,
+            name=name,
+        )
+
+    def validate_result(self, pattern_text: str) -> bool:
+        """
+        Validate the Philippines TIN using weighted modulo 11.
+
+        Zero-filled numbers are rejected even though they pass the checksum.
+
+        :param pattern_text: The text to validate
+        :return: True if valid, False otherwise
+        """
+        for search, replace in self.replacement_pairs:
+            pattern_text = pattern_text.replace(search, replace)
+
+        # isdecimal() (unlike isdigit()) only accepts characters int() can parse,
+        # so inputs such as "²" are rejected here instead of raising ValueError.
+        if not pattern_text.isdecimal() or len(pattern_text) not in (9, 12):
+            return False
+
+        # Digits 1-8 carry the weights; digit 9 is the check digit. Any branch
+        # code (digits 10-12) is not covered by the checksum.
+        digits = [int(char) for char in pattern_text[:9]]
+        *base_digits, check_digit = digits
+
+        # An all-zero base number satisfies the checksum trivially (0 % 11 == 0);
+        # reject it so zero-filled placeholders are not reported as a TIN.
+        if not any(digits):
+            return False
+
+        weights = (9, 8, 7, 6, 5, 4, 3, 2)
+        remainder = sum(d * w for d, w in zip(base_digits, weights)) % 11
+
+        # A remainder of 10 can never equal a single check digit, so such
+        # numbers are always rejected by the comparison below.
+        return remainder == check_digit
diff --git a/presidio-analyzer/tests/data/context_sentences_tests.txt b/presidio-analyzer/tests/data/context_sentences_tests.txt
@@ -117,3 +117,11 @@ my PAN number is DJPMS1234Z
 #Verify IN PASSPORT context words
 IN_PASSPORT
 my passport number is T1234567. Indian Passport number is of 8 characters long, always starting with a capital letter.
+
+#Verify PH TIN context words
+PH_TIN
+my tin is 000-123-456-000
+
+#Verify PH TIN context words with bir
+PH_TIN
+my bir taxpayer identification number is 000123456
diff --git a/presidio-analyzer/tests/test_context_support.py b/presidio-analyzer/tests/test_context_support.py
@@ -17,7 +17,7 @@
     SgFinRecognizer,
     InPanRecognizer,
     InPassportRecognizer,
-
+    PhTinRecognizer,
 )
 from presidio_analyzer.nlp_engine import NlpArtifacts
 from presidio_analyzer.context_aware_enhancers import LemmaContextAwareEnhancer
@@ -37,6 +37,7 @@ def recognizers_map():
         "FIN": SgFinRecognizer(),
         "IN_PAN": InPanRecognizer(),
         "IN_PASSPORT": InPassportRecognizer(),
+        "PH_TIN": PhTinRecognizer(),
     }
     return rec_map
 
@@ -70,9 +71,9 @@ def dataset(recognizers_map):
             raise ValueError(f"bad entity type {entity_type}")
 
         test_items.append((item, recognizer, [entity_type]))
-    # Currently we have 34 sentences, this is a sanity check
-    if not len(test_items) == 34:
-        raise ValueError(f"expected 34 context sentences but found {len(test_items)}")
+    # Currently we have 36 sentences, this is a sanity check
+    if not len(test_items) == 36:
+        raise ValueError(f"expected 36 context sentences but found {len(test_items)}")
 
     yield test_items
 

diff --git a/presidio-analyzer/tests/test_ph_tin_recognizer.py b/presidio-analyzer/tests/test_ph_tin_recognizer.py
@@ -0,0 +1,48 @@
+import pytest
+from presidio_analyzer.predefined_recognizers.country_specific.philippines import (
+    PhTinRecognizer,
+)
+
+from tests import assert_result_within_score_range
+
+
+@pytest.fixture(scope="module")
+def recognizer():
+    """Provide one default-configured PhTinRecognizer, created once per module."""
+    return PhTinRecognizer()
+
+
+@pytest.fixture(scope="module")
+def entities():
+    """Provide the entity list passed to ``recognizer.analyze`` in these tests."""
+    return ["PH_TIN"]
+
+
+@pytest.mark.parametrize(
+    "text, expected_len, expected_positions, expected_score_ranges",
+    [
+        # Checksum-valid: weighted sum of 0001 2345 is 50, and 50 % 11 = 6
+        ("My TIN is 000-123-456-000", 1, [(10, 25)], [(0.1, 1.0)]),
+        ("BIR TIN: 000123456", 1, [(9, 18)], [(0.1, 1.0)]),
+        ("Tax ID: 000-123-456-001", 1, [(8, 23)], [(0.1, 1.0)]),
+        # Checksum-valid, hyphenated 9-digit form without a branch code
+        ("TIN 000-123-456", 1, [(4, 15)], [(0.1, 1.0)]),
+        # Check digit does not match the computed remainder -> no result
+        ("Invalid TIN 000-123-457-000", 0, [], []),
+        ("Not a TIN 123456789", 0, [], []),
+        # With and without a context keyword next to the number
+        ("TIN: 000-123-456-000", 1, [(5, 20)], [(0.1, 1.0)]),
+        ("Please use 000-123-456-000 as your ID", 1, [(11, 26)], [(0.1, 1.0)]),
+    ],
+)
+def test_ph_tin_recognizer(
+    text, expected_len, expected_positions, expected_score_ranges, recognizer, entities
+):
+    """Check match count, span and score range for each sample text."""
+    found = recognizer.analyze(text, entities)
+    assert len(found) == expected_len
+    # zip() stops at the shortest input, so zero-match cases skip the loop.
+    expectations = zip(found, expected_positions, expected_score_ranges)
+    for match, (start, end), (low, high) in expectations:
+        # Entity type, span and score must all agree with the expectation.
+        assert_result_within_score_range(match, entities[0], start, end, low, high)