diff --git a/gateway/it/features/consumer-cost-based-ratelimit.feature b/gateway/it/features/consumer-cost-based-ratelimit.feature new file mode 100644 index 000000000..1a3438066 --- /dev/null +++ b/gateway/it/features/consumer-cost-based-ratelimit.feature @@ -0,0 +1,490 @@ +# -------------------------------------------------------------------- +# Copyright (c) 2026, WSO2 LLC. (https://www.wso2.com). +# +# WSO2 LLC. licenses this file to you under the Apache License, +# Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# -------------------------------------------------------------------- + +@consumer-cost-based-ratelimit +Feature: Consumer Cost-Based Rate Limiting + As an API developer + I want cost limits to be enforced independently per GenAI application + So that one application exhausting its budget does not block other applications + + Background: + Given the gateway services are running + And I authenticate using basic auth as "admin" + + Scenario: Each consumer gets an independent cost budget + # mock-openai returns gpt-4.1-2025-04-14: 19 prompt × $2/1M + 10 completion × $8/1M = $0.0001180000 + # Budget per consumer: $0.000236 = exactly 2 requests worth + # App A sends 2 requests (budget exhausted) and is blocked on the 3rd. + # App B is unaffected — its budget counter is still at $0. 
+ When I create this LLM provider template: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProviderTemplate + metadata: + name: ccbrl-template + spec: + displayName: CCBRL Template + """ + Then the response status code should be 201 + + When I create this LLM provider: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProvider + metadata: + name: ccbrl-provider + spec: + displayName: CCBRL Provider + version: v1.0 + context: /ccbrl + template: ccbrl-template + upstream: + url: http://mock-openapi:4010 + auth: + type: api-key + header: Authorization + value: test-key + accessControl: + mode: allow_all + policies: + - name: api-key-auth + version: v1 + paths: + - path: /* + methods: ['*'] + params: + key: x-api-key + in: header + - name: llm-cost-based-ratelimit + version: v1 + paths: + - path: /* + methods: ['*'] + params: + budgetLimits: + - amount: 0.000236 + duration: "1h" + consumerBased: true + - name: llm-cost + version: v1 + paths: + - path: /* + methods: ['*'] + """ + Then the response status code should be 201 + And I wait for policy snapshot sync + + # Create API key for App A + When I send a POST request to the "gateway-controller" service at "/llm-providers/ccbrl-provider/api-keys" with body: + """ + { + "name": "ccbrl-app-a", + "apiKey": "ccbrl-app-a-key-000000000000000000000000" + } + """ + Then the response status code should be 201 + + # Create API key for App B + When I send a POST request to the "gateway-controller" service at "/llm-providers/ccbrl-provider/api-keys" with body: + """ + { + "name": "ccbrl-app-b", + "apiKey": "ccbrl-app-b-key-000000000000000000000000" + } + """ + Then the response status code should be 201 + And I wait for 2 seconds + + Given I set header "Content-Type" to "application/json" + + # App A: request 1 — allowed, budget drops to $0.000118 + When I send a POST request to "http://localhost:8080/ccbrl/openai/v1/chat/completions" with header "x-api-key" value 
"ccbrl-app-a-key-000000000000000000000000" with body: + """ json + {"model": "gpt-4.1-2025-04-14", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + + # App A: request 2 — allowed, budget reaches exactly $0 + When I send a POST request to "http://localhost:8080/ccbrl/openai/v1/chat/completions" with header "x-api-key" value "ccbrl-app-a-key-000000000000000000000000" with body: + """ json + {"model": "gpt-4.1-2025-04-14", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + + # App A: request 3 — blocked, budget exhausted + When I send a POST request to "http://localhost:8080/ccbrl/openai/v1/chat/completions" with header "x-api-key" value "ccbrl-app-a-key-000000000000000000000000" with body: + """ json + {"model": "gpt-4.1-2025-04-14", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 429 + + # App B: request 1 — should succeed, App B has its own independent cost counter + When I send a POST request to "http://localhost:8080/ccbrl/openai/v1/chat/completions" with header "x-api-key" value "ccbrl-app-b-key-000000000000000000000000" with body: + """ json + {"model": "gpt-4.1-2025-04-14", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + + # Cleanup + Given I authenticate using basic auth as "admin" + When I delete the LLM provider "ccbrl-provider" + Then the response status code should be 200 + When I delete the LLM provider template "ccbrl-template" + Then the response status code should be 200 + + Scenario: Backend cost limit blocks all consumers when shared budget is exhausted + # Backend limit: $0.000236/hour shared across all apps (exactly 2 requests worth). + # Consumer limit: $0.000236/hour per app independently. + # App A sends 2 requests — exhausts the shared backend budget. 
+ # App B's next request is blocked by the backend limit even though + # App B's own consumer budget is still at $0. + When I create this LLM provider template: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProviderTemplate + metadata: + name: ccbrl-both-template + spec: + displayName: CCBRL Both Template + """ + Then the response status code should be 201 + + When I create this LLM provider: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProvider + metadata: + name: ccbrl-both-provider + spec: + displayName: CCBRL Both Provider + version: v1.0 + context: /ccbrl-both + template: ccbrl-both-template + upstream: + url: http://mock-openapi:4010 + auth: + type: api-key + header: Authorization + value: test-key + accessControl: + mode: allow_all + policies: + - name: api-key-auth + version: v1 + paths: + - path: /* + methods: ['*'] + params: + key: x-api-key + in: header + - name: llm-cost-based-ratelimit + version: v1 + paths: + - path: /* + methods: ['*'] + params: + budgetLimits: + - amount: 0.000236 + duration: "1h" + - name: llm-cost-based-ratelimit + version: v1 + paths: + - path: /* + methods: ['*'] + params: + budgetLimits: + - amount: 0.000236 + duration: "1h" + consumerBased: true + - name: llm-cost + version: v1 + paths: + - path: /* + methods: ['*'] + """ + Then the response status code should be 201 + And I wait for policy snapshot sync + + # Create API key for App A + When I send a POST request to the "gateway-controller" service at "/llm-providers/ccbrl-both-provider/api-keys" with body: + """ + { + "name": "ccbrl-both-app-a", + "apiKey": "ccbrl-both-app-a-key-00000000000000000000000" + } + """ + Then the response status code should be 201 + + # Create API key for App B + When I send a POST request to the "gateway-controller" service at "/llm-providers/ccbrl-both-provider/api-keys" with body: + """ + { + "name": "ccbrl-both-app-b", + "apiKey": "ccbrl-both-app-b-key-00000000000000000000000" + } + """ + Then 
the response status code should be 201 + And I wait for 2 seconds + + Given I set header "Content-Type" to "application/json" + + # App A: request 1 — allowed, shared backend budget drops to $0.000118 + When I send a POST request to "http://localhost:8080/ccbrl-both/openai/v1/chat/completions" with header "x-api-key" value "ccbrl-both-app-a-key-00000000000000000000000" with body: + """ json + {"model": "gpt-4.1-2025-04-14", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + + # App A: request 2 — allowed, shared backend budget reaches exactly $0 + When I send a POST request to "http://localhost:8080/ccbrl-both/openai/v1/chat/completions" with header "x-api-key" value "ccbrl-both-app-a-key-00000000000000000000000" with body: + """ json + {"model": "gpt-4.1-2025-04-14", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + + # App B: blocked by the shared backend budget even though its own consumer budget is at $0 + When I send a POST request to "http://localhost:8080/ccbrl-both/openai/v1/chat/completions" with header "x-api-key" value "ccbrl-both-app-b-key-00000000000000000000000" with body: + """ json + {"model": "gpt-4.1-2025-04-14", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 429 + + # Cleanup + Given I authenticate using basic auth as "admin" + When I delete the LLM provider "ccbrl-both-provider" + Then the response status code should be 200 + When I delete the LLM provider template "ccbrl-both-template" + Then the response status code should be 200 + + Scenario: Requests without an app ID share a single "default" cost budget + # When no api-key-auth is in the chain, x-wso2-application-id is never written to + # metadata. The fallback key "default" is used so all unauthenticated requests count + # against the same "default" cost bucket (not the backend "routename" bucket). 
+ # Budget: $0.000236/hour (2 requests worth at gpt-4.1-2025-04-14 pricing). + # After 2 requests the "default" budget is exhausted and further requests are blocked. + When I create this LLM provider template: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProviderTemplate + metadata: + name: ccbrl-fallback-template + spec: + displayName: CCBRL Fallback Template + """ + Then the response status code should be 201 + + When I create this LLM provider: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProvider + metadata: + name: ccbrl-fallback-provider + spec: + displayName: CCBRL Fallback Provider + version: v1.0 + context: /ccbrl-fallback + template: ccbrl-fallback-template + upstream: + url: http://mock-openapi:4010 + auth: + type: api-key + header: Authorization + value: test-key + accessControl: + mode: allow_all + policies: + - name: llm-cost-based-ratelimit + version: v1 + paths: + - path: /* + methods: ['*'] + params: + budgetLimits: + - amount: 0.000236 + duration: "1h" + consumerBased: true + - name: llm-cost + version: v1 + paths: + - path: /* + methods: ['*'] + """ + Then the response status code should be 201 + And I wait for policy snapshot sync + + Given I set header "Content-Type" to "application/json" + + # Request 1 — no app ID, key = "ccbrl-fallback:default" — allowed, budget drops to $0.000118 + When I send a POST request to "http://localhost:8080/ccbrl-fallback/openai/v1/chat/completions" with body: + """ json + {"model": "gpt-4.1-2025-04-14", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + + # Request 2 — no app ID, same "default" budget — allowed, budget reaches exactly $0 + When I send a POST request to "http://localhost:8080/ccbrl-fallback/openai/v1/chat/completions" with body: + """ json + {"model": "gpt-4.1-2025-04-14", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + + # Request 3 — 
"default" budget exhausted — blocked + When I send a POST request to "http://localhost:8080/ccbrl-fallback/openai/v1/chat/completions" with body: + """ json + {"model": "gpt-4.1-2025-04-14", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 429 + + # Cleanup + Given I authenticate using basic auth as "admin" + When I delete the LLM provider "ccbrl-fallback-provider" + Then the response status code should be 200 + When I delete the LLM provider template "ccbrl-fallback-template" + Then the response status code should be 200 + + Scenario: Consumer counter is not double-deducted when both backend and consumer limits are active + # This test guards against the llm_cost_delegate metadata key collision. + # + # Without the fix: both backend and consumer LLMCostRateLimitPolicy instances write + # their delegate reference to the same metadata key ("llm_cost_delegate"). The consumer + # overwrites the backend's entry. In the response phase (reverse order), the backend + # instance reads back the consumer's delegate and calls it — so the consumer's + # OnResponseBody runs twice. The consumer counter is drained twice as fast. + # + # With the fix: backend uses "llm_cost_delegate", consumer uses + # "llm_cost_delegate_consumer". Each instance reads back only its own delegate. 
+ # + # Setup: + # Backend limit: $1/hour (very high — never exhausted in this test) + # Consumer limit: $0.000236/hour = exactly 2 requests at gpt-4.1-2025-04-14 pricing + # + # Expected (with fix): + # request 1 → 200 (consumer deducted once: $0.000236 - $0.000118 = $0.000118 remaining) + # request 2 → 200 (consumer deducted once: $0.000118 - $0.000118 = $0 remaining) + # request 3 → 429 (consumer exhausted) + # + # Without fix: + # request 1 → 200 (consumer deducted twice: $0.000236 - 2×$0.000118 = $0 remaining) + # request 2 → 429 ← test fails here + When I create this LLM provider template: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProviderTemplate + metadata: + name: ccbrl-nodbl-template + spec: + displayName: CCBRL No-Double Template + """ + Then the response status code should be 201 + + When I create this LLM provider: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProvider + metadata: + name: ccbrl-nodbl-provider + spec: + displayName: CCBRL No-Double Provider + version: v1.0 + context: /ccbrl-nodbl + template: ccbrl-nodbl-template + upstream: + url: http://mock-openapi:4010 + auth: + type: api-key + header: Authorization + value: test-key + accessControl: + mode: allow_all + policies: + - name: api-key-auth + version: v1 + paths: + - path: /* + methods: ['*'] + params: + key: x-api-key + in: header + - name: llm-cost-based-ratelimit + version: v1 + paths: + - path: /* + methods: ['*'] + params: + budgetLimits: + - amount: 1.0 + duration: "1h" + - name: llm-cost-based-ratelimit + version: v1 + paths: + - path: /* + methods: ['*'] + params: + budgetLimits: + - amount: 0.000236 + duration: "1h" + consumerBased: true + - name: llm-cost + version: v1 + paths: + - path: /* + methods: ['*'] + """ + Then the response status code should be 201 + And I wait for policy snapshot sync + + When I send a POST request to the "gateway-controller" service at "/llm-providers/ccbrl-nodbl-provider/api-keys" with body: + """ 
+ { + "name": "ccbrl-nodbl-app-a", + "apiKey": "ccbrl-nodbl-app-a-key-0000000000000000000000" + } + """ + Then the response status code should be 201 + And I wait for 2 seconds + + Given I set header "Content-Type" to "application/json" + + # Request 1 — allowed; consumer budget: $0.000236 - $0.000118 = $0.000118 remaining + When I send a POST request to "http://localhost:8080/ccbrl-nodbl/openai/v1/chat/completions" with header "x-api-key" value "ccbrl-nodbl-app-a-key-0000000000000000000000" with body: + """ json + {"model": "gpt-4.1-2025-04-14", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + + # Request 2 — allowed; consumer budget: $0.000118 - $0.000118 = $0 remaining + # Without the fix this would be 429 because the consumer counter was double-deducted on request 1 + When I send a POST request to "http://localhost:8080/ccbrl-nodbl/openai/v1/chat/completions" with header "x-api-key" value "ccbrl-nodbl-app-a-key-0000000000000000000000" with body: + """ json + {"model": "gpt-4.1-2025-04-14", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + + # Request 3 — blocked; consumer budget exhausted + When I send a POST request to "http://localhost:8080/ccbrl-nodbl/openai/v1/chat/completions" with header "x-api-key" value "ccbrl-nodbl-app-a-key-0000000000000000000000" with body: + """ json + {"model": "gpt-4.1-2025-04-14", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 429 + + # Cleanup + Given I authenticate using basic auth as "admin" + When I delete the LLM provider "ccbrl-nodbl-provider" + Then the response status code should be 200 + When I delete the LLM provider template "ccbrl-nodbl-template" + Then the response status code should be 200 \ No newline at end of file diff --git a/gateway/it/features/consumer-request-based-ratelimit.feature b/gateway/it/features/consumer-request-based-ratelimit.feature 
new file mode 100644 index 000000000..3cc73d6a2 --- /dev/null +++ b/gateway/it/features/consumer-request-based-ratelimit.feature @@ -0,0 +1,365 @@ +# -------------------------------------------------------------------- +# Copyright (c) 2026, WSO2 LLC. (https://www.wso2.com). +# +# WSO2 LLC. licenses this file to you under the Apache License, +# Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# -------------------------------------------------------------------- + +@consumer-request-based-ratelimit +Feature: Consumer Request-Based Rate Limiting + As an API developer + I want request count limits to be enforced independently per GenAI application + So that one application exhausting its request quota does not block other applications + + Background: + Given the gateway services are running + And I authenticate using basic auth as "admin" + + Scenario: Each consumer gets an independent request counter + # Each app gets 2 requests/hour independently. + # App A sends 2 requests (limit reached) and gets blocked on the 3rd. + # App B is unaffected — its counter is still at 0. 
+ When I create this LLM provider template: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProviderTemplate + metadata: + name: crbrl-template + spec: + displayName: CRBRL Template + """ + Then the response status code should be 201 + + When I create this LLM provider: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProvider + metadata: + name: crbrl-provider + spec: + displayName: CRBRL Provider + version: v1.0 + context: /crbrl + template: crbrl-template + upstream: + url: http://echo-backend-multi-arch:8080/anything + auth: + type: api-key + header: Authorization + value: test-key + accessControl: + mode: allow_all + policies: + - name: api-key-auth + version: v1 + paths: + - path: /* + methods: ['*'] + params: + key: x-api-key + in: header + - name: advanced-ratelimit + version: v1 + paths: + - path: /* + methods: ['*'] + params: + quotas: + - name: consumer-request-limit + limits: + - limit: 2 + duration: "1h" + keyExtraction: + - type: routename + - type: metadata + key: x-wso2-application-id + """ + Then the response status code should be 201 + And I wait for policy snapshot sync + + # Create API key for App A + When I send a POST request to the "gateway-controller" service at "/llm-providers/crbrl-provider/api-keys" with body: + """ + { + "name": "crbrl-app-a", + "apiKey": "crbrl-app-a-key-000000000000000000000000" + } + """ + Then the response status code should be 201 + + # Create API key for App B + When I send a POST request to the "gateway-controller" service at "/llm-providers/crbrl-provider/api-keys" with body: + """ + { + "name": "crbrl-app-b", + "apiKey": "crbrl-app-b-key-000000000000000000000000" + } + """ + Then the response status code should be 201 + And I wait for 2 seconds + + Given I set header "Content-Type" to "application/json" + + # App A: request 1 — allowed (counter: 1/2) + When I send a POST request to "http://localhost:8080/crbrl/chat/completions" with header "x-api-key" value 
"crbrl-app-a-key-000000000000000000000000" with body: + """ + {"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + + # App A: request 2 — allowed (counter: 2/2, limit reached) + When I send a POST request to "http://localhost:8080/crbrl/chat/completions" with header "x-api-key" value "crbrl-app-a-key-000000000000000000000000" with body: + """ + {"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + + # App A: request 3 — blocked, request quota exhausted + When I send a POST request to "http://localhost:8080/crbrl/chat/completions" with header "x-api-key" value "crbrl-app-a-key-000000000000000000000000" with body: + """ + {"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 429 + + # App B: request 1 — should succeed, App B has its own independent counter + When I send a POST request to "http://localhost:8080/crbrl/chat/completions" with header "x-api-key" value "crbrl-app-b-key-000000000000000000000000" with body: + """ + {"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + + # Cleanup + Given I authenticate using basic auth as "admin" + When I delete the LLM provider "crbrl-provider" + Then the response status code should be 200 + When I delete the LLM provider template "crbrl-template" + Then the response status code should be 200 + + Scenario: Backend request limit blocks all consumers when shared quota is exhausted + # Backend limit: 3 requests/hour shared across all apps. + # Consumer limit: 3 requests/hour per app independently. + # App A sends 3 requests — exhausts the shared backend counter. + # App B's next request is blocked by the backend limit even though + # App B's own consumer counter is still at 0. 
+ When I create this LLM provider template: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProviderTemplate + metadata: + name: crbrl-both-template + spec: + displayName: CRBRL Both Template + """ + Then the response status code should be 201 + + When I create this LLM provider: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProvider + metadata: + name: crbrl-both-provider + spec: + displayName: CRBRL Both Provider + version: v1.0 + context: /crbrl-both + template: crbrl-both-template + upstream: + url: http://echo-backend-multi-arch:8080/anything + auth: + type: api-key + header: Authorization + value: test-key + accessControl: + mode: allow_all + policies: + - name: api-key-auth + version: v1 + paths: + - path: /* + methods: ['*'] + params: + key: x-api-key + in: header + - name: advanced-ratelimit + version: v1 + paths: + - path: /* + methods: ['*'] + params: + quotas: + - name: backend-request-limit + limits: + - limit: 3 + duration: "1h" + keyExtraction: + - type: routename + - name: advanced-ratelimit + version: v1 + paths: + - path: /* + methods: ['*'] + params: + quotas: + - name: consumer-request-limit + limits: + - limit: 3 + duration: "1h" + keyExtraction: + - type: routename + - type: metadata + key: x-wso2-application-id + """ + Then the response status code should be 201 + And I wait for policy snapshot sync + + # Create API key for App A + When I send a POST request to the "gateway-controller" service at "/llm-providers/crbrl-both-provider/api-keys" with body: + """ + { + "name": "crbrl-both-app-a", + "apiKey": "crbrl-both-app-a-key-00000000000000000000000" + } + """ + Then the response status code should be 201 + + # Create API key for App B + When I send a POST request to the "gateway-controller" service at "/llm-providers/crbrl-both-provider/api-keys" with body: + """ + { + "name": "crbrl-both-app-b", + "apiKey": "crbrl-both-app-b-key-00000000000000000000000" + } + """ + Then the response status code 
should be 201 + And I wait for 2 seconds + + Given I set header "Content-Type" to "application/json" + + # App A: requests 1-3 — exhausts the shared backend counter (3/3) + When I send a POST request to "http://localhost:8080/crbrl-both/chat/completions" with header "x-api-key" value "crbrl-both-app-a-key-00000000000000000000000" with body: + """ + {"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + When I send a POST request to "http://localhost:8080/crbrl-both/chat/completions" with header "x-api-key" value "crbrl-both-app-a-key-00000000000000000000000" with body: + """ + {"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + When I send a POST request to "http://localhost:8080/crbrl-both/chat/completions" with header "x-api-key" value "crbrl-both-app-a-key-00000000000000000000000" with body: + """ + {"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + + # App B: blocked by the shared backend counter even though its own consumer counter is at 0 + When I send a POST request to "http://localhost:8080/crbrl-both/chat/completions" with header "x-api-key" value "crbrl-both-app-b-key-00000000000000000000000" with body: + """ + {"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 429 + + # Cleanup + Given I authenticate using basic auth as "admin" + When I delete the LLM provider "crbrl-both-provider" + Then the response status code should be 200 + When I delete the LLM provider template "crbrl-both-template" + Then the response status code should be 200 + + Scenario: Requests without an app ID share a single "default" counter + # When no api-key-auth is in the chain, x-wso2-application-id is never written to + # metadata. 
The fallback key "default" is used instead of a "_missing_metadata_*_" + # placeholder, so all unauthenticated requests count against the same "default" bucket. + # Limit: 2 requests/hour. After 2 requests the "default" counter is exhausted and + # all further requests (still with no app ID) are blocked. + When I create this LLM provider template: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProviderTemplate + metadata: + name: crbrl-fallback-template + spec: + displayName: CRBRL Fallback Template + """ + Then the response status code should be 201 + + When I create this LLM provider: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProvider + metadata: + name: crbrl-fallback-provider + spec: + displayName: CRBRL Fallback Provider + version: v1.0 + context: /crbrl-fallback + template: crbrl-fallback-template + upstream: + url: http://echo-backend-multi-arch:8080/anything + auth: + type: api-key + header: Authorization + value: test-key + accessControl: + mode: allow_all + policies: + - name: advanced-ratelimit + version: v1 + paths: + - path: /* + methods: ['*'] + params: + quotas: + - name: consumer-request-limit + limits: + - limit: 2 + duration: "1h" + keyExtraction: + - type: routename + - type: metadata + key: x-wso2-application-id + fallback: default + """ + Then the response status code should be 201 + And I wait for policy snapshot sync + + Given I set header "Content-Type" to "application/json" + + # Request 1 — no app ID in metadata, key = "crbrl-fallback:default" — allowed (1/2) + When I send a POST request to "http://localhost:8080/crbrl-fallback/chat/completions" with body: + """ + {"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + + # Request 2 — no app ID in metadata, same "default" counter — allowed (2/2) + When I send a POST request to "http://localhost:8080/crbrl-fallback/chat/completions" with body: + """ + {"model": "gpt-4", 
"messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + + # Request 3 — "default" counter exhausted — blocked + When I send a POST request to "http://localhost:8080/crbrl-fallback/chat/completions" with body: + """ + {"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 429 + + # Cleanup + Given I authenticate using basic auth as "admin" + When I delete the LLM provider "crbrl-fallback-provider" + Then the response status code should be 200 + When I delete the LLM provider template "crbrl-fallback-template" + Then the response status code should be 200 \ No newline at end of file diff --git a/gateway/it/features/consumer-token-based-ratelimit.feature b/gateway/it/features/consumer-token-based-ratelimit.feature new file mode 100644 index 000000000..b938a7c76 --- /dev/null +++ b/gateway/it/features/consumer-token-based-ratelimit.feature @@ -0,0 +1,409 @@ +# -------------------------------------------------------------------- +# Copyright (c) 2026, WSO2 LLC. (https://www.wso2.com). +# +# WSO2 LLC. licenses this file to you under the Apache License, +# Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# -------------------------------------------------------------------- + +@consumer-token-based-ratelimit +Feature: Consumer Token-Based Rate Limiting + As an API developer + I want token limits to be enforced independently per GenAI application + So that one application exhausting its budget does not block other applications + + Background: + Given the gateway services are running + And I authenticate using basic auth as "admin" + + Scenario: Each consumer gets an independent token counter + # Each app gets 20 total tokens/hour independently. + # The echo backend reflects each request body, so every request reports usage.total_tokens = 10. + # App A uses 2 requests (20 tokens) and gets blocked on the 3rd. + # App B is unaffected — its counter is still at 0. + When I create this LLM provider template: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProviderTemplate + metadata: + name: ctbrl-template + spec: + displayName: CTBRL Template + totalTokens: + location: payload + identifier: $.json.usage.total_tokens + """ + Then the response status code should be 201 + + When I create this LLM provider: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProvider + metadata: + name: ctbrl-provider + spec: + displayName: CTBRL Provider + version: v1.0 + context: /ctbrl + template: ctbrl-template + upstream: + url: http://echo-backend-multi-arch:8080/anything + auth: + type: api-key + header: Authorization + value: test-key + accessControl: + mode: allow_all + policies: + - name: api-key-auth + version: v1 + paths: + - path: /* + methods: ['*'] + params: + key: x-api-key + in: header + - name: token-based-ratelimit + version: v1 + paths: + - path: /* + methods: ['*'] + params: + totalTokenLimits: + - count: 20 + duration: "1h" + consumerBased: true + algorithm: fixed-window + backend: memory + """ + Then the response status code should be 201 + And I wait for policy snapshot sync + + # Create API key for App A (pre-set known value, min 36 chars) + When I send a POST
request to the "gateway-controller" service at "/llm-providers/ctbrl-provider/api-keys" with body: + """ + { + "name": "ctbrl-app-a", + "apiKey": "ctbrl-app-a-key-000000000000000000000000" + } + """ + Then the response status code should be 201 + + # Create API key for App B + When I send a POST request to the "gateway-controller" service at "/llm-providers/ctbrl-provider/api-keys" with body: + """ + { + "name": "ctbrl-app-b", + "apiKey": "ctbrl-app-b-key-000000000000000000000000" + } + """ + Then the response status code should be 201 + And I wait for 2 seconds + + Given I set header "Content-Type" to "application/json" + + # App A: request 1 — consumes 10 tokens (counter: 10/20) + When I send a POST request to "http://localhost:8080/ctbrl/chat/completions" with header "x-api-key" value "ctbrl-app-a-key-000000000000000000000000" with body: + """ + { + "model": "gpt-4", + "messages": [{"role": "user", "content": "Hello"}], + "usage": {"prompt_tokens": 4, "completion_tokens": 6, "total_tokens": 10} + } + """ + Then the response status code should be 200 + + # App A: request 2 — consumes 10 more tokens (counter: 20/20, limit reached) + When I send a POST request to "http://localhost:8080/ctbrl/chat/completions" with header "x-api-key" value "ctbrl-app-a-key-000000000000000000000000" with body: + """ + { + "model": "gpt-4", + "messages": [{"role": "user", "content": "Hello"}], + "usage": {"prompt_tokens": 4, "completion_tokens": 6, "total_tokens": 10} + } + """ + Then the response status code should be 200 + + # App A: request 3 — blocked, token budget exhausted + When I send a POST request to "http://localhost:8080/ctbrl/chat/completions" with header "x-api-key" value "ctbrl-app-a-key-000000000000000000000000" with body: + """ + { + "model": "gpt-4", + "messages": [{"role": "user", "content": "Hello"}], + "usage": {"prompt_tokens": 4, "completion_tokens": 6, "total_tokens": 10} + } + """ + Then the response status code should be 429 + + # App B: request 1 — should 
succeed, App B has its own independent counter + When I send a POST request to "http://localhost:8080/ctbrl/chat/completions" with header "x-api-key" value "ctbrl-app-b-key-000000000000000000000000" with body: + """ + { + "model": "gpt-4", + "messages": [{"role": "user", "content": "Hello"}], + "usage": {"prompt_tokens": 4, "completion_tokens": 6, "total_tokens": 10} + } + """ + Then the response status code should be 200 + + # Cleanup + Given I authenticate using basic auth as "admin" + When I delete the LLM provider "ctbrl-provider" + Then the response status code should be 200 + When I delete the LLM provider template "ctbrl-template" + Then the response status code should be 200 + + Scenario: Backend limit blocks all consumers when shared budget is exhausted + # Backend limit: 30 total tokens/hour shared across all apps. + # Consumer limit: 30 total tokens/hour per app independently. + # Each request uses 10 tokens (via echo backend). + # App A sends 3 requests — exhausts the backend shared counter (30/30). + # App B's next request is blocked by the backend limit even though + # App B's own consumer counter is only at 0. 
+ When I create this LLM provider template: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProviderTemplate + metadata: + name: ctbrl-both-template + spec: + displayName: CTBRL Both Template + totalTokens: + location: payload + identifier: $.json.usage.total_tokens + """ + Then the response status code should be 201 + + When I create this LLM provider: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProvider + metadata: + name: ctbrl-both-provider + spec: + displayName: CTBRL Both Provider + version: v1.0 + context: /ctbrl-both + template: ctbrl-both-template + upstream: + url: http://echo-backend-multi-arch:8080/anything + auth: + type: api-key + header: Authorization + value: test-key + accessControl: + mode: allow_all + policies: + - name: api-key-auth + version: v1 + paths: + - path: /* + methods: ['*'] + params: + key: x-api-key + in: header + - name: token-based-ratelimit + version: v1 + paths: + - path: /* + methods: ['*'] + params: + totalTokenLimits: + - count: 30 + duration: "1h" + algorithm: fixed-window + backend: memory + - name: token-based-ratelimit + version: v1 + paths: + - path: /* + methods: ['*'] + params: + totalTokenLimits: + - count: 30 + duration: "1h" + consumerBased: true + algorithm: fixed-window + backend: memory + """ + Then the response status code should be 201 + And I wait for policy snapshot sync + + # Create API key for App A + When I send a POST request to the "gateway-controller" service at "/llm-providers/ctbrl-both-provider/api-keys" with body: + """ + { + "name": "ctbrl-both-app-a", + "apiKey": "ctbrl-both-app-a-key-00000000000000000000000" + } + """ + Then the response status code should be 201 + + # Create API key for App B + When I send a POST request to the "gateway-controller" service at "/llm-providers/ctbrl-both-provider/api-keys" with body: + """ + { + "name": "ctbrl-both-app-b", + "apiKey": "ctbrl-both-app-b-key-00000000000000000000000" + } + """ + Then the response status 
code should be 201 + And I wait for 2 seconds + + Given I set header "Content-Type" to "application/json" + + # App A: requests 1-3 — exhausts the shared backend counter (30 tokens) + When I send a POST request to "http://localhost:8080/ctbrl-both/chat/completions" with header "x-api-key" value "ctbrl-both-app-a-key-00000000000000000000000" with body: + """ + { + "model": "gpt-4", + "messages": [{"role": "user", "content": "Hello"}], + "usage": {"prompt_tokens": 4, "completion_tokens": 6, "total_tokens": 10} + } + """ + Then the response status code should be 200 + When I send a POST request to "http://localhost:8080/ctbrl-both/chat/completions" with header "x-api-key" value "ctbrl-both-app-a-key-00000000000000000000000" with body: + """ + { + "model": "gpt-4", + "messages": [{"role": "user", "content": "Hello"}], + "usage": {"prompt_tokens": 4, "completion_tokens": 6, "total_tokens": 10} + } + """ + Then the response status code should be 200 + When I send a POST request to "http://localhost:8080/ctbrl-both/chat/completions" with header "x-api-key" value "ctbrl-both-app-a-key-00000000000000000000000" with body: + """ + { + "model": "gpt-4", + "messages": [{"role": "user", "content": "Hello"}], + "usage": {"prompt_tokens": 4, "completion_tokens": 6, "total_tokens": 10} + } + """ + Then the response status code should be 200 + + # App B: blocked by the shared backend counter even though its own consumer counter is at 0 + When I send a POST request to "http://localhost:8080/ctbrl-both/chat/completions" with header "x-api-key" value "ctbrl-both-app-b-key-00000000000000000000000" with body: + """ + { + "model": "gpt-4", + "messages": [{"role": "user", "content": "Hello"}], + "usage": {"prompt_tokens": 4, "completion_tokens": 6, "total_tokens": 10} + } + """ + Then the response status code should be 429 + + # Cleanup + Given I authenticate using basic auth as "admin" + When I delete the LLM provider "ctbrl-both-provider" + Then the response status code should be 200 + 
When I delete the LLM provider template "ctbrl-both-template" + Then the response status code should be 200 + + Scenario: Requests without an app ID share a single "default" token counter + # When no api-key-auth is in the chain, x-wso2-application-id is never written to + # metadata. The fallback key "default" is used so all unauthenticated requests count + # against the same "default" token bucket (not against the backend "routename" bucket). + # Limit: 20 total tokens/hour. Each request consumes 10 tokens via echo backend. + # After 2 requests (20 tokens) the "default" counter is exhausted and further + # requests are blocked. + When I create this LLM provider template: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProviderTemplate + metadata: + name: ctbrl-fallback-template + spec: + displayName: CTBRL Fallback Template + totalTokens: + location: payload + identifier: $.json.usage.total_tokens + """ + Then the response status code should be 201 + + When I create this LLM provider: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProvider + metadata: + name: ctbrl-fallback-provider + spec: + displayName: CTBRL Fallback Provider + version: v1.0 + context: /ctbrl-fallback + template: ctbrl-fallback-template + upstream: + url: http://echo-backend-multi-arch:8080/anything + auth: + type: api-key + header: Authorization + value: test-key + accessControl: + mode: allow_all + policies: + - name: token-based-ratelimit + version: v1 + paths: + - path: /* + methods: ['*'] + params: + totalTokenLimits: + - count: 20 + duration: "1h" + consumerBased: true + algorithm: fixed-window + backend: memory + """ + Then the response status code should be 201 + And I wait for policy snapshot sync + + Given I set header "Content-Type" to "application/json" + + # Request 1 — no app ID, key = "ctbrl-fallback:default" — allowed, 10 tokens consumed (10/20) + When I send a POST request to "http://localhost:8080/ctbrl-fallback/chat/completions" 
with body: + """ + { + "model": "gpt-4", + "messages": [{"role": "user", "content": "Hello"}], + "usage": {"prompt_tokens": 4, "completion_tokens": 6, "total_tokens": 10} + } + """ + Then the response status code should be 200 + + # Request 2 — no app ID, same "default" counter — allowed (20/20, limit reached) + When I send a POST request to "http://localhost:8080/ctbrl-fallback/chat/completions" with body: + """ + { + "model": "gpt-4", + "messages": [{"role": "user", "content": "Hello"}], + "usage": {"prompt_tokens": 4, "completion_tokens": 6, "total_tokens": 10} + } + """ + Then the response status code should be 200 + + # Request 3 — "default" token counter exhausted — blocked + When I send a POST request to "http://localhost:8080/ctbrl-fallback/chat/completions" with body: + """ + { + "model": "gpt-4", + "messages": [{"role": "user", "content": "Hello"}], + "usage": {"prompt_tokens": 4, "completion_tokens": 6, "total_tokens": 10} + } + """ + Then the response status code should be 429 + + # Cleanup + Given I authenticate using basic auth as "admin" + When I delete the LLM provider "ctbrl-fallback-provider" + Then the response status code should be 200 + When I delete the LLM provider template "ctbrl-fallback-template" + Then the response status code should be 200 \ No newline at end of file diff --git a/gateway/it/suite_test.go b/gateway/it/suite_test.go index aa52a9962..c9d1b5601 100644 --- a/gateway/it/suite_test.go +++ b/gateway/it/suite_test.go @@ -131,6 +131,9 @@ func getFeaturePaths() []string { "features/cel-conditions.feature", "features/analytics-basic.feature", "features/token-based-ratelimit.feature", + "features/consumer-token-based-ratelimit.feature", + "features/consumer-request-based-ratelimit.feature", + "features/consumer-cost-based-ratelimit.feature", "features/sandbox-routing.feature", "features/subscription-validation.feature", "features/subscription-analytics.feature",