diff --git a/gateway/gateway-controller/default-policies/advanced-ratelimit.yaml b/gateway/gateway-controller/default-policies/advanced-ratelimit.yaml index cfb10d737..5c4d19190 100644 --- a/gateway/gateway-controller/default-policies/advanced-ratelimit.yaml +++ b/gateway/gateway-controller/default-policies/advanced-ratelimit.yaml @@ -1,5 +1,5 @@ name: advanced-ratelimit -version: v1.0.2 +version: v1.0.3 description: | Applies advanced rate limits using fixed-window (default) or GCRA algorithms with multi-quota controls, configurable key and cost extraction, and memory or Redis storage. diff --git a/gateway/gateway-controller/default-policies/llm-cost-based-ratelimit.yaml b/gateway/gateway-controller/default-policies/llm-cost-based-ratelimit.yaml index a8782a0a0..cf717d191 100644 --- a/gateway/gateway-controller/default-policies/llm-cost-based-ratelimit.yaml +++ b/gateway/gateway-controller/default-policies/llm-cost-based-ratelimit.yaml @@ -1,5 +1,5 @@ name: llm-cost-based-ratelimit -version: v1.0.2 +version: v1.0.3 description: | A specialized rate limiting policy for LLMs that limits usage based on monetary budgets. The policy reads costs from SharedContext.Metadata under "x-llm-cost" (set by the llm-cost @@ -42,6 +42,16 @@ parameters: Examples: "1h" (1 hour), "24h" (1 day), "168h" (1 week), "720h" (30 days) pattern: "^(([0-9]+(\\.[0-9]*)?|\\.[0-9]+)(ns|us|µs|ms|s|m|h))+$" + consumerBased: + type: boolean + x-wso2-policy-advanced-param: false + description: | + When true, rate limits are applied per consumer (GenAI application) identified + by the x-wso2-application-id metadata key set by the api-key-auth policy. + Each application gets its own independent cost counter. + When false (default), a single shared limit applies across all consumers. + default: false + systemParameters: type: object additionalProperties: false diff --git a/gateway/gateway-controller/default-policies/token-based-ratelimit.yaml b/gateway/gateway-controller/default-policies/token-based-ratelimit.yaml index 644a3cb49..54c32a8fd 100644 --- a/gateway/gateway-controller/default-policies/token-based-ratelimit.yaml +++ b/gateway/gateway-controller/default-policies/token-based-ratelimit.yaml @@ -1,5 +1,5 @@ name: token-based-ratelimit -version: v1.0.2 +version: v1.0.3 description: | Enforces token-based rate limits for LLM traffic by resolving token extraction paths from provider templates and delegating enforcement to the @@ -75,6 +75,15 @@ parameters: description: Specifies the duration window for the limit as a Go duration string, for example "1s", "1m", "1h", or "24h". pattern: "^[-+]?(([0-9]+(\\.[0-9]*)?|\\.[0-9]+)(ns|us|µs|ms|s|m|h))+$" + consumerBased: + type: boolean + x-wso2-policy-advanced-param: false + description: | + When true, rate limits are applied per consumer (GenAI application) identified + by the x-wso2-application-id metadata key set by the api-key-auth policy. + Each application gets its own independent token counter. + When false (default), a single shared limit applies across all consumers. + default: false oneOf: - required: ["promptTokenLimits"] not: diff --git a/gateway/it/features/consumer-cost-based-ratelimit.feature b/gateway/it/features/consumer-cost-based-ratelimit.feature new file mode 100644 index 000000000..ebb745524 --- /dev/null +++ b/gateway/it/features/consumer-cost-based-ratelimit.feature @@ -0,0 +1,490 @@ +# -------------------------------------------------------------------- +# Copyright (c) 2026, WSO2 LLC. (https://www.wso2.com). +# +# WSO2 LLC. 
licenses this file to you under the Apache License, +# Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# -------------------------------------------------------------------- + +@consumer-cost-based-ratelimit +Feature: Consumer Cost-Based Rate Limiting + As an API developer + I want cost limits to be enforced independently per GenAI application + So that one application exhausting its budget does not block other applications + + Background: + Given the gateway services are running + And I authenticate using basic auth as "admin" + + Scenario: Each consumer gets an independent cost budget + # mock-openai returns gpt-4.1-2025-04-14: 19 prompt × $2/1M + 10 completion × $8/1M = $0.0001180000 + # Budget per consumer: $0.000236 = exactly 2 requests worth + # App A sends 2 requests (budget exhausted) and is blocked on the 3rd. + # App B is unaffected — its budget counter is still at $0. + When I create this LLM provider template: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProviderTemplate + metadata: + name: ccbrl-template + spec: + displayName: CCBRL Template + """ + Then the response status code should be 201 + + When I create this LLM provider: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProvider + metadata: + name: ccbrl-provider + spec: + displayName: CCBRL Provider + version: v1.0 + context: /ccbrl + template: ccbrl-template + upstream: + url: http://mock-openapi:4010 + auth: + type: api-key + header: Authorization + value: test-key + accessControl: + mode: allow_all + policies: + - name: api-key-auth + version: v1 + paths: + - path: /* + methods: ['*'] + params: + key: x-api-key + in: header + - name: llm-cost-based-ratelimit + version: v1 + paths: + - path: /* + methods: ['*'] + params: + budgetLimits: + - amount: 0.000236 + duration: "1h" + consumerBased: true + - name: llm-cost + version: v1 + paths: + - path: /* + methods: ['*'] + """ + Then the response status code should be 201 + And I wait for policy snapshot sync + + # Create API key for App A + When I send a POST request to the "gateway-controller" service at "/llm-providers/ccbrl-provider/api-keys" with body: + """ + { + "name": "ccbrl-app-a", + "apiKey": "ccbrl-app-a-key-000000000000000000000000" + } + """ + Then the response status code should be 201 + + # Create API key for App B + When I send a POST request to the "gateway-controller" service at "/llm-providers/ccbrl-provider/api-keys" with body: + """ + { + "name": "ccbrl-app-b", + "apiKey": "ccbrl-app-b-key-000000000000000000000000" + } + """ + Then the response status code should be 201 + And I wait for 2 seconds + + Given I set header "Content-Type" to "application/json" + + # App A: request 1 — allowed, budget drops to $0.000118 + When I send a POST request to "http://localhost:8080/ccbrl/openai/v1/chat/completions" with header "x-api-key" value "ccbrl-app-a-key-000000000000000000000000" with body: + """ json + {"model": "gpt-4.1-2025-04-14", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + + # App A: 
request 2 — allowed, budget reaches exactly $0 + When I send a POST request to "http://localhost:8080/ccbrl/openai/v1/chat/completions" with header "x-api-key" value "ccbrl-app-a-key-000000000000000000000000" with body: + """ json + {"model": "gpt-4.1-2025-04-14", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + + # App A: request 3 — blocked, budget exhausted + When I send a POST request to "http://localhost:8080/ccbrl/openai/v1/chat/completions" with header "x-api-key" value "ccbrl-app-a-key-000000000000000000000000" with body: + """ json + {"model": "gpt-4.1-2025-04-14", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 429 + + # App B: request 1 — should succeed, App B has its own independent cost counter + When I send a POST request to "http://localhost:8080/ccbrl/openai/v1/chat/completions" with header "x-api-key" value "ccbrl-app-b-key-000000000000000000000000" with body: + """ json + {"model": "gpt-4.1-2025-04-14", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + + # Cleanup + Given I authenticate using basic auth as "admin" + When I delete the LLM provider "ccbrl-provider" + Then the response status code should be 200 + When I delete the LLM provider template "ccbrl-template" + Then the response status code should be 200 + + Scenario: Backend cost limit blocks all consumers when shared budget is exhausted + # Backend limit: $0.000236/hour shared across all apps (exactly 2 requests worth). + # Consumer limit: $0.000236/hour per app independently. + # App A sends 2 requests — exhausts the shared backend budget. + # App B's next request is blocked by the backend limit even though + # App B's own consumer budget is still at $0. 
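+    # (Illustrative bucket layout, assuming the "<route>:<app>" key scheme implied by + # the fallback scenario below: the backend budget is keyed by route only, giving one + # shared $0.000236 bucket, while the consumer budget keys add the application id, + # e.g. "ccbrl-both:<app-a-id>" and "ccbrl-both:<app-b-id>" with $0.000236 each.)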
+ When I create this LLM provider template: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProviderTemplate + metadata: + name: ccbrl-both-template + spec: + displayName: CCBRL Both Template + """ + Then the response status code should be 201 + + When I create this LLM provider: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProvider + metadata: + name: ccbrl-both-provider + spec: + displayName: CCBRL Both Provider + version: v1.0 + context: /ccbrl-both + template: ccbrl-both-template + upstream: + url: http://mock-openapi:4010 + auth: + type: api-key + header: Authorization + value: test-key + accessControl: + mode: allow_all + policies: + - name: api-key-auth + version: v1 + paths: + - path: /* + methods: ['*'] + params: + key: x-api-key + in: header + - name: llm-cost-based-ratelimit + version: v1 + paths: + - path: /* + methods: ['*'] + params: + budgetLimits: + - amount: 0.000236 + duration: "1h" + - name: llm-cost-based-ratelimit + version: v1 + paths: + - path: /* + methods: ['*'] + params: + budgetLimits: + - amount: 0.000236 + duration: "1h" + consumerBased: true + - name: llm-cost + version: v1 + paths: + - path: /* + methods: ['*'] + """ + Then the response status code should be 201 + And I wait for policy snapshot sync + + # Create API key for App A + When I send a POST request to the "gateway-controller" service at "/llm-providers/ccbrl-both-provider/api-keys" with body: + """ + { + "name": "ccbrl-both-app-a", + "apiKey": "ccbrl-both-app-a-key-00000000000000000000000" + } + """ + Then the response status code should be 201 + + # Create API key for App B + When I send a POST request to the "gateway-controller" service at "/llm-providers/ccbrl-both-provider/api-keys" with body: + """ + { + "name": "ccbrl-both-app-b", + "apiKey": "ccbrl-both-app-b-key-00000000000000000000000" + } + """ + Then the response status code should be 201 + And I wait for 2 seconds + + Given I set header "Content-Type" to "application/json" + + # App A: request 1 — allowed, shared backend budget drops to $0.000118 + When I send a POST request to "http://localhost:8080/ccbrl-both/openai/v1/chat/completions" with header "x-api-key" value "ccbrl-both-app-a-key-00000000000000000000000" with body: + """ json + {"model": "gpt-4.1-2025-04-14", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + + # App A: request 2 — allowed, shared backend budget reaches exactly $0 + When I send a POST request to "http://localhost:8080/ccbrl-both/openai/v1/chat/completions" with header "x-api-key" value "ccbrl-both-app-a-key-00000000000000000000000" with body: + """ json + {"model": "gpt-4.1-2025-04-14", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + + # App B: blocked by the shared backend budget even though its own consumer budget is at $0 + When I send a POST request to "http://localhost:8080/ccbrl-both/openai/v1/chat/completions" with header "x-api-key" value "ccbrl-both-app-b-key-00000000000000000000000" with body: + """ json + {"model": "gpt-4.1-2025-04-14", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 429 + + # Cleanup + Given I authenticate using basic auth as "admin" + When I delete the LLM provider "ccbrl-both-provider" + Then the response status code should be 200 + When I delete the LLM provider template "ccbrl-both-template" + Then the response status code should be 200 + + Scenario: Requests without an 
app ID share a single "default" cost budget + # When no api-key-auth is in the chain, x-wso2-application-id is never written to + # metadata. The fallback key "default" is used so all unauthenticated requests count + # against the same "default" cost bucket (not the backend "routename" bucket). + # Budget: $0.000236/hour (2 requests worth at gpt-4.1-2025-04-14 pricing). + # After 2 requests the "default" budget is exhausted and further requests are blocked. + When I create this LLM provider template: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProviderTemplate + metadata: + name: ccbrl-fallback-template + spec: + displayName: CCBRL Fallback Template + """ + Then the response status code should be 201 + + When I create this LLM provider: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProvider + metadata: + name: ccbrl-fallback-provider + spec: + displayName: CCBRL Fallback Provider + version: v1.0 + context: /ccbrl-fallback + template: ccbrl-fallback-template + upstream: + url: http://mock-openapi:4010 + auth: + type: api-key + header: Authorization + value: test-key + accessControl: + mode: allow_all + policies: + - name: llm-cost-based-ratelimit + version: v1 + paths: + - path: /* + methods: ['*'] + params: + budgetLimits: + - amount: 0.000236 + duration: "1h" + consumerBased: true + - name: llm-cost + version: v1 + paths: + - path: /* + methods: ['*'] + """ + Then the response status code should be 201 + And I wait for policy snapshot sync + + Given I set header "Content-Type" to "application/json" + + # Request 1 — no app ID, key = "ccbrl-fallback:default" — allowed, budget drops to $0.000118 + When I send a POST request to "http://localhost:8080/ccbrl-fallback/openai/v1/chat/completions" with body: + """ json + {"model": "gpt-4.1-2025-04-14", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + + # Request 2 — no app ID, same "default" budget — allowed, budget reaches exactly $0 + When I send a POST request to "http://localhost:8080/ccbrl-fallback/openai/v1/chat/completions" with body: + """ json + {"model": "gpt-4.1-2025-04-14", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + + # Request 3 — "default" budget exhausted — blocked + When I send a POST request to "http://localhost:8080/ccbrl-fallback/openai/v1/chat/completions" with body: + """ json + {"model": "gpt-4.1-2025-04-14", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 429 + + # Cleanup + Given I authenticate using basic auth as "admin" + When I delete the LLM provider "ccbrl-fallback-provider" + Then the response status code should be 200 + When I delete the LLM provider template "ccbrl-fallback-template" + Then the response status code should be 200 + + Scenario: Consumer counter is not double-deducted when both backend and consumer limits are active + # This test guards against the llm_cost_delegate metadata key collision. + # + # Without the fix: both backend and consumer LLMCostRateLimitPolicy instances write + # their delegate reference to the same metadata key ("llm_cost_delegate"). The consumer + # overwrites the backend's entry. In the response phase (reverse order), the backend + # instance reads back the consumer's delegate and calls it — so the consumer's + # OnResponseBody runs twice. The consumer counter is drained twice as fast. 
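+    # + # Illustrative response-phase call order with the collision (per the description above): + #   1. consumer.OnResponseBody() runs first (reverse order) + #   2. backend reads "llm_cost_delegate" and now holds the consumer's delegate + #   3. consumer.OnResponseBody() is invoked again through the backend's read-back + # so the consumer budget is deducted twice per request.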
+ # + # With the fix: backend uses "llm_cost_delegate", consumer uses + # "llm_cost_delegate_consumer". Each instance reads back only its own delegate. + # + # Setup: + # Backend limit: $1/hour (very high — never exhausted in this test) + # Consumer limit: $0.000236/hour = exactly 2 requests at gpt-4.1-2025-04-14 pricing + # + # Expected (with fix): + # request 1 → 200 (consumer deducted once: $0.000236 - $0.000118 = $0.000118 remaining) + # request 2 → 200 (consumer deducted once: $0.000118 - $0.000118 = $0 remaining) + # request 3 → 429 (consumer exhausted) + # + # Without fix: + # request 1 → 200 (consumer deducted twice: $0.000236 - 2×$0.000118 = $0 remaining) + # request 2 → 429 ← test fails here + When I create this LLM provider template: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProviderTemplate + metadata: + name: ccbrl-nodbl-template + spec: + displayName: CCBRL No-Double Template + """ + Then the response status code should be 201 + + When I create this LLM provider: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProvider + metadata: + name: ccbrl-nodbl-provider + spec: + displayName: CCBRL No-Double Provider + version: v1.0 + context: /ccbrl-nodbl + template: ccbrl-nodbl-template + upstream: + url: http://mock-openapi:4010 + auth: + type: api-key + header: Authorization + value: test-key + accessControl: + mode: allow_all + policies: + - name: api-key-auth + version: v1 + paths: + - path: /* + methods: ['*'] + params: + key: x-api-key + in: header + - name: llm-cost-based-ratelimit + version: v1 + paths: + - path: /* + methods: ['*'] + params: + budgetLimits: + - amount: 1.0 + duration: "1h" + - name: llm-cost-based-ratelimit + version: v1 + paths: + - path: /* + methods: ['*'] + params: + budgetLimits: + - amount: 0.000236 + duration: "1h" + consumerBased: true + - name: llm-cost + version: v1 + paths: + - path: /* + methods: ['*'] + """ + Then the response status code should be 201 + And I wait for policy snapshot sync + + When I send a POST request to the "gateway-controller" service at "/llm-providers/ccbrl-nodbl-provider/api-keys" with body: + """ + { + "name": "ccbrl-nodbl-app-a", + "apiKey": "ccbrl-nodbl-app-a-key-0000000000000000000000" + } + """ + Then the response status code should be 201 + And I wait for 2 seconds + + Given I set header "Content-Type" to "application/json" + + # Request 1 — allowed; consumer budget: $0.000236 - $0.000118 = $0.000118 remaining + When I send a POST request to "http://localhost:8080/ccbrl-nodbl/openai/v1/chat/completions" with header "x-api-key" value "ccbrl-nodbl-app-a-key-0000000000000000000000" with body: + """ json + {"model": "gpt-4.1-2025-04-14", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + + # Request 2 — allowed; consumer budget: $0.000118 - $0.000118 = $0 remaining + # Without the fix this would be 429 because the consumer counter was double-deducted on request 1 + When I send a POST request to "http://localhost:8080/ccbrl-nodbl/openai/v1/chat/completions" with header "x-api-key" value "ccbrl-nodbl-app-a-key-0000000000000000000000" with body: + """ json + {"model": "gpt-4.1-2025-04-14", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + + # Request 3 — blocked; consumer budget exhausted + When I send a POST request to "http://localhost:8080/ccbrl-nodbl/openai/v1/chat/completions" with header "x-api-key" value "ccbrl-nodbl-app-a-key-0000000000000000000000" with 
body: + """ json + {"model": "gpt-4.1-2025-04-14", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 429 + + # Cleanup + Given I authenticate using basic auth as "admin" + When I delete the LLM provider "ccbrl-nodbl-provider" + Then the response status code should be 200 + When I delete the LLM provider template "ccbrl-nodbl-template" + Then the response status code should be 200 diff --git a/gateway/it/features/consumer-request-based-ratelimit.feature b/gateway/it/features/consumer-request-based-ratelimit.feature new file mode 100644 index 000000000..921d7a642 --- /dev/null +++ b/gateway/it/features/consumer-request-based-ratelimit.feature @@ -0,0 +1,365 @@ +# -------------------------------------------------------------------- +# Copyright (c) 2026, WSO2 LLC. (https://www.wso2.com). +# +# WSO2 LLC. licenses this file to you under the Apache License, +# Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# -------------------------------------------------------------------- + +@consumer-request-based-ratelimit +Feature: Consumer Request-Based Rate Limiting + As an API developer + I want request count limits to be enforced independently per GenAI application + So that one application exhausting its request quota does not block other applications + + Background: + Given the gateway services are running + And I authenticate using basic auth as "admin" + + Scenario: Each consumer gets an independent request counter + # Each app gets 2 requests/hour independently. + # App A sends 2 requests (limit reached) and gets blocked on the 3rd. + # App B is unaffected — its counter is still at 0. 
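+    # (Assumed quota keys, following the "<route>:<app>" form shown by the fallback + # scenario below: the routename plus x-wso2-application-id extraction chain yields + # e.g. "crbrl:<app-a-id>" and "crbrl:<app-b-id>", one counter per app.)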
+ When I create this LLM provider template: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProviderTemplate + metadata: + name: crbrl-template + spec: + displayName: CRBRL Template + """ + Then the response status code should be 201 + + When I create this LLM provider: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProvider + metadata: + name: crbrl-provider + spec: + displayName: CRBRL Provider + version: v1.0 + context: /crbrl + template: crbrl-template + upstream: + url: http://echo-backend-multi-arch:8080/anything + auth: + type: api-key + header: Authorization + value: test-key + accessControl: + mode: allow_all + policies: + - name: api-key-auth + version: v1 + paths: + - path: /* + methods: ['*'] + params: + key: x-api-key + in: header + - name: advanced-ratelimit + version: v1 + paths: + - path: /* + methods: ['*'] + params: + quotas: + - name: consumer-request-limit + limits: + - limit: 2 + duration: "1h" + keyExtraction: + - type: routename + - type: metadata + key: x-wso2-application-id + """ + Then the response status code should be 201 + And I wait for policy snapshot sync + + # Create API key for App A + When I send a POST request to the "gateway-controller" service at "/llm-providers/crbrl-provider/api-keys" with body: + """ + { + "name": "crbrl-app-a", + "apiKey": "crbrl-app-a-key-000000000000000000000000" + } + """ + Then the response status code should be 201 + + # Create API key for App B + When I send a POST request to the "gateway-controller" service at "/llm-providers/crbrl-provider/api-keys" with body: + """ + { + "name": "crbrl-app-b", + "apiKey": "crbrl-app-b-key-000000000000000000000000" + } + """ + Then the response status code should be 201 + And I wait for 2 seconds + + Given I set header "Content-Type" to "application/json" + + # App A: request 1 — allowed (counter: 1/2) + When I send a POST request to "http://localhost:8080/crbrl/chat/completions" with header "x-api-key" value "crbrl-app-a-key-000000000000000000000000" with body: + """ + {"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + + # App A: request 2 — allowed (counter: 2/2, limit reached) + When I send a POST request to "http://localhost:8080/crbrl/chat/completions" with header "x-api-key" value "crbrl-app-a-key-000000000000000000000000" with body: + """ + {"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + + # App A: request 3 — blocked, request quota exhausted + When I send a POST request to "http://localhost:8080/crbrl/chat/completions" with header "x-api-key" value "crbrl-app-a-key-000000000000000000000000" with body: + """ + {"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 429 + + # App B: request 1 — should succeed, App B has its own independent counter + When I send a POST request to "http://localhost:8080/crbrl/chat/completions" with header "x-api-key" value "crbrl-app-b-key-000000000000000000000000" with body: + """ + {"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + + # Cleanup + Given I authenticate using basic auth as "admin" + When I delete the LLM provider "crbrl-provider" + Then the response status code should be 200 + When I delete the LLM provider template "crbrl-template" + Then the response status code should be 200 + + Scenario: Backend request 
limit blocks all consumers when shared quota is exhausted + # Backend limit: 3 requests/hour shared across all apps. + # Consumer limit: 3 requests/hour per app independently. + # App A sends 3 requests — exhausts the shared backend counter. + # App B's next request is blocked by the backend limit even though + # App B's own consumer counter is still at 0. + When I create this LLM provider template: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProviderTemplate + metadata: + name: crbrl-both-template + spec: + displayName: CRBRL Both Template + """ + Then the response status code should be 201 + + When I create this LLM provider: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProvider + metadata: + name: crbrl-both-provider + spec: + displayName: CRBRL Both Provider + version: v1.0 + context: /crbrl-both + template: crbrl-both-template + upstream: + url: http://echo-backend-multi-arch:8080/anything + auth: + type: api-key + header: Authorization + value: test-key + accessControl: + mode: allow_all + policies: + - name: api-key-auth + version: v1 + paths: + - path: /* + methods: ['*'] + params: + key: x-api-key + in: header + - name: advanced-ratelimit + version: v1 + paths: + - path: /* + methods: ['*'] + params: + quotas: + - name: backend-request-limit + limits: + - limit: 3 + duration: "1h" + keyExtraction: + - type: routename + - name: advanced-ratelimit + version: v1 + paths: + - path: /* + methods: ['*'] + params: + quotas: + - name: consumer-request-limit + limits: + - limit: 3 + duration: "1h" + keyExtraction: + - type: routename + - type: metadata + key: x-wso2-application-id + """ + Then the response status code should be 201 + And I wait for policy snapshot sync + + # Create API key for App A + When I send a POST request to the "gateway-controller" service at "/llm-providers/crbrl-both-provider/api-keys" with body: + """ + { + "name": "crbrl-both-app-a", + "apiKey": "crbrl-both-app-a-key-00000000000000000000000" + } + """ + Then the response status code should be 201 + + # Create API key for App B + When I send a POST request to the "gateway-controller" service at "/llm-providers/crbrl-both-provider/api-keys" with body: + """ + { + "name": "crbrl-both-app-b", + "apiKey": "crbrl-both-app-b-key-00000000000000000000000" + } + """ + Then the response status code should be 201 + And I wait for 2 seconds + + Given I set header "Content-Type" to "application/json" + + # App A: requests 1-3 — exhausts the shared backend counter (3/3) + When I send a POST request to "http://localhost:8080/crbrl-both/chat/completions" with header "x-api-key" value "crbrl-both-app-a-key-00000000000000000000000" with body: + """ + {"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + When I send a POST request to "http://localhost:8080/crbrl-both/chat/completions" with header "x-api-key" value "crbrl-both-app-a-key-00000000000000000000000" with body: + """ + {"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + When I send a POST request to "http://localhost:8080/crbrl-both/chat/completions" with header "x-api-key" value "crbrl-both-app-a-key-00000000000000000000000" with body: + """ + {"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + + # App B: blocked by the shared backend counter even though its own consumer counter is at 0 + When I send a POST 
request to "http://localhost:8080/crbrl-both/chat/completions" with header "x-api-key" value "crbrl-both-app-b-key-00000000000000000000000" with body: + """ + {"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 429 + + # Cleanup + Given I authenticate using basic auth as "admin" + When I delete the LLM provider "crbrl-both-provider" + Then the response status code should be 200 + When I delete the LLM provider template "crbrl-both-template" + Then the response status code should be 200 + + Scenario: Requests without an app ID share a single "default" counter + # When no api-key-auth is in the chain, x-wso2-application-id is never written to + # metadata. The fallback key "default" is used instead of a "_missing_metadata_*_" + # placeholder, so all unauthenticated requests count against the same "default" bucket. + # Limit: 2 requests/hour. After 2 requests the "default" counter is exhausted and + # all further requests (still with no app ID) are blocked. + When I create this LLM provider template: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProviderTemplate + metadata: + name: crbrl-fallback-template + spec: + displayName: CRBRL Fallback Template + """ + Then the response status code should be 201 + + When I create this LLM provider: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProvider + metadata: + name: crbrl-fallback-provider + spec: + displayName: CRBRL Fallback Provider + version: v1.0 + context: /crbrl-fallback + template: crbrl-fallback-template + upstream: + url: http://echo-backend-multi-arch:8080/anything + auth: + type: api-key + header: Authorization + value: test-key + accessControl: + mode: allow_all + policies: + - name: advanced-ratelimit + version: v1 + paths: + - path: /* + methods: ['*'] + params: + quotas: + - name: consumer-request-limit + limits: + - limit: 2 + duration: "1h" + keyExtraction: + - type: routename + - type: metadata + key: x-wso2-application-id + fallback: default + """ + Then the response status code should be 201 + And I wait for policy snapshot sync + + Given I set header "Content-Type" to "application/json" + + # Request 1 — no app ID in metadata, key = "crbrl-fallback:default" — allowed (1/2) + When I send a POST request to "http://localhost:8080/crbrl-fallback/chat/completions" with body: + """ + {"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + + # Request 2 — no app ID in metadata, same "default" counter — allowed (2/2) + When I send a POST request to "http://localhost:8080/crbrl-fallback/chat/completions" with body: + """ + {"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + + # Request 3 — "default" counter exhausted — blocked + When I send a POST request to "http://localhost:8080/crbrl-fallback/chat/completions" with body: + """ + {"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 429 + + # Cleanup + Given I authenticate using basic auth as "admin" + When I delete the LLM provider "crbrl-fallback-provider" + Then the response status code should be 200 + When I delete the LLM provider template "crbrl-fallback-template" + Then the response status code should be 200 diff --git a/gateway/it/features/consumer-token-based-ratelimit.feature b/gateway/it/features/consumer-token-based-ratelimit.feature new file mode 100644 
index 000000000..c7e08a464 --- /dev/null +++ b/gateway/it/features/consumer-token-based-ratelimit.feature @@ -0,0 +1,409 @@ +# -------------------------------------------------------------------- +# Copyright (c) 2026, WSO2 LLC. (https://www.wso2.com). +# +# WSO2 LLC. licenses this file to you under the Apache License, +# Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# -------------------------------------------------------------------- + +@consumer-token-based-ratelimit +Feature: Consumer Token-Based Rate Limiting + As an API developer + I want token limits to be enforced independently per GenAI application + So that one application exhausting its budget does not block other applications + + Background: + Given the gateway services are running + And I authenticate using basic auth as "admin" + + Scenario: Each consumer gets an independent token counter + # Each app gets 20 total tokens/hour independently. + # Each request carries usage.total_tokens = 10, which the echo backend returns in its response body. + # App A sends 2 requests (20 tokens) and gets blocked on the 3rd. + # App B is unaffected — its counter is still at 0. + When I create this LLM provider template: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProviderTemplate + metadata: + name: ctbrl-template + spec: + displayName: CTBRL Template + totalTokens: + location: payload + identifier: $.json.usage.total_tokens + """ + Then the response status code should be 201 + + When I create this LLM provider: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProvider + metadata: + name: ctbrl-provider + spec: + displayName: CTBRL Provider + version: v1.0 + context: /ctbrl + template: ctbrl-template + upstream: + url: http://echo-backend-multi-arch:8080/anything + auth: + type: api-key + header: Authorization + value: test-key + accessControl: + mode: allow_all + policies: + - name: api-key-auth + version: v1 + paths: + - path: /* + methods: ['*'] + params: + key: x-api-key + in: header + - name: token-based-ratelimit + version: v1 + paths: + - path: /* + methods: ['*'] + params: + totalTokenLimits: + - count: 20 + duration: "1h" + consumerBased: true + algorithm: fixed-window + backend: memory + """ + Then the response status code should be 201 + And I wait for policy snapshot sync + + # Create API key for App A (pre-set known value, min 36 chars) + When I send a POST request to the "gateway-controller" service at "/llm-providers/ctbrl-provider/api-keys" with body: + """ + { + "name": "ctbrl-app-a", + "apiKey": "ctbrl-app-a-key-000000000000000000000000" + } + """ + Then the response status code should be 201 + + # Create API key for App B + When I send a POST request to the "gateway-controller" service at "/llm-providers/ctbrl-provider/api-keys" with body: + """ + { + "name": "ctbrl-app-b", + "apiKey": "ctbrl-app-b-key-000000000000000000000000" + } + """ + Then the response status code should be 201 + And I wait for 2 seconds + + Given I set header "Content-Type" to "application/json" + + # App A: request 1 — consumes 10 tokens (counter: 10/20) + When I send a POST request 
to "http://localhost:8080/ctbrl/chat/completions" with header "x-api-key" value "ctbrl-app-a-key-000000000000000000000000" with body: + """ + { + "model": "gpt-4", + "messages": [{"role": "user", "content": "Hello"}], + "usage": {"prompt_tokens": 4, "completion_tokens": 6, "total_tokens": 10} + } + """ + Then the response status code should be 200 + + # App A: request 2 — consumes 10 more tokens (counter: 20/20, limit reached) + When I send a POST request to "http://localhost:8080/ctbrl/chat/completions" with header "x-api-key" value "ctbrl-app-a-key-000000000000000000000000" with body: + """ + { + "model": "gpt-4", + "messages": [{"role": "user", "content": "Hello"}], + "usage": {"prompt_tokens": 4, "completion_tokens": 6, "total_tokens": 10} + } + """ + Then the response status code should be 200 + + # App A: request 3 — blocked, token budget exhausted + When I send a POST request to "http://localhost:8080/ctbrl/chat/completions" with header "x-api-key" value "ctbrl-app-a-key-000000000000000000000000" with body: + """ + { + "model": "gpt-4", + "messages": [{"role": "user", "content": "Hello"}], + "usage": {"prompt_tokens": 4, "completion_tokens": 6, "total_tokens": 10} + } + """ + Then the response status code should be 429 + + # App B: request 1 — should succeed, App B has its own independent counter + When I send a POST request to "http://localhost:8080/ctbrl/chat/completions" with header "x-api-key" value "ctbrl-app-b-key-000000000000000000000000" with body: + """ + { + "model": "gpt-4", + "messages": [{"role": "user", "content": "Hello"}], + "usage": {"prompt_tokens": 4, "completion_tokens": 6, "total_tokens": 10} + } + """ + Then the response status code should be 200 + + # Cleanup + Given I authenticate using basic auth as "admin" + When I delete the LLM provider "ctbrl-provider" + Then the response status code should be 200 + When I delete the LLM provider template "ctbrl-template" + Then the response status code should be 200 + + Scenario: Backend limit blocks all consumers when shared budget is exhausted + # Backend limit: 30 total tokens/hour shared across all apps. + # Consumer limit: 30 total tokens/hour per app independently. + # Each request uses 10 tokens (via echo backend). + # App A sends 3 requests — exhausts the backend shared counter (30/30). + # App B's next request is blocked by the backend limit even though + # App B's own consumer counter is only at 0. 
+ When I create this LLM provider template: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProviderTemplate + metadata: + name: ctbrl-both-template + spec: + displayName: CTBRL Both Template + totalTokens: + location: payload + identifier: $.json.usage.total_tokens + """ + Then the response status code should be 201 + + When I create this LLM provider: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProvider + metadata: + name: ctbrl-both-provider + spec: + displayName: CTBRL Both Provider + version: v1.0 + context: /ctbrl-both + template: ctbrl-both-template + upstream: + url: http://echo-backend-multi-arch:8080/anything + auth: + type: api-key + header: Authorization + value: test-key + accessControl: + mode: allow_all + policies: + - name: api-key-auth + version: v1 + paths: + - path: /* + methods: ['*'] + params: + key: x-api-key + in: header + - name: token-based-ratelimit + version: v1 + paths: + - path: /* + methods: ['*'] + params: + totalTokenLimits: + - count: 30 + duration: "1h" + algorithm: fixed-window + backend: memory + - name: token-based-ratelimit + version: v1 + paths: + - path: /* + methods: ['*'] + params: + totalTokenLimits: + - count: 30 + duration: "1h" + consumerBased: true + algorithm: fixed-window + backend: memory + """ + Then the response status code should be 201 + And I wait for policy snapshot sync + + # Create API key for App A + When I send a POST request to the "gateway-controller" service at "/llm-providers/ctbrl-both-provider/api-keys" with body: + """ + { + "name": "ctbrl-both-app-a", + "apiKey": "ctbrl-both-app-a-key-00000000000000000000000" + } + """ + Then the response status code should be 201 + + # Create API key for App B + When I send a POST request to the "gateway-controller" service at "/llm-providers/ctbrl-both-provider/api-keys" with body: + """ + { + "name": "ctbrl-both-app-b", + "apiKey": "ctbrl-both-app-b-key-00000000000000000000000" + } + """ + Then the response status code should be 201 + And I wait for 2 seconds + + Given I set header "Content-Type" to "application/json" + + # App A: requests 1-3 — exhausts the shared backend counter (30 tokens) + When I send a POST request to "http://localhost:8080/ctbrl-both/chat/completions" with header "x-api-key" value "ctbrl-both-app-a-key-00000000000000000000000" with body: + """ + { + "model": "gpt-4", + "messages": [{"role": "user", "content": "Hello"}], + "usage": {"prompt_tokens": 4, "completion_tokens": 6, "total_tokens": 10} + } + """ + Then the response status code should be 200 + When I send a POST request to "http://localhost:8080/ctbrl-both/chat/completions" with header "x-api-key" value "ctbrl-both-app-a-key-00000000000000000000000" with body: + """ + { + "model": "gpt-4", + "messages": [{"role": "user", "content": "Hello"}], + "usage": {"prompt_tokens": 4, "completion_tokens": 6, "total_tokens": 10} + } + """ + Then the response status code should be 200 + When I send a POST request to "http://localhost:8080/ctbrl-both/chat/completions" with header "x-api-key" value "ctbrl-both-app-a-key-00000000000000000000000" with body: + """ + { + "model": "gpt-4", + "messages": [{"role": "user", "content": "Hello"}], + "usage": {"prompt_tokens": 4, "completion_tokens": 6, "total_tokens": 10} + } + """ + Then the response status code should be 200 + + # App B: blocked by the shared backend counter even though its own consumer counter is at 0 + When I send a POST request to "http://localhost:8080/ctbrl-both/chat/completions" with header "x-api-key" value 
"ctbrl-both-app-b-key-00000000000000000000000" with body: + """ + { + "model": "gpt-4", + "messages": [{"role": "user", "content": "Hello"}], + "usage": {"prompt_tokens": 4, "completion_tokens": 6, "total_tokens": 10} + } + """ + Then the response status code should be 429 + + # Cleanup + Given I authenticate using basic auth as "admin" + When I delete the LLM provider "ctbrl-both-provider" + Then the response status code should be 200 + When I delete the LLM provider template "ctbrl-both-template" + Then the response status code should be 200 + + Scenario: Requests without an app ID share a single "default" token counter + # When no api-key-auth is in the chain, x-wso2-application-id is never written to + # metadata. The fallback key "default" is used so all unauthenticated requests count + # against the same "default" token bucket (not against the backend "routename" bucket). + # Limit: 20 total tokens/hour. Each request consumes 10 tokens via echo backend. + # After 2 requests (20 tokens) the "default" counter is exhausted and further + # requests are blocked. + When I create this LLM provider template: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProviderTemplate + metadata: + name: ctbrl-fallback-template + spec: + displayName: CTBRL Fallback Template + totalTokens: + location: payload + identifier: $.json.usage.total_tokens + """ + Then the response status code should be 201 + + When I create this LLM provider: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProvider + metadata: + name: ctbrl-fallback-provider + spec: + displayName: CTBRL Fallback Provider + version: v1.0 + context: /ctbrl-fallback + template: ctbrl-fallback-template + upstream: + url: http://echo-backend-multi-arch:8080/anything + auth: + type: api-key + header: Authorization + value: test-key + accessControl: + mode: allow_all + policies: + - name: token-based-ratelimit + version: v1 + paths: + - path: /* + methods: ['*'] + params: + totalTokenLimits: + - count: 20 + duration: "1h" + consumerBased: true + algorithm: fixed-window + backend: memory + """ + Then the response status code should be 201 + And I wait for policy snapshot sync + + Given I set header "Content-Type" to "application/json" + + # Request 1 — no app ID, key = "ctbrl-fallback:default" — allowed, 10 tokens consumed (10/20) + When I send a POST request to "http://localhost:8080/ctbrl-fallback/chat/completions" with body: + """ + { + "model": "gpt-4", + "messages": [{"role": "user", "content": "Hello"}], + "usage": {"prompt_tokens": 4, "completion_tokens": 6, "total_tokens": 10} + } + """ + Then the response status code should be 200 + + # Request 2 — no app ID, same "default" counter — allowed (20/20, limit reached) + When I send a POST request to "http://localhost:8080/ctbrl-fallback/chat/completions" with body: + """ + { + "model": "gpt-4", + "messages": [{"role": "user", "content": "Hello"}], + "usage": {"prompt_tokens": 4, "completion_tokens": 6, "total_tokens": 10} + } + """ + Then the response status code should be 200 + + # Request 3 — "default" token counter exhausted — blocked + When I send a POST request to "http://localhost:8080/ctbrl-fallback/chat/completions" with body: + """ + { + "model": "gpt-4", + "messages": [{"role": "user", "content": "Hello"}], + "usage": {"prompt_tokens": 4, "completion_tokens": 6, "total_tokens": 10} + } + """ + Then the response status code should be 429 + + # Cleanup + Given I authenticate using basic auth as "admin" + When I delete the LLM provider 
"ctbrl-fallback-provider" + Then the response status code should be 200 + When I delete the LLM provider template "ctbrl-fallback-template" + Then the response status code should be 200 diff --git a/gateway/it/suite_test.go b/gateway/it/suite_test.go index aa52a9962..c9d1b5601 100644 --- a/gateway/it/suite_test.go +++ b/gateway/it/suite_test.go @@ -131,6 +131,9 @@ func getFeaturePaths() []string { "features/cel-conditions.feature", "features/analytics-basic.feature", "features/token-based-ratelimit.feature", + "features/consumer-token-based-ratelimit.feature", + "features/consumer-request-based-ratelimit.feature", + "features/consumer-cost-based-ratelimit.feature", "features/sandbox-routing.feature", "features/subscription-validation.feature", "features/subscription-analytics.feature", diff --git a/platform-api/src/internal/service/llm_deployment.go b/platform-api/src/internal/service/llm_deployment.go index 602d1408d..17904577d 100644 --- a/platform-api/src/internal/service/llm_deployment.go +++ b/platform-api/src/internal/service/llm_deployment.go @@ -668,6 +668,41 @@ func generateLLMProviderDeploymentYAML(provider *model.LLMProvider, templateHand }, }) } + if providerLevel.Global.Cost != nil && providerLevel.Global.Cost.Enabled { + costLimit := providerLevel.Global.Cost + duration, err := formatRateLimitDuration(costLimit.Reset.Duration, costLimit.Reset.Unit) + if err != nil { + return "", fmt.Errorf("invalid cost reset window: %w", err) + } + policies = append(policies, api.LLMPolicy{ + Name: llmCostBasedRateLimitPolicyName, + Version: "", + Paths: []api.LLMPolicyPath{ + { + Path: "/*", + Methods: []api.LLMPolicyPathMethods{"*"}, + Params: map[string]interface{}{ + "budgetLimits": []map[string]interface{}{ + {"amount": costLimit.Amount, "duration": duration}, + }, + }, + }, + }, + }) + if !hasPolicy(policies, llmCostPolicyName) { + policies = append(policies, api.LLMPolicy{ + Name: llmCostPolicyName, + Version: "", + Paths: []api.LLMPolicyPath{ + { + Path: "/*", + Methods: []api.LLMPolicyPathMethods{"*"}, + Params: map[string]interface{}{}, + }, + }, + }) + } + } } else if providerLevel.ResourceWise != nil { // Step 2.2 Handle resource-wise rate limiting defaultLimit := &providerLevel.ResourceWise.Default @@ -731,6 +766,37 @@ func generateLLMProviderDeploymentYAML(provider *model.LLMProvider, templateHand }, }) } + if defaultLimit.Cost != nil && defaultLimit.Cost.Enabled { + costLimit := defaultLimit.Cost + duration, err := formatRateLimitDuration(costLimit.Reset.Duration, costLimit.Reset.Unit) + if err != nil { + return "", fmt.Errorf("invalid cost reset window: %w", err) + } + policies = append(policies, api.LLMPolicy{ + Name: llmCostBasedRateLimitPolicyName, + Version: "", + Paths: []api.LLMPolicyPath{ + { + Path: "/*", + Methods: []api.LLMPolicyPathMethods{"*"}, + Params: map[string]interface{}{ + "budgetLimits": []map[string]interface{}{ + {"amount": costLimit.Amount, "duration": duration}, + }, + }, + }, + }, + }) + if !hasPolicy(policies, llmCostPolicyName) { + policies = append(policies, api.LLMPolicy{ + Name: llmCostPolicyName, + Version: "", + Paths: []api.LLMPolicyPath{ + {Path: "/*", Methods: []api.LLMPolicyPathMethods{"*"}, Params: map[string]interface{}{}}, + }, + }) + } + } // Step 2.2.2 Resource-wise rate limit for _, r := range providerLevel.ResourceWise.Resources { @@ -779,6 +845,35 @@ func generateLLMProviderDeploymentYAML(provider *model.LLMProvider, templateHand }, }) } + if r.Limit.Cost != nil && r.Limit.Cost.Enabled { + costLimit := r.Limit.Cost + duration, err 
:= formatRateLimitDuration(costLimit.Reset.Duration, costLimit.Reset.Unit) + if err != nil { + return "", fmt.Errorf("invalid cost reset window for resource %s: %w", r.Resource, err) + } + addOrAppendPolicyPath(&policies, llmCostBasedRateLimitPolicyName, "", api.LLMPolicyPath{ + Path: r.Resource, + Methods: []api.LLMPolicyPathMethods{"*"}, + Params: map[string]interface{}{ + "budgetLimits": []map[string]interface{}{ + {"amount": costLimit.Amount, "duration": duration}, + }, + }, + }) + if !hasPolicy(policies, llmCostPolicyName) { + policies = append(policies, api.LLMPolicy{ + Name: llmCostPolicyName, + Version: "", + Paths: []api.LLMPolicyPath{ + { + Path: "/*", + Methods: []api.LLMPolicyPathMethods{"*"}, + Params: map[string]interface{}{}, + }, + }, + }) + } + } } } } @@ -787,11 +882,183 @@ func generateLLMProviderDeploymentYAML(provider *model.LLMProvider, templateHand consumerLevel := rateLimit.ConsumerLevel if consumerLevel != nil { if consumerLevel.Global != nil { - // Handle global rate limiting - // TODO: Convert global rate limit to policy format + if consumerLevel.Global.Token != nil && consumerLevel.Global.Token.Enabled { + tokenLimit := consumerLevel.Global.Token + duration, err := formatRateLimitDuration(tokenLimit.Reset.Duration, tokenLimit.Reset.Unit) + if err != nil { + return "", fmt.Errorf("invalid consumer token reset window: %w", err) + } + policies = append(policies, api.LLMPolicy{ + Name: tokenBasedRateLimitPolicyName, + Version: "", + Paths: []api.LLMPolicyPath{ + { + Path: "/*", + Methods: []api.LLMPolicyPathMethods{"*"}, + Params: map[string]interface{}{ + "totalTokenLimits": []map[string]interface{}{ + { + "count": tokenLimit.Count, + "duration": duration, + }, + }, + "consumerBased": true, + }, + }, + }, + }) + } + if consumerLevel.Global.Request != nil && consumerLevel.Global.Request.Enabled { + requestLimit := consumerLevel.Global.Request + duration, err := formatRateLimitDuration(requestLimit.Reset.Duration, requestLimit.Reset.Unit) + if err != nil { + return "", fmt.Errorf("invalid consumer request reset window: %w", err) + } + policies = append(policies, api.LLMPolicy{ + Name: advancedRateLimitPolicyName, + Version: "", + Paths: []api.LLMPolicyPath{ + { + Path: "/*", + Methods: []api.LLMPolicyPathMethods{"*"}, + Params: map[string]interface{}{ + "quotas": []map[string]interface{}{ + { + "name": "consumer-request-limit", + "limits": []map[string]interface{}{ + { + "limit": requestLimit.Count, + "duration": duration, + }, + }, + "keyExtraction": []map[string]interface{}{ + {"type": "routename"}, + {"type": "metadata", "key": "x-wso2-application-id"}, + }, + }, + }, + }, + }, + }, + }) + } + if consumerLevel.Global.Cost != nil && consumerLevel.Global.Cost.Enabled { + costLimit := consumerLevel.Global.Cost + duration, err := formatRateLimitDuration(costLimit.Reset.Duration, costLimit.Reset.Unit) + if err != nil { + return "", fmt.Errorf("invalid consumer cost reset window: %w", err) + } + policies = append(policies, api.LLMPolicy{ + Name: llmCostBasedRateLimitPolicyName, + Version: "", + Paths: []api.LLMPolicyPath{ + { + Path: "/*", + Methods: []api.LLMPolicyPathMethods{"*"}, + Params: map[string]interface{}{ + "budgetLimits": []map[string]interface{}{ + {"amount": costLimit.Amount, "duration": duration}, + }, + "consumerBased": true, + }, + }, + }, + }) + if !hasPolicy(policies, llmCostPolicyName) { + policies = append(policies, api.LLMPolicy{ + Name: llmCostPolicyName, + Version: "", + Paths: []api.LLMPolicyPath{ + { + Path: "/*", + Methods: 
[]api.LLMPolicyPathMethods{"*"}, + Params: map[string]interface{}{}, + }, + }, + }) + } + } } else if consumerLevel.ResourceWise != nil { - // Handle resource-wise rate limiting - // TODO: Convert resource-wise rate limit to policy format + for _, r := range consumerLevel.ResourceWise.Resources { + if r.Limit.Token != nil && r.Limit.Token.Enabled { + tokenLimit := r.Limit.Token + duration, err := formatRateLimitDuration(tokenLimit.Reset.Duration, tokenLimit.Reset.Unit) + if err != nil { + return "", fmt.Errorf("invalid consumer token reset window for resource %s: %w", r.Resource, err) + } + addOrAppendPolicyPath(&policies, tokenBasedRateLimitPolicyName, "", api.LLMPolicyPath{ + Path: r.Resource, + Methods: []api.LLMPolicyPathMethods{"*"}, + Params: map[string]interface{}{ + "totalTokenLimits": []map[string]interface{}{ + { + "count": tokenLimit.Count, + "duration": duration, + }, + }, + "consumerBased": true, + }, + }) + } + if r.Limit.Request != nil && r.Limit.Request.Enabled { + requestLimit := r.Limit.Request + duration, err := formatRateLimitDuration(requestLimit.Reset.Duration, requestLimit.Reset.Unit) + if err != nil { + return "", fmt.Errorf("invalid consumer request reset window for resource %s: %w", r.Resource, err) + } + addOrAppendPolicyPath(&policies, advancedRateLimitPolicyName, "", api.LLMPolicyPath{ + Path: r.Resource, + Methods: []api.LLMPolicyPathMethods{"*"}, + Params: map[string]interface{}{ + "quotas": []map[string]interface{}{ + { + "name": "consumer-request-limit", + "limits": []map[string]interface{}{ + { + "limit": requestLimit.Count, + "duration": duration, + }, + }, + "keyExtraction": []map[string]interface{}{ + {"type": "routename"}, + {"type": "metadata", "key": "x-wso2-application-id"}, + }, + }, + }, + }, + }) + } + if r.Limit.Cost != nil && r.Limit.Cost.Enabled { + costLimit := r.Limit.Cost + duration, err := formatRateLimitDuration(costLimit.Reset.Duration, costLimit.Reset.Unit) + if err != nil { + return "", fmt.Errorf("invalid consumer cost reset window for resource %s: %w", r.Resource, err) + } + addOrAppendPolicyPath(&policies, llmCostBasedRateLimitPolicyName, "", api.LLMPolicyPath{ + Path: r.Resource, + Methods: []api.LLMPolicyPathMethods{"*"}, + Params: map[string]interface{}{ + "budgetLimits": []map[string]interface{}{ + {"amount": costLimit.Amount, "duration": duration}, + }, + "consumerBased": true, + }, + }) + if !hasPolicy(policies, llmCostPolicyName) { + policies = append(policies, api.LLMPolicy{ + Name: llmCostPolicyName, + Version: "", + Paths: []api.LLMPolicyPath{ + { + Path: "/*", + Methods: []api.LLMPolicyPathMethods{"*"}, + Params: map[string]interface{}{}, + }, + }, + }) + } + } + } } } } @@ -897,9 +1164,17 @@ func normalizePolicyVersionToMajor(version string) string { } func addOrAppendPolicyPath(policies *[]api.LLMPolicy, name, version string, path api.LLMPolicyPath) { + newConsumerBased, _ := path.Params["consumerBased"].(bool) + for i := range *policies { if (*policies)[i].Name == name && (*policies)[i].Version == version { - // TODO: Temporary + // Only merge entries that share the same scope (backend vs consumer) + if len((*policies)[i].Paths) > 0 { + existingConsumerBased, _ := (*policies)[i].Paths[0].Params["consumerBased"].(bool) + if existingConsumerBased != newConsumerBased { + continue // different scope — skip, look for another entry + } + } for _, existingPath := range (*policies)[i].Paths { if existingPath.Path == path.Path { // Keep first occurrence and avoid duplicates. 
@@ -918,6 +1193,15 @@ func addOrAppendPolicyPath(policies *[]api.LLMPolicy, name, version string, path }) } +func hasPolicy(policies []api.LLMPolicy, name string) bool { + for _, p := range policies { + if p.Name == name { + return true + } + } + return false +} + func isBoolTrue(v *bool) bool { return v != nil && *v } diff --git a/platform-api/src/internal/service/llm_deployment_test.go b/platform-api/src/internal/service/llm_deployment_test.go index da07a0ec6..137ded851 100644 --- a/platform-api/src/internal/service/llm_deployment_test.go +++ b/platform-api/src/internal/service/llm_deployment_test.go @@ -1,6 +1,7 @@ package service import ( + "strings" "testing" "platform-api/src/internal/model" @@ -17,3 +18,467 @@ func TestMapModelAuthToAPI_NormalizesApiKeyType(t *testing.T) { t.Fatalf("expected auth type to be api-key, got %q", *out.Type) } } + +func float32Ptr(f float32) *float32 { return &f } + +// providerWithConsumerLimits builds a minimal LLMProvider model carrying the given +// rate limiting config. Despite its name, the backend-only regression tests below +// reuse it with provider-level limits as well. +func providerWithConsumerLimits(rl *model.LLMRateLimitingConfig) *model.LLMProvider { + return &model.LLMProvider{ + ID: "test-provider", + Name: "Test Provider", + Version: "v1.0", + Configuration: model.LLMProviderConfig{ + Context: strPtr("/test"), + Upstream: &model.UpstreamConfig{ + Main: &model.UpstreamEndpoint{ + URL: "https://api.anthropic.com", + Auth: &model.UpstreamAuth{ + Type: "api-key", + Header: "x-api-key", + Value: "test-key", + }, + }, + }, + AccessControl: &model.LLMAccessControl{Mode: "allow_all"}, + RateLimiting: rl, + }, + } +} + +// TestGenerateYAML_ConsumerRequestLimit verifies that a consumer-only request limit +// generates a single advanced-ratelimit policy where the key extraction includes +// x-wso2-application-id (making it consumer-scoped). Unlike token/cost limits, +// the request limit does NOT use a consumerBased flag — it uses the application ID +// directly in the key extraction. +func TestGenerateYAML_ConsumerRequestLimit(t *testing.T) { + count := 100 + rl := &model.LLMRateLimitingConfig{ + ConsumerLevel: &model.RateLimitingScopeConfig{ + Global: &model.RateLimitingLimitConfig{ + Request: &model.RequestRateLimit{ + Enabled: true, + Count: count, + Reset: model.RateLimitResetWindow{Duration: 2, Unit: "hour"}, + }, + }, + }, + } + + yaml, err := generateLLMProviderDeploymentYAML(providerWithConsumerLimits(rl), "anthropic") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if !strings.Contains(yaml, "advanced-ratelimit") { + t.Error("expected advanced-ratelimit policy in generated YAML") + } + // Consumer-scoped: key extraction must include x-wso2-application-id + if !strings.Contains(yaml, "x-wso2-application-id") { + t.Error("expected x-wso2-application-id in key extraction for consumer request limit") + } + // Should NOT have a backend (non-consumer) advanced-ratelimit entry + if strings.Count(yaml, "advanced-ratelimit") > 1 { + t.Error("expected only one advanced-ratelimit policy (consumer), got more than one") + } + + t.Logf("Generated YAML:\n%s", yaml) +} + +// TestGenerateYAML_ConsumerTokenLimit verifies that a consumer token limit +// generates a token-based-ratelimit policy with consumerBased: true. 
+func TestGenerateYAML_ConsumerTokenLimit(t *testing.T) { + count := 100 + rl := &model.LLMRateLimitingConfig{ + ConsumerLevel: &model.RateLimitingScopeConfig{ + Global: &model.RateLimitingLimitConfig{ + Token: &model.TokenRateLimit{ + Enabled: true, + Count: count, + Reset: model.RateLimitResetWindow{Duration: 2, Unit: "hour"}, + }, + }, + }, + } + + yaml, err := generateLLMProviderDeploymentYAML(providerWithConsumerLimits(rl), "anthropic") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if !strings.Contains(yaml, "consumerBased: true") { + t.Error("expected consumerBased: true in generated YAML") + } + if !strings.Contains(yaml, "token-based-ratelimit") { + t.Error("expected token-based-ratelimit policy in generated YAML") + } + + t.Logf("Generated YAML:\n%s", yaml) +} + +// TestGenerateYAML_ConsumerCostLimit verifies that a consumer cost limit +// generates a llm-cost-based-ratelimit policy with consumerBased: true. +func TestGenerateYAML_ConsumerCostLimit(t *testing.T) { + rl := &model.LLMRateLimitingConfig{ + ConsumerLevel: &model.RateLimitingScopeConfig{ + Global: &model.RateLimitingLimitConfig{ + Cost: &model.CostRateLimit{ + Enabled: true, + Amount: 0.1, + Reset: model.RateLimitResetWindow{Duration: 2, Unit: "hour"}, + }, + }, + }, + } + + yaml, err := generateLLMProviderDeploymentYAML(providerWithConsumerLimits(rl), "anthropic") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if !strings.Contains(yaml, "consumerBased: true") { + t.Error("expected consumerBased: true in generated YAML") + } + if !strings.Contains(yaml, "llm-cost-based-ratelimit") { + t.Error("expected llm-cost-based-ratelimit policy in generated YAML") + } + + t.Logf("Generated YAML:\n%s", yaml) +} + +// TestGenerateYAML_BothBackendAndConsumerLimits verifies that when both a backend +// and a consumer limit are configured, two separate policies are generated — one +// without consumerBased and one with consumerBased: true. +func TestGenerateYAML_BothBackendAndConsumerLimits(t *testing.T) { + count := 100 + rl := &model.LLMRateLimitingConfig{ + ProviderLevel: &model.RateLimitingScopeConfig{ + Global: &model.RateLimitingLimitConfig{ + Token: &model.TokenRateLimit{ + Enabled: true, + Count: count, + Reset: model.RateLimitResetWindow{Duration: 1, Unit: "hour"}, + }, + }, + }, + ConsumerLevel: &model.RateLimitingScopeConfig{ + Global: &model.RateLimitingLimitConfig{ + Token: &model.TokenRateLimit{ + Enabled: true, + Count: count, + Reset: model.RateLimitResetWindow{Duration: 1, Unit: "hour"}, + }, + }, + }, + } + + yaml, err := generateLLMProviderDeploymentYAML(providerWithConsumerLimits(rl), "anthropic") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + // Should have two token-based-ratelimit policies + if strings.Count(yaml, "token-based-ratelimit") < 2 { + t.Errorf("expected two token-based-ratelimit entries, got:\n%s", yaml) + } + // One must be consumer-based + if !strings.Contains(yaml, "consumerBased: true") { + t.Error("expected consumerBased: true in generated YAML") + } + + t.Logf("Generated YAML:\n%s", yaml) +} + +// --------------------------------------------------------------------------- +// Regression: backend-only limits (no consumer) +// --------------------------------------------------------------------------- + +// TestGenerateYAML_BackendOnlyTokenLimit verifies that a backend-only token limit +// generates a token-based-ratelimit policy without consumerBased. 
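+//
+// The substring assertions below rely on the backend branch omitting the
+// consumerBased key entirely rather than emitting "consumerBased: false";
+// otherwise strings.Contains(yaml, "consumerBased") could not distinguish
+// the two scopes.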
+func TestGenerateYAML_BackendOnlyTokenLimit(t *testing.T) { + count := 500 + rl := &model.LLMRateLimitingConfig{ + ProviderLevel: &model.RateLimitingScopeConfig{ + Global: &model.RateLimitingLimitConfig{ + Token: &model.TokenRateLimit{Enabled: true, Count: count, Reset: model.RateLimitResetWindow{Duration: 1, Unit: "hour"}}, + }, + }, + } + yaml, err := generateLLMProviderDeploymentYAML(providerWithConsumerLimits(rl), "anthropic") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !strings.Contains(yaml, "token-based-ratelimit") { + t.Error("expected token-based-ratelimit in generated YAML") + } + if strings.Contains(yaml, "consumerBased") { + t.Error("expected no consumerBased for backend-only limit") + } + t.Logf("Generated YAML:\n%s", yaml) +} + +// TestGenerateYAML_BackendOnlyRequestLimit verifies that a backend-only request limit +// generates an advanced-ratelimit policy with quota name "request-limit" (not "consumer-request-limit") +// and without x-wso2-application-id in the key extraction. +func TestGenerateYAML_BackendOnlyRequestLimit(t *testing.T) { + count := 500 + rl := &model.LLMRateLimitingConfig{ + ProviderLevel: &model.RateLimitingScopeConfig{ + Global: &model.RateLimitingLimitConfig{ + Request: &model.RequestRateLimit{Enabled: true, Count: count, Reset: model.RateLimitResetWindow{Duration: 1, Unit: "hour"}}, + }, + }, + } + yaml, err := generateLLMProviderDeploymentYAML(providerWithConsumerLimits(rl), "anthropic") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !strings.Contains(yaml, "advanced-ratelimit") { + t.Error("expected advanced-ratelimit in generated YAML") + } + if strings.Contains(yaml, "x-wso2-application-id") { + t.Error("expected no x-wso2-application-id for backend-only request limit") + } + if !strings.Contains(yaml, "request-limit") { + t.Error("expected quota name 'request-limit' in generated YAML") + } + if strings.Contains(yaml, "consumer-request-limit") { + t.Error("expected no 'consumer-request-limit' for backend-only request limit") + } + t.Logf("Generated YAML:\n%s", yaml) +} + +// TestGenerateYAML_BackendOnlyCostLimit verifies that a backend-only cost limit +// generates an llm-cost-based-ratelimit policy without consumerBased, plus one llm-cost policy. 
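+//
+// Illustrative fragment, assuming the backend branch emits the same
+// budgetLimits shape as the consumer branch in llm_deployment.go, minus the
+// consumerBased flag, with the companion llm-cost policy appended alongside:
+//
+//	- name: llm-cost-based-ratelimit
+//	  paths:
+//	    - path: /*
+//	      params:
+//	        budgetLimits:
+//	          - amount: 1
+//	            duration: 1h
+//	- name: llm-cost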
+func TestGenerateYAML_BackendOnlyCostLimit(t *testing.T) {
+	rl := &model.LLMRateLimitingConfig{
+		ProviderLevel: &model.RateLimitingScopeConfig{
+			Global: &model.RateLimitingLimitConfig{
+				Cost: &model.CostRateLimit{Enabled: true, Amount: 1.0, Reset: model.RateLimitResetWindow{Duration: 1, Unit: "hour"}},
+			},
+		},
+	}
+	yaml, err := generateLLMProviderDeploymentYAML(providerWithConsumerLimits(rl), "anthropic")
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if !strings.Contains(yaml, "llm-cost-based-ratelimit") {
+		t.Error("expected llm-cost-based-ratelimit in generated YAML")
+	}
+	if strings.Contains(yaml, "consumerBased") {
+		t.Error("expected no consumerBased for backend-only cost limit")
+	}
+	// "llm-cost" is a substring of "llm-cost-based-ratelimit", so match the
+	// exact policy name line instead of counting substring occurrences.
+	if !strings.Contains(yaml, "name: llm-cost\n") {
+		t.Error("expected llm-cost policy alongside llm-cost-based-ratelimit")
+	}
+	t.Logf("Generated YAML:\n%s", yaml)
+}
+
+func TestGenerateYAML_BackendResourceWiseDefaultCostLimit(t *testing.T) {
+	rl := &model.LLMRateLimitingConfig{
+		ProviderLevel: &model.RateLimitingScopeConfig{
+			ResourceWise: &model.ResourceWiseRateLimitingConfig{
+				Default: model.RateLimitingLimitConfig{
+					Cost: &model.CostRateLimit{Enabled: true, Amount: 0.10, Reset: model.RateLimitResetWindow{Duration: 24, Unit: "hour"}},
+				},
+			},
+		},
+	}
+	yaml, err := generateLLMProviderDeploymentYAML(providerWithConsumerLimits(rl), "anthropic")
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if !strings.Contains(yaml, "llm-cost-based-ratelimit") {
+		t.Error("expected llm-cost-based-ratelimit in generated YAML")
+	}
+	if !strings.Contains(yaml, "budgetLimits") {
+		t.Error("expected budgetLimits in generated YAML")
+	}
+	if strings.Contains(yaml, "consumerBased") {
+		t.Error("expected no consumerBased for backend-only cost limit")
+	}
+	t.Logf("Generated YAML:\n%s", yaml)
+}
+
+func TestGenerateYAML_BackendPerResourceCostLimit(t *testing.T) {
+	rl := &model.LLMRateLimitingConfig{
+		ProviderLevel: &model.RateLimitingScopeConfig{
+			ResourceWise: &model.ResourceWiseRateLimitingConfig{
+				Default: model.RateLimitingLimitConfig{},
+				Resources: []model.RateLimitingResourceLimit{
+					{
+						Resource: "/v1/messages",
+						Limit: model.RateLimitingLimitConfig{
+							Cost: &model.CostRateLimit{Enabled: true, Amount: 0.02, Reset: model.RateLimitResetWindow{Duration: 1, Unit: "hour"}},
+						},
+					},
+				},
+			},
+		},
+	}
+	yaml, err := generateLLMProviderDeploymentYAML(providerWithConsumerLimits(rl), "anthropic")
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if !strings.Contains(yaml, "llm-cost-based-ratelimit") {
+		t.Error("expected llm-cost-based-ratelimit in generated YAML")
+	}
+	if !strings.Contains(yaml, "budgetLimits") {
+		t.Error("expected budgetLimits in generated YAML")
+	}
+	if !strings.Contains(yaml, "/v1/messages") {
+		t.Error("expected resource path /v1/messages in generated YAML")
+	}
+	if strings.Contains(yaml, "consumerBased") {
+		t.Error("expected no consumerBased for backend-only cost limit")
+	}
+	t.Logf("Generated YAML:\n%s", yaml)
+}
+
+// ---------------------------------------------------------------------------
+// Backend + consumer for individual limit types
+// ---------------------------------------------------------------------------
+
+// TestGenerateYAML_BothBackendAndConsumerRequestLimits verifies that backend and consumer
+// request limits produce two advanced-ratelimit policies with distinct quota names:
+// "request-limit" (backend, no app-id key) and "consumer-request-limit" (consumer, with app-id key).
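+//
+// The consumer quota's key extraction (taken from the consumer request branch
+// in llm_deployment.go) is expected to serialize roughly as:
+//
+//	keyExtraction:
+//	  - type: routename
+//	  - type: metadata
+//	    key: x-wso2-application-id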
+func TestGenerateYAML_BothBackendAndConsumerRequestLimits(t *testing.T) { + count := 100 + rl := &model.LLMRateLimitingConfig{ + ProviderLevel: &model.RateLimitingScopeConfig{ + Global: &model.RateLimitingLimitConfig{ + Request: &model.RequestRateLimit{Enabled: true, Count: count, Reset: model.RateLimitResetWindow{Duration: 1, Unit: "hour"}}, + }, + }, + ConsumerLevel: &model.RateLimitingScopeConfig{ + Global: &model.RateLimitingLimitConfig{ + Request: &model.RequestRateLimit{Enabled: true, Count: count, Reset: model.RateLimitResetWindow{Duration: 1, Unit: "hour"}}, + }, + }, + } + yaml, err := generateLLMProviderDeploymentYAML(providerWithConsumerLimits(rl), "anthropic") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if strings.Count(yaml, "advanced-ratelimit") < 2 { + t.Error("expected two advanced-ratelimit policies (one backend, one consumer)") + } + if !strings.Contains(yaml, "consumer-request-limit") { + t.Error("expected 'consumer-request-limit' quota name for consumer policy") + } + if !strings.Contains(yaml, "x-wso2-application-id") { + t.Error("expected x-wso2-application-id in consumer policy key extraction") + } + t.Logf("Generated YAML:\n%s", yaml) +} + +// TestGenerateYAML_BothBackendAndConsumerCostLimits verifies that backend and consumer +// cost limits produce two llm-cost-based-ratelimit policies (one with consumerBased: true) +// and exactly one llm-cost policy (not duplicated). +func TestGenerateYAML_BothBackendAndConsumerCostLimits(t *testing.T) { + rl := &model.LLMRateLimitingConfig{ + ProviderLevel: &model.RateLimitingScopeConfig{ + Global: &model.RateLimitingLimitConfig{ + Cost: &model.CostRateLimit{Enabled: true, Amount: 1.0, Reset: model.RateLimitResetWindow{Duration: 1, Unit: "hour"}}, + }, + }, + ConsumerLevel: &model.RateLimitingScopeConfig{ + Global: &model.RateLimitingLimitConfig{ + Cost: &model.CostRateLimit{Enabled: true, Amount: 0.1, Reset: model.RateLimitResetWindow{Duration: 1, Unit: "hour"}}, + }, + }, + } + yaml, err := generateLLMProviderDeploymentYAML(providerWithConsumerLimits(rl), "anthropic") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if strings.Count(yaml, "llm-cost-based-ratelimit") < 2 { + t.Error("expected two llm-cost-based-ratelimit policies (backend + consumer)") + } + if !strings.Contains(yaml, "consumerBased: true") { + t.Error("expected consumerBased: true on consumer cost policy") + } + // llm-cost must appear exactly once — hasPolicy check prevents duplication + if strings.Count(yaml, "name: llm-cost\n") != 1 { + t.Errorf("expected exactly one llm-cost policy, got:\n%s", yaml) + } + t.Logf("Generated YAML:\n%s", yaml) +} + +// --------------------------------------------------------------------------- +// Edge cases +// --------------------------------------------------------------------------- + +// TestGenerateYAML_DisabledLimitIsSkipped verifies that a limit with Enabled: false +// produces no rate limiting policies. 
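+// Each limit type carries its own Enabled flag and the generator gates every
+// branch on it, so the disabled token and cost limits here must be skipped
+// independently rather than via an all-or-nothing check.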
+func TestGenerateYAML_DisabledLimitIsSkipped(t *testing.T) {
+	count := 100
+	rl := &model.LLMRateLimitingConfig{
+		ConsumerLevel: &model.RateLimitingScopeConfig{
+			Global: &model.RateLimitingLimitConfig{
+				Token: &model.TokenRateLimit{Enabled: false, Count: count, Reset: model.RateLimitResetWindow{Duration: 1, Unit: "hour"}},
+				Cost:  &model.CostRateLimit{Enabled: false, Amount: 0.5, Reset: model.RateLimitResetWindow{Duration: 1, Unit: "hour"}},
+			},
+		},
+	}
+	yaml, err := generateLLMProviderDeploymentYAML(providerWithConsumerLimits(rl), "anthropic")
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if strings.Contains(yaml, "token-based-ratelimit") {
+		t.Error("expected no token-based-ratelimit for disabled token limit")
+	}
+	if strings.Contains(yaml, "llm-cost-based-ratelimit") {
+		t.Error("expected no llm-cost-based-ratelimit for disabled cost limit")
+	}
+	t.Logf("Generated YAML:\n%s", yaml)
+}
+
+// TestGenerateYAML_AllThreeConsumerLimits verifies the full consumer-scope UI
+// scenario: consumer request + token + cost all enabled, no backend limits.
+func TestGenerateYAML_AllThreeConsumerLimits(t *testing.T) {
+	count := 100
+	rl := &model.LLMRateLimitingConfig{
+		ProviderLevel: &model.RateLimitingScopeConfig{
+			Global: &model.RateLimitingLimitConfig{},
+		},
+		ConsumerLevel: &model.RateLimitingScopeConfig{
+			Global: &model.RateLimitingLimitConfig{
+				Request: &model.RequestRateLimit{
+					Enabled: true,
+					Count:   count,
+					Reset:   model.RateLimitResetWindow{Duration: 2, Unit: "hour"},
+				},
+				Token: &model.TokenRateLimit{
+					Enabled: true,
+					Count:   count,
+					Reset:   model.RateLimitResetWindow{Duration: 2, Unit: "hour"},
+				},
+				Cost: &model.CostRateLimit{
+					Enabled: true,
+					Amount:  0.1,
+					Reset:   model.RateLimitResetWindow{Duration: 2, Unit: "hour"},
+				},
+			},
+		},
+	}
+
+	yaml, err := generateLLMProviderDeploymentYAML(providerWithConsumerLimits(rl), "anthropic")
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	checks := []string{
+		"advanced-ratelimit",
+		"token-based-ratelimit",
+		"llm-cost-based-ratelimit",
+		"consumerBased: true",
+	}
+	for _, want := range checks {
+		if !strings.Contains(yaml, want) {
+			t.Errorf("expected %q in generated YAML", want)
+		}
+	}
+
+	t.Logf("Generated YAML:\n%s", yaml)
+}