diff --git a/gateway/gateway-controller/default-policies/advanced-ratelimit.yaml b/gateway/gateway-controller/default-policies/advanced-ratelimit.yaml index cfb10d737..5c4d19190 100644 --- a/gateway/gateway-controller/default-policies/advanced-ratelimit.yaml +++ b/gateway/gateway-controller/default-policies/advanced-ratelimit.yaml @@ -1,5 +1,5 @@ name: advanced-ratelimit -version: v1.0.2 +version: v1.0.3 description: | Applies advanced rate limits using fixed-window (default) or GCRA algorithms with multi-quota controls, configurable key and cost extraction, and memory or Redis storage. diff --git a/gateway/gateway-controller/default-policies/llm-cost-based-ratelimit.yaml b/gateway/gateway-controller/default-policies/llm-cost-based-ratelimit.yaml index a8782a0a0..cf717d191 100644 --- a/gateway/gateway-controller/default-policies/llm-cost-based-ratelimit.yaml +++ b/gateway/gateway-controller/default-policies/llm-cost-based-ratelimit.yaml @@ -1,5 +1,5 @@ name: llm-cost-based-ratelimit -version: v1.0.2 +version: v1.0.3 description: | A specialized rate limiting policy for LLMs that limits usage based on monetary budgets. The policy reads costs from SharedContext.Metadata under "x-llm-cost" (set by the llm-cost @@ -42,6 +42,16 @@ parameters: Examples: "1h" (1 hour), "24h" (1 day), "168h" (1 week), "720h" (30 days) pattern: "^(([0-9]+(\\.[0-9]*)?|\\.[0-9]+)(ns|us|µs|ms|s|m|h))+$" + consumerBased: + type: boolean + x-wso2-policy-advanced-param: false + description: | + When true, rate limits are applied per consumer (GenAI application) identified + by the x-wso2-application-id metadata key set by the api-key-auth policy. + Each application gets its own independent cost counter. + When false (default), a single shared limit applies across all consumers. + default: false + systemParameters: type: object additionalProperties: false diff --git a/gateway/gateway-controller/default-policies/token-based-ratelimit.yaml b/gateway/gateway-controller/default-policies/token-based-ratelimit.yaml index 644a3cb49..54c32a8fd 100644 --- a/gateway/gateway-controller/default-policies/token-based-ratelimit.yaml +++ b/gateway/gateway-controller/default-policies/token-based-ratelimit.yaml @@ -1,5 +1,5 @@ name: token-based-ratelimit -version: v1.0.2 +version: v1.0.3 description: | Enforces token-based rate limits for LLM traffic by resolving token extraction paths from provider templates and delegating enforcement to the @@ -75,6 +75,15 @@ parameters: description: Specifies the duration window for the limit as a Go duration string, for example "1s", "1m", "1h", or "24h". pattern: "^[-+]?(([0-9]+(\\.[0-9]*)?|\\.[0-9]+)(ns|us|µs|ms|s|m|h))+$" + consumerBased: + type: boolean + x-wso2-policy-advanced-param: false + description: | + When true, rate limits are applied per consumer (GenAI application) identified + by the x-wso2-application-id metadata key set by the api-key-auth policy. + Each application gets its own independent token counter. + When false (default), a single shared limit applies across all consumers. + default: false oneOf: - required: ["promptTokenLimits"] not: diff --git a/gateway/it/features/consumer-cost-based-ratelimit.feature b/gateway/it/features/consumer-cost-based-ratelimit.feature new file mode 100644 index 000000000..ebb745524 --- /dev/null +++ b/gateway/it/features/consumer-cost-based-ratelimit.feature @@ -0,0 +1,490 @@ +# -------------------------------------------------------------------- +# Copyright (c) 2026, WSO2 LLC. (https://www.wso2.com). +# +# WSO2 LLC. 
licenses this file to you under the Apache License, +# Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# -------------------------------------------------------------------- + +@consumer-cost-based-ratelimit +Feature: Consumer Cost-Based Rate Limiting + As an API developer + I want cost limits to be enforced independently per GenAI application + So that one application exhausting its budget does not block other applications + + Background: + Given the gateway services are running + And I authenticate using basic auth as "admin" + + Scenario: Each consumer gets an independent cost budget + # mock-openai returns gpt-4.1-2025-04-14: 19 prompt × $2/1M + 10 completion × $8/1M = $0.0001180000 + # Budget per consumer: $0.000236 = exactly 2 requests worth + # App A sends 2 requests (budget exhausted) and is blocked on the 3rd. + # App B is unaffected — its budget counter is still at $0. + When I create this LLM provider template: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProviderTemplate + metadata: + name: ccbrl-template + spec: + displayName: CCBRL Template + """ + Then the response status code should be 201 + + When I create this LLM provider: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProvider + metadata: + name: ccbrl-provider + spec: + displayName: CCBRL Provider + version: v1.0 + context: /ccbrl + template: ccbrl-template + upstream: + url: http://mock-openapi:4010 + auth: + type: api-key + header: Authorization + value: test-key + accessControl: + mode: allow_all + policies: + - name: api-key-auth + version: v1 + paths: + - path: /* + methods: ['*'] + params: + key: x-api-key + in: header + - name: llm-cost-based-ratelimit + version: v1 + paths: + - path: /* + methods: ['*'] + params: + budgetLimits: + - amount: 0.000236 + duration: "1h" + consumerBased: true + - name: llm-cost + version: v1 + paths: + - path: /* + methods: ['*'] + """ + Then the response status code should be 201 + And I wait for policy snapshot sync + + # Create API key for App A + When I send a POST request to the "gateway-controller" service at "/llm-providers/ccbrl-provider/api-keys" with body: + """ + { + "name": "ccbrl-app-a", + "apiKey": "ccbrl-app-a-key-000000000000000000000000" + } + """ + Then the response status code should be 201 + + # Create API key for App B + When I send a POST request to the "gateway-controller" service at "/llm-providers/ccbrl-provider/api-keys" with body: + """ + { + "name": "ccbrl-app-b", + "apiKey": "ccbrl-app-b-key-000000000000000000000000" + } + """ + Then the response status code should be 201 + And I wait for 2 seconds + + Given I set header "Content-Type" to "application/json" + + # App A: request 1 — allowed, budget drops to $0.000118 + When I send a POST request to "http://localhost:8080/ccbrl/openai/v1/chat/completions" with header "x-api-key" value "ccbrl-app-a-key-000000000000000000000000" with body: + """ json + {"model": "gpt-4.1-2025-04-14", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + + # App A: 
request 2 — allowed, budget reaches exactly $0 + When I send a POST request to "http://localhost:8080/ccbrl/openai/v1/chat/completions" with header "x-api-key" value "ccbrl-app-a-key-000000000000000000000000" with body: + """ json + {"model": "gpt-4.1-2025-04-14", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + + # App A: request 3 — blocked, budget exhausted + When I send a POST request to "http://localhost:8080/ccbrl/openai/v1/chat/completions" with header "x-api-key" value "ccbrl-app-a-key-000000000000000000000000" with body: + """ json + {"model": "gpt-4.1-2025-04-14", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 429 + + # App B: request 1 — should succeed, App B has its own independent cost counter + When I send a POST request to "http://localhost:8080/ccbrl/openai/v1/chat/completions" with header "x-api-key" value "ccbrl-app-b-key-000000000000000000000000" with body: + """ json + {"model": "gpt-4.1-2025-04-14", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + + # Cleanup + Given I authenticate using basic auth as "admin" + When I delete the LLM provider "ccbrl-provider" + Then the response status code should be 200 + When I delete the LLM provider template "ccbrl-template" + Then the response status code should be 200 + + Scenario: Backend cost limit blocks all consumers when shared budget is exhausted + # Backend limit: $0.000236/hour shared across all apps (exactly 2 requests worth). + # Consumer limit: $0.000236/hour per app independently. + # App A sends 2 requests — exhausts the shared backend budget. + # App B's next request is blocked by the backend limit even though + # App B's own consumer budget is still at $0. 
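+    # (Illustrative bucket layout, assuming the "<route>:<app>" key scheme implied by + # the fallback scenario below: the backend budget is keyed by route only, giving one + # shared $0.000236 bucket, while the consumer budget keys add the application id, + # e.g. "ccbrl-both:<app-a-id>" and "ccbrl-both:<app-b-id>" with $0.000236 each.)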
+ When I create this LLM provider template: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProviderTemplate + metadata: + name: ccbrl-both-template + spec: + displayName: CCBRL Both Template + """ + Then the response status code should be 201 + + When I create this LLM provider: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProvider + metadata: + name: ccbrl-both-provider + spec: + displayName: CCBRL Both Provider + version: v1.0 + context: /ccbrl-both + template: ccbrl-both-template + upstream: + url: http://mock-openapi:4010 + auth: + type: api-key + header: Authorization + value: test-key + accessControl: + mode: allow_all + policies: + - name: api-key-auth + version: v1 + paths: + - path: /* + methods: ['*'] + params: + key: x-api-key + in: header + - name: llm-cost-based-ratelimit + version: v1 + paths: + - path: /* + methods: ['*'] + params: + budgetLimits: + - amount: 0.000236 + duration: "1h" + - name: llm-cost-based-ratelimit + version: v1 + paths: + - path: /* + methods: ['*'] + params: + budgetLimits: + - amount: 0.000236 + duration: "1h" + consumerBased: true + - name: llm-cost + version: v1 + paths: + - path: /* + methods: ['*'] + """ + Then the response status code should be 201 + And I wait for policy snapshot sync + + # Create API key for App A + When I send a POST request to the "gateway-controller" service at "/llm-providers/ccbrl-both-provider/api-keys" with body: + """ + { + "name": "ccbrl-both-app-a", + "apiKey": "ccbrl-both-app-a-key-00000000000000000000000" + } + """ + Then the response status code should be 201 + + # Create API key for App B + When I send a POST request to the "gateway-controller" service at "/llm-providers/ccbrl-both-provider/api-keys" with body: + """ + { + "name": "ccbrl-both-app-b", + "apiKey": "ccbrl-both-app-b-key-00000000000000000000000" + } + """ + Then the response status code should be 201 + And I wait for 2 seconds + + Given I set header "Content-Type" to "application/json" + + # App A: request 1 — allowed, shared backend budget drops to $0.000118 + When I send a POST request to "http://localhost:8080/ccbrl-both/openai/v1/chat/completions" with header "x-api-key" value "ccbrl-both-app-a-key-00000000000000000000000" with body: + """ json + {"model": "gpt-4.1-2025-04-14", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + + # App A: request 2 — allowed, shared backend budget reaches exactly $0 + When I send a POST request to "http://localhost:8080/ccbrl-both/openai/v1/chat/completions" with header "x-api-key" value "ccbrl-both-app-a-key-00000000000000000000000" with body: + """ json + {"model": "gpt-4.1-2025-04-14", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + + # App B: blocked by the shared backend budget even though its own consumer budget is at $0 + When I send a POST request to "http://localhost:8080/ccbrl-both/openai/v1/chat/completions" with header "x-api-key" value "ccbrl-both-app-b-key-00000000000000000000000" with body: + """ json + {"model": "gpt-4.1-2025-04-14", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 429 + + # Cleanup + Given I authenticate using basic auth as "admin" + When I delete the LLM provider "ccbrl-both-provider" + Then the response status code should be 200 + When I delete the LLM provider template "ccbrl-both-template" + Then the response status code should be 200 + + Scenario: Requests without an 
app ID share a single "default" cost budget + # When no api-key-auth is in the chain, x-wso2-application-id is never written to + # metadata. The fallback key "default" is used so all unauthenticated requests count + # against the same "default" cost bucket (not the backend "routename" bucket). + # Budget: $0.000236/hour (2 requests worth at gpt-4.1-2025-04-14 pricing). + # After 2 requests the "default" budget is exhausted and further requests are blocked. + When I create this LLM provider template: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProviderTemplate + metadata: + name: ccbrl-fallback-template + spec: + displayName: CCBRL Fallback Template + """ + Then the response status code should be 201 + + When I create this LLM provider: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProvider + metadata: + name: ccbrl-fallback-provider + spec: + displayName: CCBRL Fallback Provider + version: v1.0 + context: /ccbrl-fallback + template: ccbrl-fallback-template + upstream: + url: http://mock-openapi:4010 + auth: + type: api-key + header: Authorization + value: test-key + accessControl: + mode: allow_all + policies: + - name: llm-cost-based-ratelimit + version: v1 + paths: + - path: /* + methods: ['*'] + params: + budgetLimits: + - amount: 0.000236 + duration: "1h" + consumerBased: true + - name: llm-cost + version: v1 + paths: + - path: /* + methods: ['*'] + """ + Then the response status code should be 201 + And I wait for policy snapshot sync + + Given I set header "Content-Type" to "application/json" + + # Request 1 — no app ID, key = "ccbrl-fallback:default" — allowed, budget drops to $0.000118 + When I send a POST request to "http://localhost:8080/ccbrl-fallback/openai/v1/chat/completions" with body: + """ json + {"model": "gpt-4.1-2025-04-14", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + + # Request 2 — no app ID, same "default" budget — allowed, budget reaches exactly $0 + When I send a POST request to "http://localhost:8080/ccbrl-fallback/openai/v1/chat/completions" with body: + """ json + {"model": "gpt-4.1-2025-04-14", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + + # Request 3 — "default" budget exhausted — blocked + When I send a POST request to "http://localhost:8080/ccbrl-fallback/openai/v1/chat/completions" with body: + """ json + {"model": "gpt-4.1-2025-04-14", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 429 + + # Cleanup + Given I authenticate using basic auth as "admin" + When I delete the LLM provider "ccbrl-fallback-provider" + Then the response status code should be 200 + When I delete the LLM provider template "ccbrl-fallback-template" + Then the response status code should be 200 + + Scenario: Consumer counter is not double-deducted when both backend and consumer limits are active + # This test guards against the llm_cost_delegate metadata key collision. + # + # Without the fix: both backend and consumer LLMCostRateLimitPolicy instances write + # their delegate reference to the same metadata key ("llm_cost_delegate"). The consumer + # overwrites the backend's entry. In the response phase (reverse order), the backend + # instance reads back the consumer's delegate and calls it — so the consumer's + # OnResponseBody runs twice. The consumer counter is drained twice as fast. 
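+    # + # Illustrative response-phase call order with the collision (per the description above): + #   1. consumer.OnResponseBody() runs first (reverse order) + #   2. backend reads "llm_cost_delegate" and now holds the consumer's delegate + #   3. consumer.OnResponseBody() is invoked again through the backend's read-back + # so the consumer budget is deducted twice per request.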
+ # + # With the fix: backend uses "llm_cost_delegate", consumer uses + # "llm_cost_delegate_consumer". Each instance reads back only its own delegate. + # + # Setup: + # Backend limit: $1/hour (very high — never exhausted in this test) + # Consumer limit: $0.000236/hour = exactly 2 requests at gpt-4.1-2025-04-14 pricing + # + # Expected (with fix): + # request 1 → 200 (consumer deducted once: $0.000236 - $0.000118 = $0.000118 remaining) + # request 2 → 200 (consumer deducted once: $0.000118 - $0.000118 = $0 remaining) + # request 3 → 429 (consumer exhausted) + # + # Without fix: + # request 1 → 200 (consumer deducted twice: $0.000236 - 2×$0.000118 = $0 remaining) + # request 2 → 429 ← test fails here + When I create this LLM provider template: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProviderTemplate + metadata: + name: ccbrl-nodbl-template + spec: + displayName: CCBRL No-Double Template + """ + Then the response status code should be 201 + + When I create this LLM provider: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProvider + metadata: + name: ccbrl-nodbl-provider + spec: + displayName: CCBRL No-Double Provider + version: v1.0 + context: /ccbrl-nodbl + template: ccbrl-nodbl-template + upstream: + url: http://mock-openapi:4010 + auth: + type: api-key + header: Authorization + value: test-key + accessControl: + mode: allow_all + policies: + - name: api-key-auth + version: v1 + paths: + - path: /* + methods: ['*'] + params: + key: x-api-key + in: header + - name: llm-cost-based-ratelimit + version: v1 + paths: + - path: /* + methods: ['*'] + params: + budgetLimits: + - amount: 1.0 + duration: "1h" + - name: llm-cost-based-ratelimit + version: v1 + paths: + - path: /* + methods: ['*'] + params: + budgetLimits: + - amount: 0.000236 + duration: "1h" + consumerBased: true + - name: llm-cost + version: v1 + paths: + - path: /* + methods: ['*'] + """ + Then the response status code should be 201 + And I wait for policy snapshot sync + + When I send a POST request to the "gateway-controller" service at "/llm-providers/ccbrl-nodbl-provider/api-keys" with body: + """ + { + "name": "ccbrl-nodbl-app-a", + "apiKey": "ccbrl-nodbl-app-a-key-0000000000000000000000" + } + """ + Then the response status code should be 201 + And I wait for 2 seconds + + Given I set header "Content-Type" to "application/json" + + # Request 1 — allowed; consumer budget: $0.000236 - $0.000118 = $0.000118 remaining + When I send a POST request to "http://localhost:8080/ccbrl-nodbl/openai/v1/chat/completions" with header "x-api-key" value "ccbrl-nodbl-app-a-key-0000000000000000000000" with body: + """ json + {"model": "gpt-4.1-2025-04-14", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + + # Request 2 — allowed; consumer budget: $0.000118 - $0.000118 = $0 remaining + # Without the fix this would be 429 because the consumer counter was double-deducted on request 1 + When I send a POST request to "http://localhost:8080/ccbrl-nodbl/openai/v1/chat/completions" with header "x-api-key" value "ccbrl-nodbl-app-a-key-0000000000000000000000" with body: + """ json + {"model": "gpt-4.1-2025-04-14", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + + # Request 3 — blocked; consumer budget exhausted + When I send a POST request to "http://localhost:8080/ccbrl-nodbl/openai/v1/chat/completions" with header "x-api-key" value "ccbrl-nodbl-app-a-key-0000000000000000000000" with 
body: + """ json + {"model": "gpt-4.1-2025-04-14", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 429 + + # Cleanup + Given I authenticate using basic auth as "admin" + When I delete the LLM provider "ccbrl-nodbl-provider" + Then the response status code should be 200 + When I delete the LLM provider template "ccbrl-nodbl-template" + Then the response status code should be 200 diff --git a/gateway/it/features/consumer-request-based-ratelimit.feature b/gateway/it/features/consumer-request-based-ratelimit.feature new file mode 100644 index 000000000..921d7a642 --- /dev/null +++ b/gateway/it/features/consumer-request-based-ratelimit.feature @@ -0,0 +1,365 @@ +# -------------------------------------------------------------------- +# Copyright (c) 2026, WSO2 LLC. (https://www.wso2.com). +# +# WSO2 LLC. licenses this file to you under the Apache License, +# Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# -------------------------------------------------------------------- + +@consumer-request-based-ratelimit +Feature: Consumer Request-Based Rate Limiting + As an API developer + I want request count limits to be enforced independently per GenAI application + So that one application exhausting its request quota does not block other applications + + Background: + Given the gateway services are running + And I authenticate using basic auth as "admin" + + Scenario: Each consumer gets an independent request counter + # Each app gets 2 requests/hour independently. + # App A sends 2 requests (limit reached) and gets blocked on the 3rd. + # App B is unaffected — its counter is still at 0. 
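+    # (Assumed quota keys, following the "<route>:<app>" form shown by the fallback + # scenario below: the routename plus x-wso2-application-id extraction chain yields + # e.g. "crbrl:<app-a-id>" and "crbrl:<app-b-id>", one counter per app.)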
+ When I create this LLM provider template: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProviderTemplate + metadata: + name: crbrl-template + spec: + displayName: CRBRL Template + """ + Then the response status code should be 201 + + When I create this LLM provider: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProvider + metadata: + name: crbrl-provider + spec: + displayName: CRBRL Provider + version: v1.0 + context: /crbrl + template: crbrl-template + upstream: + url: http://echo-backend-multi-arch:8080/anything + auth: + type: api-key + header: Authorization + value: test-key + accessControl: + mode: allow_all + policies: + - name: api-key-auth + version: v1 + paths: + - path: /* + methods: ['*'] + params: + key: x-api-key + in: header + - name: advanced-ratelimit + version: v1 + paths: + - path: /* + methods: ['*'] + params: + quotas: + - name: consumer-request-limit + limits: + - limit: 2 + duration: "1h" + keyExtraction: + - type: routename + - type: metadata + key: x-wso2-application-id + """ + Then the response status code should be 201 + And I wait for policy snapshot sync + + # Create API key for App A + When I send a POST request to the "gateway-controller" service at "/llm-providers/crbrl-provider/api-keys" with body: + """ + { + "name": "crbrl-app-a", + "apiKey": "crbrl-app-a-key-000000000000000000000000" + } + """ + Then the response status code should be 201 + + # Create API key for App B + When I send a POST request to the "gateway-controller" service at "/llm-providers/crbrl-provider/api-keys" with body: + """ + { + "name": "crbrl-app-b", + "apiKey": "crbrl-app-b-key-000000000000000000000000" + } + """ + Then the response status code should be 201 + And I wait for 2 seconds + + Given I set header "Content-Type" to "application/json" + + # App A: request 1 — allowed (counter: 1/2) + When I send a POST request to "http://localhost:8080/crbrl/chat/completions" with header "x-api-key" value "crbrl-app-a-key-000000000000000000000000" with body: + """ + {"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + + # App A: request 2 — allowed (counter: 2/2, limit reached) + When I send a POST request to "http://localhost:8080/crbrl/chat/completions" with header "x-api-key" value "crbrl-app-a-key-000000000000000000000000" with body: + """ + {"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + + # App A: request 3 — blocked, request quota exhausted + When I send a POST request to "http://localhost:8080/crbrl/chat/completions" with header "x-api-key" value "crbrl-app-a-key-000000000000000000000000" with body: + """ + {"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 429 + + # App B: request 1 — should succeed, App B has its own independent counter + When I send a POST request to "http://localhost:8080/crbrl/chat/completions" with header "x-api-key" value "crbrl-app-b-key-000000000000000000000000" with body: + """ + {"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + + # Cleanup + Given I authenticate using basic auth as "admin" + When I delete the LLM provider "crbrl-provider" + Then the response status code should be 200 + When I delete the LLM provider template "crbrl-template" + Then the response status code should be 200 + + Scenario: Backend request 
limit blocks all consumers when shared quota is exhausted + # Backend limit: 3 requests/hour shared across all apps. + # Consumer limit: 3 requests/hour per app independently. + # App A sends 3 requests — exhausts the shared backend counter. + # App B's next request is blocked by the backend limit even though + # App B's own consumer counter is still at 0. + When I create this LLM provider template: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProviderTemplate + metadata: + name: crbrl-both-template + spec: + displayName: CRBRL Both Template + """ + Then the response status code should be 201 + + When I create this LLM provider: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProvider + metadata: + name: crbrl-both-provider + spec: + displayName: CRBRL Both Provider + version: v1.0 + context: /crbrl-both + template: crbrl-both-template + upstream: + url: http://echo-backend-multi-arch:8080/anything + auth: + type: api-key + header: Authorization + value: test-key + accessControl: + mode: allow_all + policies: + - name: api-key-auth + version: v1 + paths: + - path: /* + methods: ['*'] + params: + key: x-api-key + in: header + - name: advanced-ratelimit + version: v1 + paths: + - path: /* + methods: ['*'] + params: + quotas: + - name: backend-request-limit + limits: + - limit: 3 + duration: "1h" + keyExtraction: + - type: routename + - name: advanced-ratelimit + version: v1 + paths: + - path: /* + methods: ['*'] + params: + quotas: + - name: consumer-request-limit + limits: + - limit: 3 + duration: "1h" + keyExtraction: + - type: routename + - type: metadata + key: x-wso2-application-id + """ + Then the response status code should be 201 + And I wait for policy snapshot sync + + # Create API key for App A + When I send a POST request to the "gateway-controller" service at "/llm-providers/crbrl-both-provider/api-keys" with body: + """ + { + "name": "crbrl-both-app-a", + "apiKey": "crbrl-both-app-a-key-00000000000000000000000" + } + """ + Then the response status code should be 201 + + # Create API key for App B + When I send a POST request to the "gateway-controller" service at "/llm-providers/crbrl-both-provider/api-keys" with body: + """ + { + "name": "crbrl-both-app-b", + "apiKey": "crbrl-both-app-b-key-00000000000000000000000" + } + """ + Then the response status code should be 201 + And I wait for 2 seconds + + Given I set header "Content-Type" to "application/json" + + # App A: requests 1-3 — exhausts the shared backend counter (3/3) + When I send a POST request to "http://localhost:8080/crbrl-both/chat/completions" with header "x-api-key" value "crbrl-both-app-a-key-00000000000000000000000" with body: + """ + {"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + When I send a POST request to "http://localhost:8080/crbrl-both/chat/completions" with header "x-api-key" value "crbrl-both-app-a-key-00000000000000000000000" with body: + """ + {"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + When I send a POST request to "http://localhost:8080/crbrl-both/chat/completions" with header "x-api-key" value "crbrl-both-app-a-key-00000000000000000000000" with body: + """ + {"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + + # App B: blocked by the shared backend counter even though its own consumer counter is at 0 + When I send a POST 
request to "http://localhost:8080/crbrl-both/chat/completions" with header "x-api-key" value "crbrl-both-app-b-key-00000000000000000000000" with body: + """ + {"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 429 + + # Cleanup + Given I authenticate using basic auth as "admin" + When I delete the LLM provider "crbrl-both-provider" + Then the response status code should be 200 + When I delete the LLM provider template "crbrl-both-template" + Then the response status code should be 200 + + Scenario: Requests without an app ID share a single "default" counter + # When no api-key-auth is in the chain, x-wso2-application-id is never written to + # metadata. The fallback key "default" is used instead of a "_missing_metadata_*_" + # placeholder, so all unauthenticated requests count against the same "default" bucket. + # Limit: 2 requests/hour. After 2 requests the "default" counter is exhausted and + # all further requests (still with no app ID) are blocked. + When I create this LLM provider template: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProviderTemplate + metadata: + name: crbrl-fallback-template + spec: + displayName: CRBRL Fallback Template + """ + Then the response status code should be 201 + + When I create this LLM provider: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProvider + metadata: + name: crbrl-fallback-provider + spec: + displayName: CRBRL Fallback Provider + version: v1.0 + context: /crbrl-fallback + template: crbrl-fallback-template + upstream: + url: http://echo-backend-multi-arch:8080/anything + auth: + type: api-key + header: Authorization + value: test-key + accessControl: + mode: allow_all + policies: + - name: advanced-ratelimit + version: v1 + paths: + - path: /* + methods: ['*'] + params: + quotas: + - name: consumer-request-limit + limits: + - limit: 2 + duration: "1h" + keyExtraction: + - type: routename + - type: metadata + key: x-wso2-application-id + fallback: default + """ + Then the response status code should be 201 + And I wait for policy snapshot sync + + Given I set header "Content-Type" to "application/json" + + # Request 1 — no app ID in metadata, key = "crbrl-fallback:default" — allowed (1/2) + When I send a POST request to "http://localhost:8080/crbrl-fallback/chat/completions" with body: + """ + {"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + + # Request 2 — no app ID in metadata, same "default" counter — allowed (2/2) + When I send a POST request to "http://localhost:8080/crbrl-fallback/chat/completions" with body: + """ + {"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 200 + + # Request 3 — "default" counter exhausted — blocked + When I send a POST request to "http://localhost:8080/crbrl-fallback/chat/completions" with body: + """ + {"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}]} + """ + Then the response status code should be 429 + + # Cleanup + Given I authenticate using basic auth as "admin" + When I delete the LLM provider "crbrl-fallback-provider" + Then the response status code should be 200 + When I delete the LLM provider template "crbrl-fallback-template" + Then the response status code should be 200 diff --git a/gateway/it/features/consumer-token-based-ratelimit.feature b/gateway/it/features/consumer-token-based-ratelimit.feature new file mode 100644 
index 000000000..c7e08a464 --- /dev/null +++ b/gateway/it/features/consumer-token-based-ratelimit.feature @@ -0,0 +1,409 @@ +# -------------------------------------------------------------------- +# Copyright (c) 2026, WSO2 LLC. (https://www.wso2.com). +# +# WSO2 LLC. licenses this file to you under the Apache License, +# Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# -------------------------------------------------------------------- + +@consumer-token-based-ratelimit +Feature: Consumer Token-Based Rate Limiting + As an API developer + I want token limits to be enforced independently per GenAI application + So that one application exhausting its budget does not block other applications + + Background: + Given the gateway services are running + And I authenticate using basic auth as "admin" + + Scenario: Each consumer gets an independent token counter + # Each app gets 20 total tokens/hour independently. + # Each request carries usage.total_tokens = 10, which the echo backend returns in its response body. + # App A sends 2 requests (20 tokens) and gets blocked on the 3rd. + # App B is unaffected — its counter is still at 0. + When I create this LLM provider template: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProviderTemplate + metadata: + name: ctbrl-template + spec: + displayName: CTBRL Template + totalTokens: + location: payload + identifier: $.json.usage.total_tokens + """ + Then the response status code should be 201 + + When I create this LLM provider: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProvider + metadata: + name: ctbrl-provider + spec: + displayName: CTBRL Provider + version: v1.0 + context: /ctbrl + template: ctbrl-template + upstream: + url: http://echo-backend-multi-arch:8080/anything + auth: + type: api-key + header: Authorization + value: test-key + accessControl: + mode: allow_all + policies: + - name: api-key-auth + version: v1 + paths: + - path: /* + methods: ['*'] + params: + key: x-api-key + in: header + - name: token-based-ratelimit + version: v1 + paths: + - path: /* + methods: ['*'] + params: + totalTokenLimits: + - count: 20 + duration: "1h" + consumerBased: true + algorithm: fixed-window + backend: memory + """ + Then the response status code should be 201 + And I wait for policy snapshot sync + + # Create API key for App A (pre-set known value, min 36 chars) + When I send a POST request to the "gateway-controller" service at "/llm-providers/ctbrl-provider/api-keys" with body: + """ + { + "name": "ctbrl-app-a", + "apiKey": "ctbrl-app-a-key-000000000000000000000000" + } + """ + Then the response status code should be 201 + + # Create API key for App B + When I send a POST request to the "gateway-controller" service at "/llm-providers/ctbrl-provider/api-keys" with body: + """ + { + "name": "ctbrl-app-b", + "apiKey": "ctbrl-app-b-key-000000000000000000000000" + } + """ + Then the response status code should be 201 + And I wait for 2 seconds + + Given I set header "Content-Type" to "application/json" + + # App A: request 1 — consumes 10 tokens (counter: 10/20) + When I send a POST request 
to "http://localhost:8080/ctbrl/chat/completions" with header "x-api-key" value "ctbrl-app-a-key-000000000000000000000000" with body: + """ + { + "model": "gpt-4", + "messages": [{"role": "user", "content": "Hello"}], + "usage": {"prompt_tokens": 4, "completion_tokens": 6, "total_tokens": 10} + } + """ + Then the response status code should be 200 + + # App A: request 2 — consumes 10 more tokens (counter: 20/20, limit reached) + When I send a POST request to "http://localhost:8080/ctbrl/chat/completions" with header "x-api-key" value "ctbrl-app-a-key-000000000000000000000000" with body: + """ + { + "model": "gpt-4", + "messages": [{"role": "user", "content": "Hello"}], + "usage": {"prompt_tokens": 4, "completion_tokens": 6, "total_tokens": 10} + } + """ + Then the response status code should be 200 + + # App A: request 3 — blocked, token budget exhausted + When I send a POST request to "http://localhost:8080/ctbrl/chat/completions" with header "x-api-key" value "ctbrl-app-a-key-000000000000000000000000" with body: + """ + { + "model": "gpt-4", + "messages": [{"role": "user", "content": "Hello"}], + "usage": {"prompt_tokens": 4, "completion_tokens": 6, "total_tokens": 10} + } + """ + Then the response status code should be 429 + + # App B: request 1 — should succeed, App B has its own independent counter + When I send a POST request to "http://localhost:8080/ctbrl/chat/completions" with header "x-api-key" value "ctbrl-app-b-key-000000000000000000000000" with body: + """ + { + "model": "gpt-4", + "messages": [{"role": "user", "content": "Hello"}], + "usage": {"prompt_tokens": 4, "completion_tokens": 6, "total_tokens": 10} + } + """ + Then the response status code should be 200 + + # Cleanup + Given I authenticate using basic auth as "admin" + When I delete the LLM provider "ctbrl-provider" + Then the response status code should be 200 + When I delete the LLM provider template "ctbrl-template" + Then the response status code should be 200 + + Scenario: Backend limit blocks all consumers when shared budget is exhausted + # Backend limit: 30 total tokens/hour shared across all apps. + # Consumer limit: 30 total tokens/hour per app independently. + # Each request uses 10 tokens (via echo backend). + # App A sends 3 requests — exhausts the backend shared counter (30/30). + # App B's next request is blocked by the backend limit even though + # App B's own consumer counter is only at 0. 
+ When I create this LLM provider template: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProviderTemplate + metadata: + name: ctbrl-both-template + spec: + displayName: CTBRL Both Template + totalTokens: + location: payload + identifier: $.json.usage.total_tokens + """ + Then the response status code should be 201 + + When I create this LLM provider: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProvider + metadata: + name: ctbrl-both-provider + spec: + displayName: CTBRL Both Provider + version: v1.0 + context: /ctbrl-both + template: ctbrl-both-template + upstream: + url: http://echo-backend-multi-arch:8080/anything + auth: + type: api-key + header: Authorization + value: test-key + accessControl: + mode: allow_all + policies: + - name: api-key-auth + version: v1 + paths: + - path: /* + methods: ['*'] + params: + key: x-api-key + in: header + - name: token-based-ratelimit + version: v1 + paths: + - path: /* + methods: ['*'] + params: + totalTokenLimits: + - count: 30 + duration: "1h" + algorithm: fixed-window + backend: memory + - name: token-based-ratelimit + version: v1 + paths: + - path: /* + methods: ['*'] + params: + totalTokenLimits: + - count: 30 + duration: "1h" + consumerBased: true + algorithm: fixed-window + backend: memory + """ + Then the response status code should be 201 + And I wait for policy snapshot sync + + # Create API key for App A + When I send a POST request to the "gateway-controller" service at "/llm-providers/ctbrl-both-provider/api-keys" with body: + """ + { + "name": "ctbrl-both-app-a", + "apiKey": "ctbrl-both-app-a-key-00000000000000000000000" + } + """ + Then the response status code should be 201 + + # Create API key for App B + When I send a POST request to the "gateway-controller" service at "/llm-providers/ctbrl-both-provider/api-keys" with body: + """ + { + "name": "ctbrl-both-app-b", + "apiKey": "ctbrl-both-app-b-key-00000000000000000000000" + } + """ + Then the response status code should be 201 + And I wait for 2 seconds + + Given I set header "Content-Type" to "application/json" + + # App A: requests 1-3 — exhausts the shared backend counter (30 tokens) + When I send a POST request to "http://localhost:8080/ctbrl-both/chat/completions" with header "x-api-key" value "ctbrl-both-app-a-key-00000000000000000000000" with body: + """ + { + "model": "gpt-4", + "messages": [{"role": "user", "content": "Hello"}], + "usage": {"prompt_tokens": 4, "completion_tokens": 6, "total_tokens": 10} + } + """ + Then the response status code should be 200 + When I send a POST request to "http://localhost:8080/ctbrl-both/chat/completions" with header "x-api-key" value "ctbrl-both-app-a-key-00000000000000000000000" with body: + """ + { + "model": "gpt-4", + "messages": [{"role": "user", "content": "Hello"}], + "usage": {"prompt_tokens": 4, "completion_tokens": 6, "total_tokens": 10} + } + """ + Then the response status code should be 200 + When I send a POST request to "http://localhost:8080/ctbrl-both/chat/completions" with header "x-api-key" value "ctbrl-both-app-a-key-00000000000000000000000" with body: + """ + { + "model": "gpt-4", + "messages": [{"role": "user", "content": "Hello"}], + "usage": {"prompt_tokens": 4, "completion_tokens": 6, "total_tokens": 10} + } + """ + Then the response status code should be 200 + + # App B: blocked by the shared backend counter even though its own consumer counter is at 0 + When I send a POST request to "http://localhost:8080/ctbrl-both/chat/completions" with header "x-api-key" value 
"ctbrl-both-app-b-key-00000000000000000000000" with body: + """ + { + "model": "gpt-4", + "messages": [{"role": "user", "content": "Hello"}], + "usage": {"prompt_tokens": 4, "completion_tokens": 6, "total_tokens": 10} + } + """ + Then the response status code should be 429 + + # Cleanup + Given I authenticate using basic auth as "admin" + When I delete the LLM provider "ctbrl-both-provider" + Then the response status code should be 200 + When I delete the LLM provider template "ctbrl-both-template" + Then the response status code should be 200 + + Scenario: Requests without an app ID share a single "default" token counter + # When no api-key-auth is in the chain, x-wso2-application-id is never written to + # metadata. The fallback key "default" is used so all unauthenticated requests count + # against the same "default" token bucket (not against the backend "routename" bucket). + # Limit: 20 total tokens/hour. Each request consumes 10 tokens via echo backend. + # After 2 requests (20 tokens) the "default" counter is exhausted and further + # requests are blocked. + When I create this LLM provider template: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProviderTemplate + metadata: + name: ctbrl-fallback-template + spec: + displayName: CTBRL Fallback Template + totalTokens: + location: payload + identifier: $.json.usage.total_tokens + """ + Then the response status code should be 201 + + When I create this LLM provider: + """ + apiVersion: gateway.api-platform.wso2.com/v1alpha1 + kind: LlmProvider + metadata: + name: ctbrl-fallback-provider + spec: + displayName: CTBRL Fallback Provider + version: v1.0 + context: /ctbrl-fallback + template: ctbrl-fallback-template + upstream: + url: http://echo-backend-multi-arch:8080/anything + auth: + type: api-key + header: Authorization + value: test-key + accessControl: + mode: allow_all + policies: + - name: token-based-ratelimit + version: v1 + paths: + - path: /* + methods: ['*'] + params: + totalTokenLimits: + - count: 20 + duration: "1h" + consumerBased: true + algorithm: fixed-window + backend: memory + """ + Then the response status code should be 201 + And I wait for policy snapshot sync + + Given I set header "Content-Type" to "application/json" + + # Request 1 — no app ID, key = "ctbrl-fallback:default" — allowed, 10 tokens consumed (10/20) + When I send a POST request to "http://localhost:8080/ctbrl-fallback/chat/completions" with body: + """ + { + "model": "gpt-4", + "messages": [{"role": "user", "content": "Hello"}], + "usage": {"prompt_tokens": 4, "completion_tokens": 6, "total_tokens": 10} + } + """ + Then the response status code should be 200 + + # Request 2 — no app ID, same "default" counter — allowed (20/20, limit reached) + When I send a POST request to "http://localhost:8080/ctbrl-fallback/chat/completions" with body: + """ + { + "model": "gpt-4", + "messages": [{"role": "user", "content": "Hello"}], + "usage": {"prompt_tokens": 4, "completion_tokens": 6, "total_tokens": 10} + } + """ + Then the response status code should be 200 + + # Request 3 — "default" token counter exhausted — blocked + When I send a POST request to "http://localhost:8080/ctbrl-fallback/chat/completions" with body: + """ + { + "model": "gpt-4", + "messages": [{"role": "user", "content": "Hello"}], + "usage": {"prompt_tokens": 4, "completion_tokens": 6, "total_tokens": 10} + } + """ + Then the response status code should be 429 + + # Cleanup + Given I authenticate using basic auth as "admin" + When I delete the LLM provider 
"ctbrl-fallback-provider" + Then the response status code should be 200 + When I delete the LLM provider template "ctbrl-fallback-template" + Then the response status code should be 200 diff --git a/gateway/it/suite_test.go b/gateway/it/suite_test.go index aa52a9962..c9d1b5601 100644 --- a/gateway/it/suite_test.go +++ b/gateway/it/suite_test.go @@ -131,6 +131,9 @@ func getFeaturePaths() []string { "features/cel-conditions.feature", "features/analytics-basic.feature", "features/token-based-ratelimit.feature", + "features/consumer-token-based-ratelimit.feature", + "features/consumer-request-based-ratelimit.feature", + "features/consumer-cost-based-ratelimit.feature", "features/sandbox-routing.feature", "features/subscription-validation.feature", "features/subscription-analytics.feature", diff --git a/platform-api/src/internal/service/llm_deployment.go b/platform-api/src/internal/service/llm_deployment.go index 602d1408d..17904577d 100644 --- a/platform-api/src/internal/service/llm_deployment.go +++ b/platform-api/src/internal/service/llm_deployment.go @@ -668,6 +668,41 @@ func generateLLMProviderDeploymentYAML(provider *model.LLMProvider, templateHand }, }) } + if providerLevel.Global.Cost != nil && providerLevel.Global.Cost.Enabled { + costLimit := providerLevel.Global.Cost + duration, err := formatRateLimitDuration(costLimit.Reset.Duration, costLimit.Reset.Unit) + if err != nil { + return "", fmt.Errorf("invalid cost reset window: %w", err) + } + policies = append(policies, api.LLMPolicy{ + Name: llmCostBasedRateLimitPolicyName, + Version: "", + Paths: []api.LLMPolicyPath{ + { + Path: "/*", + Methods: []api.LLMPolicyPathMethods{"*"}, + Params: map[string]interface{}{ + "budgetLimits": []map[string]interface{}{ + {"amount": costLimit.Amount, "duration": duration}, + }, + }, + }, + }, + }) + if !hasPolicy(policies, llmCostPolicyName) { + policies = append(policies, api.LLMPolicy{ + Name: llmCostPolicyName, + Version: "", + Paths: []api.LLMPolicyPath{ + { + Path: "/*", + Methods: []api.LLMPolicyPathMethods{"*"}, + Params: map[string]interface{}{}, + }, + }, + }) + } + } } else if providerLevel.ResourceWise != nil { // Step 2.2 Handle resource-wise rate limiting defaultLimit := &providerLevel.ResourceWise.Default @@ -731,6 +766,37 @@ func generateLLMProviderDeploymentYAML(provider *model.LLMProvider, templateHand }, }) } + if defaultLimit.Cost != nil && defaultLimit.Cost.Enabled { + costLimit := defaultLimit.Cost + duration, err := formatRateLimitDuration(costLimit.Reset.Duration, costLimit.Reset.Unit) + if err != nil { + return "", fmt.Errorf("invalid cost reset window: %w", err) + } + policies = append(policies, api.LLMPolicy{ + Name: llmCostBasedRateLimitPolicyName, + Version: "", + Paths: []api.LLMPolicyPath{ + { + Path: "/*", + Methods: []api.LLMPolicyPathMethods{"*"}, + Params: map[string]interface{}{ + "budgetLimits": []map[string]interface{}{ + {"amount": costLimit.Amount, "duration": duration}, + }, + }, + }, + }, + }) + if !hasPolicy(policies, llmCostPolicyName) { + policies = append(policies, api.LLMPolicy{ + Name: llmCostPolicyName, + Version: "", + Paths: []api.LLMPolicyPath{ + {Path: "/*", Methods: []api.LLMPolicyPathMethods{"*"}, Params: map[string]interface{}{}}, + }, + }) + } + } // Step 2.2.2 Resource-wise rate limit for _, r := range providerLevel.ResourceWise.Resources { @@ -779,6 +845,35 @@ func generateLLMProviderDeploymentYAML(provider *model.LLMProvider, templateHand }, }) } + if r.Limit.Cost != nil && r.Limit.Cost.Enabled { + costLimit := r.Limit.Cost + duration, err 
:= formatRateLimitDuration(costLimit.Reset.Duration, costLimit.Reset.Unit) + if err != nil { + return "", fmt.Errorf("invalid cost reset window for resource %s: %w", r.Resource, err) + } + addOrAppendPolicyPath(&policies, llmCostBasedRateLimitPolicyName, "", api.LLMPolicyPath{ + Path: r.Resource, + Methods: []api.LLMPolicyPathMethods{"*"}, + Params: map[string]interface{}{ + "budgetLimits": []map[string]interface{}{ + {"amount": costLimit.Amount, "duration": duration}, + }, + }, + }) + if !hasPolicy(policies, llmCostPolicyName) { + policies = append(policies, api.LLMPolicy{ + Name: llmCostPolicyName, + Version: "", + Paths: []api.LLMPolicyPath{ + { + Path: "/*", + Methods: []api.LLMPolicyPathMethods{"*"}, + Params: map[string]interface{}{}, + }, + }, + }) + } + } } } } @@ -787,11 +882,183 @@ func generateLLMProviderDeploymentYAML(provider *model.LLMProvider, templateHand consumerLevel := rateLimit.ConsumerLevel if consumerLevel != nil { if consumerLevel.Global != nil { - // Handle global rate limiting - // TODO: Convert global rate limit to policy format + if consumerLevel.Global.Token != nil && consumerLevel.Global.Token.Enabled { + tokenLimit := consumerLevel.Global.Token + duration, err := formatRateLimitDuration(tokenLimit.Reset.Duration, tokenLimit.Reset.Unit) + if err != nil { + return "", fmt.Errorf("invalid consumer token reset window: %w", err) + } + policies = append(policies, api.LLMPolicy{ + Name: tokenBasedRateLimitPolicyName, + Version: "", + Paths: []api.LLMPolicyPath{ + { + Path: "/*", + Methods: []api.LLMPolicyPathMethods{"*"}, + Params: map[string]interface{}{ + "totalTokenLimits": []map[string]interface{}{ + { + "count": tokenLimit.Count, + "duration": duration, + }, + }, + "consumerBased": true, + }, + }, + }, + }) + } + if consumerLevel.Global.Request != nil && consumerLevel.Global.Request.Enabled { + requestLimit := consumerLevel.Global.Request + duration, err := formatRateLimitDuration(requestLimit.Reset.Duration, requestLimit.Reset.Unit) + if err != nil { + return "", fmt.Errorf("invalid consumer request reset window: %w", err) + } + policies = append(policies, api.LLMPolicy{ + Name: advancedRateLimitPolicyName, + Version: "", + Paths: []api.LLMPolicyPath{ + { + Path: "/*", + Methods: []api.LLMPolicyPathMethods{"*"}, + Params: map[string]interface{}{ + "quotas": []map[string]interface{}{ + { + "name": "consumer-request-limit", + "limits": []map[string]interface{}{ + { + "limit": requestLimit.Count, + "duration": duration, + }, + }, + "keyExtraction": []map[string]interface{}{ + {"type": "routename"}, + {"type": "metadata", "key": "x-wso2-application-id"}, + }, + }, + }, + }, + }, + }, + }) + } + if consumerLevel.Global.Cost != nil && consumerLevel.Global.Cost.Enabled { + costLimit := consumerLevel.Global.Cost + duration, err := formatRateLimitDuration(costLimit.Reset.Duration, costLimit.Reset.Unit) + if err != nil { + return "", fmt.Errorf("invalid consumer cost reset window: %w", err) + } + policies = append(policies, api.LLMPolicy{ + Name: llmCostBasedRateLimitPolicyName, + Version: "", + Paths: []api.LLMPolicyPath{ + { + Path: "/*", + Methods: []api.LLMPolicyPathMethods{"*"}, + Params: map[string]interface{}{ + "budgetLimits": []map[string]interface{}{ + {"amount": costLimit.Amount, "duration": duration}, + }, + "consumerBased": true, + }, + }, + }, + }) + if !hasPolicy(policies, llmCostPolicyName) { + policies = append(policies, api.LLMPolicy{ + Name: llmCostPolicyName, + Version: "", + Paths: []api.LLMPolicyPath{ + { + Path: "/*", + Methods: 
[]api.LLMPolicyPathMethods{"*"}, + Params: map[string]interface{}{}, + }, + }, + }) + } + } } else if consumerLevel.ResourceWise != nil { - // Handle resource-wise rate limiting - // TODO: Convert resource-wise rate limit to policy format + for _, r := range consumerLevel.ResourceWise.Resources { + if r.Limit.Token != nil && r.Limit.Token.Enabled { + tokenLimit := r.Limit.Token + duration, err := formatRateLimitDuration(tokenLimit.Reset.Duration, tokenLimit.Reset.Unit) + if err != nil { + return "", fmt.Errorf("invalid consumer token reset window for resource %s: %w", r.Resource, err) + } + addOrAppendPolicyPath(&policies, tokenBasedRateLimitPolicyName, "", api.LLMPolicyPath{ + Path: r.Resource, + Methods: []api.LLMPolicyPathMethods{"*"}, + Params: map[string]interface{}{ + "totalTokenLimits": []map[string]interface{}{ + { + "count": tokenLimit.Count, + "duration": duration, + }, + }, + "consumerBased": true, + }, + }) + } + if r.Limit.Request != nil && r.Limit.Request.Enabled { + requestLimit := r.Limit.Request + duration, err := formatRateLimitDuration(requestLimit.Reset.Duration, requestLimit.Reset.Unit) + if err != nil { + return "", fmt.Errorf("invalid consumer request reset window for resource %s: %w", r.Resource, err) + } + addOrAppendPolicyPath(&policies, advancedRateLimitPolicyName, "", api.LLMPolicyPath{ + Path: r.Resource, + Methods: []api.LLMPolicyPathMethods{"*"}, + Params: map[string]interface{}{ + "quotas": []map[string]interface{}{ + { + "name": "consumer-request-limit", + "limits": []map[string]interface{}{ + { + "limit": requestLimit.Count, + "duration": duration, + }, + }, + "keyExtraction": []map[string]interface{}{ + {"type": "routename"}, + {"type": "metadata", "key": "x-wso2-application-id"}, + }, + }, + }, + }, + }) + } + if r.Limit.Cost != nil && r.Limit.Cost.Enabled { + costLimit := r.Limit.Cost + duration, err := formatRateLimitDuration(costLimit.Reset.Duration, costLimit.Reset.Unit) + if err != nil { + return "", fmt.Errorf("invalid consumer cost reset window for resource %s: %w", r.Resource, err) + } + addOrAppendPolicyPath(&policies, llmCostBasedRateLimitPolicyName, "", api.LLMPolicyPath{ + Path: r.Resource, + Methods: []api.LLMPolicyPathMethods{"*"}, + Params: map[string]interface{}{ + "budgetLimits": []map[string]interface{}{ + {"amount": costLimit.Amount, "duration": duration}, + }, + "consumerBased": true, + }, + }) + if !hasPolicy(policies, llmCostPolicyName) { + policies = append(policies, api.LLMPolicy{ + Name: llmCostPolicyName, + Version: "", + Paths: []api.LLMPolicyPath{ + { + Path: "/*", + Methods: []api.LLMPolicyPathMethods{"*"}, + Params: map[string]interface{}{}, + }, + }, + }) + } + } + } } } } @@ -897,9 +1164,17 @@ func normalizePolicyVersionToMajor(version string) string { } func addOrAppendPolicyPath(policies *[]api.LLMPolicy, name, version string, path api.LLMPolicyPath) { + newConsumerBased, _ := path.Params["consumerBased"].(bool) + for i := range *policies { if (*policies)[i].Name == name && (*policies)[i].Version == version { - // TODO: Temporary + // Only merge entries that share the same scope (backend vs consumer) + if len((*policies)[i].Paths) > 0 { + existingConsumerBased, _ := (*policies)[i].Paths[0].Params["consumerBased"].(bool) + if existingConsumerBased != newConsumerBased { + continue // different scope — skip, look for another entry + } + } for _, existingPath := range (*policies)[i].Paths { if existingPath.Path == path.Path { // Keep first occurrence and avoid duplicates. 
@@ -918,6 +1193,15 @@ func addOrAppendPolicyPath(policies *[]api.LLMPolicy, name, version string, path }) } +func hasPolicy(policies []api.LLMPolicy, name string) bool { + for _, p := range policies { + if p.Name == name { + return true + } + } + return false +} + func isBoolTrue(v *bool) bool { return v != nil && *v } diff --git a/platform-api/src/internal/service/llm_deployment_test.go b/platform-api/src/internal/service/llm_deployment_test.go index da07a0ec6..137ded851 100644 --- a/platform-api/src/internal/service/llm_deployment_test.go +++ b/platform-api/src/internal/service/llm_deployment_test.go @@ -1,6 +1,7 @@ package service import ( + "strings" "testing" "platform-api/src/internal/model" @@ -17,3 +18,467 @@ func TestMapModelAuthToAPI_NormalizesApiKeyType(t *testing.T) { t.Fatalf("expected auth type to be api-key, got %q", *out.Type) } } + +func float32Ptr(f float32) *float32 { return &f } + +// providerWithConsumerLimits builds a minimal LLMProvider model carrying the given +// rate limiting config. Despite its name, the backend-only regression tests below +// reuse it with provider-level limits as well. +func providerWithConsumerLimits(rl *model.LLMRateLimitingConfig) *model.LLMProvider { + return &model.LLMProvider{ + ID: "test-provider", + Name: "Test Provider", + Version: "v1.0", + Configuration: model.LLMProviderConfig{ + Context: strPtr("/test"), + Upstream: &model.UpstreamConfig{ + Main: &model.UpstreamEndpoint{ + URL: "https://api.anthropic.com", + Auth: &model.UpstreamAuth{ + Type: "api-key", + Header: "x-api-key", + Value: "test-key", + }, + }, + }, + AccessControl: &model.LLMAccessControl{Mode: "allow_all"}, + RateLimiting: rl, + }, + } +} + +// TestGenerateYAML_ConsumerRequestLimit verifies that a consumer-only request limit +// generates a single advanced-ratelimit policy where the key extraction includes +// x-wso2-application-id (making it consumer-scoped). Unlike token/cost limits, +// the request limit does NOT use a consumerBased flag — it uses the application ID +// directly in the key extraction. +func TestGenerateYAML_ConsumerRequestLimit(t *testing.T) { + count := 100 + rl := &model.LLMRateLimitingConfig{ + ConsumerLevel: &model.RateLimitingScopeConfig{ + Global: &model.RateLimitingLimitConfig{ + Request: &model.RequestRateLimit{ + Enabled: true, + Count: count, + Reset: model.RateLimitResetWindow{Duration: 2, Unit: "hour"}, + }, + }, + }, + } + + yaml, err := generateLLMProviderDeploymentYAML(providerWithConsumerLimits(rl), "anthropic") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if !strings.Contains(yaml, "advanced-ratelimit") { + t.Error("expected advanced-ratelimit policy in generated YAML") + } + // Consumer-scoped: key extraction must include x-wso2-application-id + if !strings.Contains(yaml, "x-wso2-application-id") { + t.Error("expected x-wso2-application-id in key extraction for consumer request limit") + } + // Should NOT have a backend (non-consumer) advanced-ratelimit entry + if strings.Count(yaml, "advanced-ratelimit") > 1 { + t.Error("expected only one advanced-ratelimit policy (consumer), got more than one") + } + + t.Logf("Generated YAML:\n%s", yaml) +} + +// TestGenerateYAML_ConsumerTokenLimit verifies that a consumer token limit +// generates a token-based-ratelimit policy with consumerBased: true. 
+func TestGenerateYAML_ConsumerTokenLimit(t *testing.T) { + count := 100 + rl := &model.LLMRateLimitingConfig{ + ConsumerLevel: &model.RateLimitingScopeConfig{ + Global: &model.RateLimitingLimitConfig{ + Token: &model.TokenRateLimit{ + Enabled: true, + Count: count, + Reset: model.RateLimitResetWindow{Duration: 2, Unit: "hour"}, + }, + }, + }, + } + + yaml, err := generateLLMProviderDeploymentYAML(providerWithConsumerLimits(rl), "anthropic") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if !strings.Contains(yaml, "consumerBased: true") { + t.Error("expected consumerBased: true in generated YAML") + } + if !strings.Contains(yaml, "token-based-ratelimit") { + t.Error("expected token-based-ratelimit policy in generated YAML") + } + + t.Logf("Generated YAML:\n%s", yaml) +} + +// TestGenerateYAML_ConsumerCostLimit verifies that a consumer cost limit +// generates a llm-cost-based-ratelimit policy with consumerBased: true. +func TestGenerateYAML_ConsumerCostLimit(t *testing.T) { + rl := &model.LLMRateLimitingConfig{ + ConsumerLevel: &model.RateLimitingScopeConfig{ + Global: &model.RateLimitingLimitConfig{ + Cost: &model.CostRateLimit{ + Enabled: true, + Amount: 0.1, + Reset: model.RateLimitResetWindow{Duration: 2, Unit: "hour"}, + }, + }, + }, + } + + yaml, err := generateLLMProviderDeploymentYAML(providerWithConsumerLimits(rl), "anthropic") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if !strings.Contains(yaml, "consumerBased: true") { + t.Error("expected consumerBased: true in generated YAML") + } + if !strings.Contains(yaml, "llm-cost-based-ratelimit") { + t.Error("expected llm-cost-based-ratelimit policy in generated YAML") + } + + t.Logf("Generated YAML:\n%s", yaml) +} + +// TestGenerateYAML_BothBackendAndConsumerLimits verifies that when both a backend +// and a consumer limit are configured, two separate policies are generated — one +// without consumerBased and one with consumerBased: true. +func TestGenerateYAML_BothBackendAndConsumerLimits(t *testing.T) { + count := 100 + rl := &model.LLMRateLimitingConfig{ + ProviderLevel: &model.RateLimitingScopeConfig{ + Global: &model.RateLimitingLimitConfig{ + Token: &model.TokenRateLimit{ + Enabled: true, + Count: count, + Reset: model.RateLimitResetWindow{Duration: 1, Unit: "hour"}, + }, + }, + }, + ConsumerLevel: &model.RateLimitingScopeConfig{ + Global: &model.RateLimitingLimitConfig{ + Token: &model.TokenRateLimit{ + Enabled: true, + Count: count, + Reset: model.RateLimitResetWindow{Duration: 1, Unit: "hour"}, + }, + }, + }, + } + + yaml, err := generateLLMProviderDeploymentYAML(providerWithConsumerLimits(rl), "anthropic") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + // Should have two token-based-ratelimit policies + if strings.Count(yaml, "token-based-ratelimit") < 2 { + t.Errorf("expected two token-based-ratelimit entries, got:\n%s", yaml) + } + // One must be consumer-based + if !strings.Contains(yaml, "consumerBased: true") { + t.Error("expected consumerBased: true in generated YAML") + } + + t.Logf("Generated YAML:\n%s", yaml) +} + +// --------------------------------------------------------------------------- +// Regression: backend-only limits (no consumer) +// --------------------------------------------------------------------------- + +// TestGenerateYAML_BackendOnlyTokenLimit verifies that a backend-only token limit +// generates a token-based-ratelimit policy without consumerBased. 
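+//
+// The substring assertions below rely on the backend branch omitting the
+// consumerBased key entirely rather than emitting "consumerBased: false";
+// otherwise strings.Contains(yaml, "consumerBased") could not distinguish
+// the two scopes.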
+func TestGenerateYAML_BackendOnlyTokenLimit(t *testing.T) { + count := 500 + rl := &model.LLMRateLimitingConfig{ + ProviderLevel: &model.RateLimitingScopeConfig{ + Global: &model.RateLimitingLimitConfig{ + Token: &model.TokenRateLimit{Enabled: true, Count: count, Reset: model.RateLimitResetWindow{Duration: 1, Unit: "hour"}}, + }, + }, + } + yaml, err := generateLLMProviderDeploymentYAML(providerWithConsumerLimits(rl), "anthropic") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !strings.Contains(yaml, "token-based-ratelimit") { + t.Error("expected token-based-ratelimit in generated YAML") + } + if strings.Contains(yaml, "consumerBased") { + t.Error("expected no consumerBased for backend-only limit") + } + t.Logf("Generated YAML:\n%s", yaml) +} + +// TestGenerateYAML_BackendOnlyRequestLimit verifies that a backend-only request limit +// generates an advanced-ratelimit policy with quota name "request-limit" (not "consumer-request-limit") +// and without x-wso2-application-id in the key extraction. +func TestGenerateYAML_BackendOnlyRequestLimit(t *testing.T) { + count := 500 + rl := &model.LLMRateLimitingConfig{ + ProviderLevel: &model.RateLimitingScopeConfig{ + Global: &model.RateLimitingLimitConfig{ + Request: &model.RequestRateLimit{Enabled: true, Count: count, Reset: model.RateLimitResetWindow{Duration: 1, Unit: "hour"}}, + }, + }, + } + yaml, err := generateLLMProviderDeploymentYAML(providerWithConsumerLimits(rl), "anthropic") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !strings.Contains(yaml, "advanced-ratelimit") { + t.Error("expected advanced-ratelimit in generated YAML") + } + if strings.Contains(yaml, "x-wso2-application-id") { + t.Error("expected no x-wso2-application-id for backend-only request limit") + } + if !strings.Contains(yaml, "request-limit") { + t.Error("expected quota name 'request-limit' in generated YAML") + } + if strings.Contains(yaml, "consumer-request-limit") { + t.Error("expected no 'consumer-request-limit' for backend-only request limit") + } + t.Logf("Generated YAML:\n%s", yaml) +} + +// TestGenerateYAML_BackendOnlyCostLimit verifies that a backend-only cost limit +// generates an llm-cost-based-ratelimit policy without consumerBased, plus one llm-cost policy. 
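+//
+// Illustrative fragment, assuming the backend branch emits the same
+// budgetLimits shape as the consumer branch in llm_deployment.go, minus the
+// consumerBased flag, with the companion llm-cost policy appended alongside:
+//
+//	- name: llm-cost-based-ratelimit
+//	  paths:
+//	    - path: /*
+//	      params:
+//	        budgetLimits:
+//	          - amount: 1
+//	            duration: 1h
+//	- name: llm-cost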
+func TestGenerateYAML_BackendOnlyCostLimit(t *testing.T) {
+	rl := &model.LLMRateLimitingConfig{
+		ProviderLevel: &model.RateLimitingScopeConfig{
+			Global: &model.RateLimitingLimitConfig{
+				Cost: &model.CostRateLimit{Enabled: true, Amount: 1.0, Reset: model.RateLimitResetWindow{Duration: 1, Unit: "hour"}},
+			},
+		},
+	}
+	yaml, err := generateLLMProviderDeploymentYAML(providerWithConsumerLimits(rl), "anthropic")
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if !strings.Contains(yaml, "llm-cost-based-ratelimit") {
+		t.Error("expected llm-cost-based-ratelimit in generated YAML")
+	}
+	if strings.Contains(yaml, "consumerBased") {
+		t.Error("expected no consumerBased for backend-only cost limit")
+	}
+	// "llm-cost" is a substring of "llm-cost-based-ratelimit", so match the
+	// exact policy name line instead of counting substring occurrences.
+	if !strings.Contains(yaml, "name: llm-cost\n") {
+		t.Error("expected llm-cost policy alongside llm-cost-based-ratelimit")
+	}
+	t.Logf("Generated YAML:\n%s", yaml)
+}
+
+func TestGenerateYAML_BackendResourceWiseDefaultCostLimit(t *testing.T) {
+	rl := &model.LLMRateLimitingConfig{
+		ProviderLevel: &model.RateLimitingScopeConfig{
+			ResourceWise: &model.ResourceWiseRateLimitingConfig{
+				Default: model.RateLimitingLimitConfig{
+					Cost: &model.CostRateLimit{Enabled: true, Amount: 0.10, Reset: model.RateLimitResetWindow{Duration: 24, Unit: "hour"}},
+				},
+			},
+		},
+	}
+	yaml, err := generateLLMProviderDeploymentYAML(providerWithConsumerLimits(rl), "anthropic")
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if !strings.Contains(yaml, "llm-cost-based-ratelimit") {
+		t.Error("expected llm-cost-based-ratelimit in generated YAML")
+	}
+	if !strings.Contains(yaml, "budgetLimits") {
+		t.Error("expected budgetLimits in generated YAML")
+	}
+	if strings.Contains(yaml, "consumerBased") {
+		t.Error("expected no consumerBased for backend-only cost limit")
+	}
+	t.Logf("Generated YAML:\n%s", yaml)
+}
+
+func TestGenerateYAML_BackendPerResourceCostLimit(t *testing.T) {
+	rl := &model.LLMRateLimitingConfig{
+		ProviderLevel: &model.RateLimitingScopeConfig{
+			ResourceWise: &model.ResourceWiseRateLimitingConfig{
+				Default: model.RateLimitingLimitConfig{},
+				Resources: []model.RateLimitingResourceLimit{
+					{
+						Resource: "/v1/messages",
+						Limit: model.RateLimitingLimitConfig{
+							Cost: &model.CostRateLimit{Enabled: true, Amount: 0.02, Reset: model.RateLimitResetWindow{Duration: 1, Unit: "hour"}},
+						},
+					},
+				},
+			},
+		},
+	}
+	yaml, err := generateLLMProviderDeploymentYAML(providerWithConsumerLimits(rl), "anthropic")
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if !strings.Contains(yaml, "llm-cost-based-ratelimit") {
+		t.Error("expected llm-cost-based-ratelimit in generated YAML")
+	}
+	if !strings.Contains(yaml, "budgetLimits") {
+		t.Error("expected budgetLimits in generated YAML")
+	}
+	if !strings.Contains(yaml, "/v1/messages") {
+		t.Error("expected resource path /v1/messages in generated YAML")
+	}
+	if strings.Contains(yaml, "consumerBased") {
+		t.Error("expected no consumerBased for backend-only cost limit")
+	}
+	t.Logf("Generated YAML:\n%s", yaml)
+}
+
+// ---------------------------------------------------------------------------
+// Backend + consumer for individual limit types
+// ---------------------------------------------------------------------------
+
+// TestGenerateYAML_BothBackendAndConsumerRequestLimits verifies that backend and consumer
+// request limits produce two advanced-ratelimit policies with distinct quota names:
+// "request-limit" (backend, no app-id key) and "consumer-request-limit" (consumer, with app-id key).
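+//
+// The consumer quota's key extraction (taken from the consumer request branch
+// in llm_deployment.go) is expected to serialize roughly as:
+//
+//	keyExtraction:
+//	  - type: routename
+//	  - type: metadata
+//	    key: x-wso2-application-id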
+func TestGenerateYAML_BothBackendAndConsumerRequestLimits(t *testing.T) { + count := 100 + rl := &model.LLMRateLimitingConfig{ + ProviderLevel: &model.RateLimitingScopeConfig{ + Global: &model.RateLimitingLimitConfig{ + Request: &model.RequestRateLimit{Enabled: true, Count: count, Reset: model.RateLimitResetWindow{Duration: 1, Unit: "hour"}}, + }, + }, + ConsumerLevel: &model.RateLimitingScopeConfig{ + Global: &model.RateLimitingLimitConfig{ + Request: &model.RequestRateLimit{Enabled: true, Count: count, Reset: model.RateLimitResetWindow{Duration: 1, Unit: "hour"}}, + }, + }, + } + yaml, err := generateLLMProviderDeploymentYAML(providerWithConsumerLimits(rl), "anthropic") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if strings.Count(yaml, "advanced-ratelimit") < 2 { + t.Error("expected two advanced-ratelimit policies (one backend, one consumer)") + } + if !strings.Contains(yaml, "consumer-request-limit") { + t.Error("expected 'consumer-request-limit' quota name for consumer policy") + } + if !strings.Contains(yaml, "x-wso2-application-id") { + t.Error("expected x-wso2-application-id in consumer policy key extraction") + } + t.Logf("Generated YAML:\n%s", yaml) +} + +// TestGenerateYAML_BothBackendAndConsumerCostLimits verifies that backend and consumer +// cost limits produce two llm-cost-based-ratelimit policies (one with consumerBased: true) +// and exactly one llm-cost policy (not duplicated). +func TestGenerateYAML_BothBackendAndConsumerCostLimits(t *testing.T) { + rl := &model.LLMRateLimitingConfig{ + ProviderLevel: &model.RateLimitingScopeConfig{ + Global: &model.RateLimitingLimitConfig{ + Cost: &model.CostRateLimit{Enabled: true, Amount: 1.0, Reset: model.RateLimitResetWindow{Duration: 1, Unit: "hour"}}, + }, + }, + ConsumerLevel: &model.RateLimitingScopeConfig{ + Global: &model.RateLimitingLimitConfig{ + Cost: &model.CostRateLimit{Enabled: true, Amount: 0.1, Reset: model.RateLimitResetWindow{Duration: 1, Unit: "hour"}}, + }, + }, + } + yaml, err := generateLLMProviderDeploymentYAML(providerWithConsumerLimits(rl), "anthropic") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if strings.Count(yaml, "llm-cost-based-ratelimit") < 2 { + t.Error("expected two llm-cost-based-ratelimit policies (backend + consumer)") + } + if !strings.Contains(yaml, "consumerBased: true") { + t.Error("expected consumerBased: true on consumer cost policy") + } + // llm-cost must appear exactly once — hasPolicy check prevents duplication + if strings.Count(yaml, "name: llm-cost\n") != 1 { + t.Errorf("expected exactly one llm-cost policy, got:\n%s", yaml) + } + t.Logf("Generated YAML:\n%s", yaml) +} + +// --------------------------------------------------------------------------- +// Edge cases +// --------------------------------------------------------------------------- + +// TestGenerateYAML_DisabledLimitIsSkipped verifies that a limit with Enabled: false +// produces no rate limiting policies. 
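+// Each limit type carries its own Enabled flag and the generator gates every
+// branch on it, so the disabled token and cost limits here must be skipped
+// independently rather than via an all-or-nothing check.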
+func TestGenerateYAML_DisabledLimitIsSkipped(t *testing.T) {
+	count := 100
+	rl := &model.LLMRateLimitingConfig{
+		ConsumerLevel: &model.RateLimitingScopeConfig{
+			Global: &model.RateLimitingLimitConfig{
+				Token: &model.TokenRateLimit{Enabled: false, Count: count, Reset: model.RateLimitResetWindow{Duration: 1, Unit: "hour"}},
+				Cost:  &model.CostRateLimit{Enabled: false, Amount: 0.5, Reset: model.RateLimitResetWindow{Duration: 1, Unit: "hour"}},
+			},
+		},
+	}
+	yaml, err := generateLLMProviderDeploymentYAML(providerWithConsumerLimits(rl), "anthropic")
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if strings.Contains(yaml, "token-based-ratelimit") {
+		t.Error("expected no token-based-ratelimit for disabled token limit")
+	}
+	if strings.Contains(yaml, "llm-cost-based-ratelimit") {
+		t.Error("expected no llm-cost-based-ratelimit for disabled cost limit")
+	}
+	t.Logf("Generated YAML:\n%s", yaml)
+}
+
+// TestGenerateYAML_AllThreeConsumerLimits verifies the full consumer-scope UI
+// scenario: consumer request + token + cost all enabled, no backend limits.
+func TestGenerateYAML_AllThreeConsumerLimits(t *testing.T) {
+	count := 100
+	rl := &model.LLMRateLimitingConfig{
+		ProviderLevel: &model.RateLimitingScopeConfig{
+			Global: &model.RateLimitingLimitConfig{},
+		},
+		ConsumerLevel: &model.RateLimitingScopeConfig{
+			Global: &model.RateLimitingLimitConfig{
+				Request: &model.RequestRateLimit{
+					Enabled: true,
+					Count:   count,
+					Reset:   model.RateLimitResetWindow{Duration: 2, Unit: "hour"},
+				},
+				Token: &model.TokenRateLimit{
+					Enabled: true,
+					Count:   count,
+					Reset:   model.RateLimitResetWindow{Duration: 2, Unit: "hour"},
+				},
+				Cost: &model.CostRateLimit{
+					Enabled: true,
+					Amount:  0.1,
+					Reset:   model.RateLimitResetWindow{Duration: 2, Unit: "hour"},
+				},
+			},
+		},
+	}
+
+	yaml, err := generateLLMProviderDeploymentYAML(providerWithConsumerLimits(rl), "anthropic")
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	checks := []string{
+		"advanced-ratelimit",
+		"token-based-ratelimit",
+		"llm-cost-based-ratelimit",
+		"consumerBased: true",
+	}
+	for _, want := range checks {
+		if !strings.Contains(yaml, want) {
+			t.Errorf("expected %q in generated YAML", want)
+		}
+	}
+
+	t.Logf("Generated YAML:\n%s", yaml)
+}