Created
November 28, 2024 07:37
-
-
Save robrita/ccbccf9a7c1aa7b51a1621ae73c2a52b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<policies>
    <!--
        Routes each request to one of two Azure OpenAI backends based on how many
        tokens the calling subscription has consumed, as derived from the
        token-limit counter below. Presumably "ptu" is a provisioned-throughput
        deployment and "payg" a pay-as-you-go deployment; confirm against the
        backend definitions.
    -->
    <inbound>
        <base />

        <!--
            The limit is set very high so that the policy acts as a usage meter
            rather than a throttle: its purpose here is to populate tpmRemaining.
            The counter is keyed by subscription ID.
        -->
        <set-variable name="tpmAbsoluteMax" value="@(1000000)" />
        <azure-openai-token-limit
            tokens-per-minute="@((int)context.Variables["tpmAbsoluteMax"])"
            counter-key="@(context.Subscription.Id)"
            estimate-prompt-tokens="true"
            remaining-tokens-variable-name="tpmRemaining" />

        <!--
            Usage threshold above which traffic moves to the "payg" backend.
            Hard-coded here; it could instead be derived per subscription or from
            any other request attribute.
        -->
        <set-variable name="tpmPtuLimit" value="@(1000)" />

        <!-- Tokens used = ceiling minus remaining; treated as 0 when tpmRemaining was not set. -->
        <set-variable name="tpmUsed" value="@(!context.Variables.ContainsKey("tpmRemaining") ? 0 : (int)context.Variables["tpmAbsoluteMax"] - (int)context.Variables["tpmRemaining"])" />

        <!-- Strictly above the threshold selects the "payg" backend; otherwise the "ptu" backend. -->
        <set-variable name="targetBackend" value="@((int)context.Variables["tpmUsed"] > (int)context.Variables["tpmPtuLimit"] ? "demo-openai-payg-kv3xbafpzpzra" : "demo-openai-ptu-kv3xbafpzpzra")" />
        <set-backend-service backend-id="@((string)context.Variables["targetBackend"])" />

        <!--
            Authentication to Azure OpenAI: remove the caller's APIM subscription
            key header, then authenticate with the gateway's managed identity for
            the Cognitive Services resource.
        -->
        <set-header name="ocp-apim-subscription-key" exists-action="delete" />
        <authentication-managed-identity resource="https://cognitiveservices.azure.com/" />
    </inbound>
    <backend>
        <base />
    </backend>
    <outbound>
        <!-- Report the selected backend to the caller in a response header. -->
        <set-header name="X-backend-id" exists-action="override">
            <value>@((string)context.Variables["targetBackend"])</value>
        </set-header>
        <base />
    </outbound>
    <on-error>
        <base />
    </on-error>
</policies>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment