Skip to content

Instantly share code, notes, and snippets.

@robrita
Created November 28, 2024 07:37
Show Gist options
  • Save robrita/ccbccf9a7c1aa7b51a1621ae73c2a52b to your computer and use it in GitHub Desktop.
Save robrita/ccbccf9a7c1aa7b51a1621ae73c2a52b to your computer and use it in GitHub Desktop.
<policies>
    <inbound>
        <base />

        <!--
            Token metering. The per-minute ceiling is set very high so that this
            policy acts as a usage counter (one counter per subscription ID).
            The number of tokens still remaining is stored in the "tpmRemaining"
            context variable.
        -->
        <set-variable name="tpmAbsoluteMax" value="@(1000000)" />
        <azure-openai-token-limit
            counter-key="@(context.Subscription.Id)"
            tokens-per-minute="@((int)context.Variables["tpmAbsoluteMax"])"
            estimate-prompt-tokens="true"
            remaining-tokens-variable-name="tpmRemaining" />

        <!--
            Backend selection. tpmUsed is the ceiling minus the remaining tokens,
            or 0 when "tpmRemaining" has not been set. tpmPtuLimit is a fixed
            threshold here; it could instead be chosen per subscription ID or
            from any other input.
        -->
        <set-variable name="tpmPtuLimit" value="@(1000)" />
        <set-variable name="tpmUsed" value="@(context.Variables.ContainsKey("tpmRemaining") ? (int)context.Variables["tpmAbsoluteMax"] - (int)context.Variables["tpmRemaining"] : 0)" />
        <choose>
            <when condition="@((int)context.Variables["tpmUsed"] > (int)context.Variables["tpmPtuLimit"])">
                <!-- Usage is above tpmPtuLimit: route to the "payg" backend. -->
                <set-variable name="targetBackend" value="demo-openai-payg-kv3xbafpzpzra" />
            </when>
            <otherwise>
                <!-- Usage is at or below tpmPtuLimit: route to the "ptu" backend. -->
                <set-variable name="targetBackend" value="demo-openai-ptu-kv3xbafpzpzra" />
            </otherwise>
        </choose>
        <set-backend-service backend-id="@((string)context.Variables["targetBackend"])" />

        <!--
            Authentication to AOAI: remove the incoming subscription key header,
            then authenticate with the managed identity for the Cognitive
            Services resource.
        -->
        <set-header exists-action="delete" name="ocp-apim-subscription-key" />
        <authentication-managed-identity resource="https://cognitiveservices.azure.com/" />
    </inbound>

    <backend>
        <base />
    </backend>

    <outbound>
        <!-- Report the backend ID chosen in the inbound section via a response header. -->
        <set-header exists-action="override" name="X-backend-id">
            <value>@((string)context.Variables["targetBackend"])</value>
        </set-header>
        <base />
    </outbound>

    <on-error>
        <base />
    </on-error>
</policies>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment