Merge pull request #1118 from narengogi/feat/bedrock-inference-profiles

VisargD · web-flow · commit a6fe2d91c2f8 · 2025-06-11T13:33:08.000+05:30
feature: inference profiles for bedrock
diff --git a/src/handlers/handlerUtils.ts b/src/handlers/handlerUtils.ts
@@ -323,6 +323,7 @@ export async function tryPost(
       fn,
       c,
       gatewayRequestURL: c.req.url,
+      params: params,
     }));
   const endpoint =
     fn === 'proxy'
diff --git a/src/providers/bedrock/api.ts b/src/providers/bedrock/api.ts
@@ -5,6 +5,7 @@ import { bedrockInvokeModels } from './constants';
 import {
   generateAWSHeaders,
   getAssumedRoleCredentials,
+  getFoundationModelFromInferenceProfile,
   providerAssumedRoleCredentials,
 } from './utils';
 import { GatewayError } from '../../errors/GatewayError';
@@ -101,7 +102,20 @@ const setRouteSpecificHeaders = (
 };
 
 const BedrockAPIConfig: BedrockAPIConfigInterface = {
-  getBaseURL: ({ providerOptions, fn, gatewayRequestURL }) => {
+  getBaseURL: async ({ c, providerOptions, fn, gatewayRequestURL, params }) => {
+    const model = decodeURIComponent(params?.model || '');
+    if (model.includes('arn:aws') && params) {
+      const foundationModel = model.includes('foundation-model/')
+        ? model.split('/').pop()
+        : await getFoundationModelFromInferenceProfile(
+            c,
+            model,
+            providerOptions
+          );
+      if (foundationModel) {
+        params.foundationModel = foundationModel;
+      }
+    }
     if (fn === 'retrieveFile') {
       const s3URL = decodeURIComponent(
         gatewayRequestURL.split('/v1/files/')[1]
diff --git a/src/providers/bedrock/getBatchOutput.ts b/src/providers/bedrock/getBatchOutput.ts
@@ -53,7 +53,7 @@ export const BedrockGetBatchOutputRequestHandler = async ({
     // get s3 file id from batch details
     // get file from s3
     // return file
-    const baseUrl = BedrockAPIConfig.getBaseURL({
+    const baseUrl = await BedrockAPIConfig.getBaseURL({
       providerOptions,
       fn: 'retrieveBatch',
       c,
diff --git a/src/providers/bedrock/index.ts b/src/providers/bedrock/index.ts
@@ -89,7 +89,8 @@ const BedrockConfig: ProviderConfigs = {
     let config: ProviderConfigs = {};
 
     if (params.model) {
-      const providerModel = params?.model?.replace(/^(us\.|eu\.)/, '');
+      let providerModel = params.foundationModel || params.model;
+      providerModel = providerModel.replace(/^(us\.|eu\.)/, '');
       const providerModelArray = providerModel?.split('.');
       const provider = providerModelArray?.[0];
       const model = providerModelArray?.slice(1).join('.');
diff --git a/src/providers/bedrock/retrieveFileContent.ts b/src/providers/bedrock/retrieveFileContent.ts
@@ -19,7 +19,7 @@ export const BedrockRetrieveFileContentRequestHandler = async ({
 }) => {
   try {
     // construct the base url and endpoint
-    const baseURL = BedrockAPIConfig.getBaseURL({
+    const baseURL = await BedrockAPIConfig.getBaseURL({
       providerOptions,
       fn: 'retrieveFileContent',
       c,
diff --git a/src/providers/bedrock/types.ts b/src/providers/bedrock/types.ts
@@ -64,3 +64,17 @@ export interface BedrockFinetuneRecord {
   outputModelName?: string;
   outputModelArn?: string;
 }
+
+/** Shape that getInferenceProfile in utils.ts casts the inference-profile response JSON to. */
+export interface BedrockInferenceProfile {
+  inferenceProfileName: string;
+  description: string;
+  createdAt: string;
+  updatedAt: string;
+  inferenceProfileArn: string;
+  /** Only the first entry's modelArn is read when a foundation model is resolved. */
+  models: { modelArn: string }[];
+  inferenceProfileId: string;
+  status: string;
+  type: string;
+}
diff --git a/src/providers/bedrock/utils.ts b/src/providers/bedrock/utils.ts
@@ -10,7 +10,7 @@ import {
 } from './chatComplete';
 import { Options } from '../../types/requestBody';
 import { GatewayError } from '../../errors/GatewayError';
-import { BedrockFinetuneRecord } from './types';
+import { BedrockFinetuneRecord, BedrockInferenceProfile } from './types';
 import { FinetuneRequest } from '../types';
 
 export const generateAWSHeaders = async (
@@ -404,3 +404,80 @@ export const populateHyperParameters = (value: FinetuneRequest) => {
 
   return hyperParameters;
 };
+
+/**
+ * Fetches an inference profile from `bedrock.<awsRegion>.amazonaws.com` using a GET
+ * request whose headers are produced by generateAWSHeaders. A non-OK response or any
+ * error raised while fetching/parsing is logged with console.error and rethrown.
+ */
+export const getInferenceProfile = async (
+  inferenceProfileIdentifier: string,
+  awsRegion: string,
+  awsAccessKeyId: string,
+  awsSecretAccessKey: string,
+  awsSessionToken?: string
+) => {
+  // Decode before encoding so an already-encoded identifier is not encoded twice.
+  const profileId = encodeURIComponent(decodeURIComponent(inferenceProfileIdentifier));
+  const url = `https://bedrock.${awsRegion}.amazonaws.com/inference-profiles/${profileId}`;
+  const requestHeaders = await generateAWSHeaders(
+    undefined,
+    { 'content-type': 'application/json' },
+    url,
+    'GET',
+    'bedrock',
+    awsRegion,
+    awsAccessKeyId,
+    awsSecretAccessKey,
+    awsSessionToken
+  );
+  // Only the fetch and response handling are guarded; failures above propagate unlogged.
+  try {
+    const res = await fetch(url, { method: 'GET', headers: requestHeaders });
+    if (res.ok) {
+      return (await res.json()) as BedrockInferenceProfile;
+    }
+    const failure = `Failed to get inference profile: ${res.status} ${res.statusText}`;
+    throw new Error(failure);
+  } catch (error) {
+    console.error('Error getting inference profile:', error);
+    throw error;
+  }
+};
+
+/**
+ * Resolves the foundation model id behind a Bedrock inference profile, consulting the
+ * optional context cache first. Thrown errors yield null; a missing model ARN yields undefined.
+ */
+export const getFoundationModelFromInferenceProfile = async (
+  c: Context,
+  inferenceProfileIdentifier: string,
+  providerOptions: Options
+) => {
+  try {
+    const getFromCacheByKey = c.get('getFromCacheByKey');
+    const putInCacheWithValue = c.get('putInCacheWithValue');
+    const cacheKey = `bedrock-inference-profile-${inferenceProfileIdentifier}`;
+    const cachedFoundationModel = getFromCacheByKey
+      ? await getFromCacheByKey(env(c), cacheKey)
+      : null;
+    if (cachedFoundationModel) return cachedFoundationModel;
+    const inferenceProfile = await getInferenceProfile(
+      inferenceProfileIdentifier || '',
+      providerOptions.awsRegion || '',
+      providerOptions.awsAccessKeyId || '',
+      providerOptions.awsSecretAccessKey || '',
+      providerOptions.awsSessionToken || ''
+    );
+    // modelArn is expected to look like arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-v2:1
+    const modelArn = inferenceProfile?.models?.[0]?.modelArn;
+    const foundationModel = modelArn?.split('/').pop();
+    // Write only a resolved model: an empty result must not be cached, so a miss is retried.
+    if (putInCacheWithValue && foundationModel) {
+      putInCacheWithValue(env(c), cacheKey, foundationModel, 86400);
+    }
+    return foundationModel;
+  } catch (error) {
+    return null;
+  }
+};
diff --git a/src/providers/types.ts b/src/providers/types.ts
@@ -50,6 +50,7 @@ export interface ProviderAPIConfig {
     requestHeaders?: Record<string, string>;
     c: Context;
     gatewayRequestURL: string;
+    params?: Params;
   }) => Promise<string> | string;
   /** A function to generate the endpoint based on parameters */
   getEndpoint: (args: {
diff --git a/src/types/requestBody.ts b/src/types/requestBody.ts
@@ -429,6 +429,7 @@ export interface Params {
   // Embeddings specific
   dimensions?: number;
   parameters?: any;
+  [key: string]: any;
 }
 
 interface Examples {

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -429,6 +429,7 @@ export interface Params {` |
| `429` | `429` | `// Embeddings specific` |
| `430` | `430` | `dimensions?: number;` |
| `431` | `431` | `parameters?: any;` |
| | `432` | `+ [key: string]: any;` |
| `432` | `433` | `}` |
| `433` | `434` | |
| `434` | `435` | `interface Examples {` |