Skip to content
Open
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
ef99ad6
Rate limits and budgets
narengogi Sep 22, 2025
6d123db
remvoe bindings
narengogi Sep 30, 2025
d55ecf8
remove kv
narengogi Sep 30, 2025
169e7d2
Merge remote-tracking branch 'upstream/main' into feature/ratelimits-…
narengogi Sep 30, 2025
8407df1
rebase
narengogi Sep 30, 2025
1d99b8a
Apply suggestion from @matter-code-review[bot]
narengogi Oct 1, 2025
1c3d5c5
Apply suggestion from @matter-code-review[bot]
narengogi Oct 1, 2025
2b379fb
Apply suggestion from @matter-code-review[bot]
narengogi Oct 1, 2025
0bb75dd
Apply suggestion from @matter-code-review[bot]
narengogi Oct 1, 2025
05e42a3
handle settings
narengogi Oct 1, 2025
00c00e8
handle redis rate limiter tokens rate limiting when tokens to decreme…
narengogi Oct 7, 2025
a29315e
remove cache backend changes
narengogi Oct 9, 2025
9cc932d
remove unused imports
narengogi Oct 9, 2025
2505510
update settings example
narengogi Oct 9, 2025
bc56171
update settings initializer
narengogi Oct 9, 2025
99f6262
dont hardcode id
narengogi Oct 9, 2025
d331fd8
remove unused variable
narengogi Oct 9, 2025
f4cdbe3
handle nulls
narengogi Oct 9, 2025
d2ca7ef
remove import
narengogi Oct 9, 2025
b372b2f
delete transfer encoding for node
narengogi Oct 9, 2025
a30cfff
extend redis client backend to support redis specific functionality
narengogi Oct 10, 2025
ec153fb
dont crash on unhandled promise rejections and other changes like ren…
narengogi Oct 10, 2025
451e6d2
Merge remote-tracking branch 'upstream/main' into feature/ratelimits-…
narengogi Oct 21, 2025
ba11027
fix conf.example
narengogi Oct 21, 2025
06ae963
Merge remote-tracking branch 'upstream/main' into feature/ratelimits-…
narengogi Oct 24, 2025
421897e
Merge remote-tracking branch 'upstream/main' into feature/ratelimits-…
narengogi Oct 28, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 65 additions & 0 deletions initializeSettings.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/**
 * Static organisation record for this deployment (named 'Portkey self hosted').
 * It is attached unchanged as `settings.organisationDetails` once the settings
 * file has been imported successfully.
 */
const organisationDetails = {
// Fixed all-zero UUID used as the organisation identifier.
id: '00000000-0000-0000-0000-000000000000',
name: 'Portkey self hosted',
settings: {
// NOTE(review): presumably 1 means "enabled" for these numeric flags — confirm against the consumers.
debug_log: 1,
is_virtual_key_limit_enabled: 1,
allowed_guardrails: ['BASIC'],
},
// No workspace-level details are configured here.
workspaceDetails: {},
defaults: {
metadata: null,
},
// Organisation-level limits start out empty.
usageLimits: [],
rateLimits: [],
organisationDefaults: {
input_guardrails: null,
},
};

/**
 * Converts the `integrations` array from the settings file into virtual-key
 * shaped records.
 *
 * The record's `id`, `integration_id` and `integration_details.id` are taken
 * from the integration's `slug`. Previously every record carried the same
 * hard-coded placeholder, so anything keyed by id (for example a cache) could
 * not tell two integrations apart. Entries without a usable slug keep the
 * legacy placeholder so their output is unchanged.
 *
 * @param integrations - Array of integration entries; each entry is read for
 *   `provider`, `slug`, `credentials`, `usage_limits`, `rate_limits` and `models`.
 * @returns One record per entry, in input order.
 * @throws TypeError when `integrations` is not an array.
 */
const transformIntegrations = (integrations: any) => {
  if (!Array.isArray(integrations)) {
    throw new TypeError('settings "integrations" must be an array');
  }

  return integrations.map((integration: any) => {
    // `||` (not `??`) on purpose: an empty-string slug is not a usable id either.
    const integrationId = integration.slug || '1234567890';

    return {
      id: integrationId,
      ai_provider_name: integration.provider,
      model_config: {
        ...integration.credentials,
      },
      // Only expose `key` when the credentials actually carry an API key.
      ...(integration.credentials?.apiKey && {
        key: integration.credentials.apiKey,
      }),
      slug: integration.slug,
      usage_limits: null,
      status: 'active',
      integration_id: integrationId,
      object: 'virtual-key',
      integration_details: {
        id: integrationId,
        slug: integration.slug,
        usage_limits: integration.usage_limits,
        rate_limits: integration.rate_limits,
        models: integration.models,
      },
    };
  });
};

/**
 * Gateway settings, populated once at module load from `./settings.json`.
 * Remains `undefined` when the file cannot be imported; the failure is logged
 * as a warning and does not abort start-up.
 */
let settings: any = undefined;
try {
  // @ts-expect-error settings.json is optional and may not exist at type-check time
  const settingsModule = await import('./settings.json');
  // Some module systems expose a JSON module's content only as `default`,
  // others also as named keys; accept either shape so integrations are not
  // silently dropped.
  const settingsFile = settingsModule?.default ?? settingsModule;
  if (settingsFile) {
    settings = {};
    settings.organisationDetails = organisationDetails;
    if (settingsFile.integrations) {
      settings.integrations = transformIntegrations(settingsFile.integrations);
    }
  }
} catch (error) {
  // Best effort: a missing or unreadable settings file is not fatal.
  console.log(
    'WARNING: Unable to import settings from the path, please make sure the file exists',
    error
  );
}

export { settings };
92 changes: 89 additions & 3 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
"async-retry": "^1.3.3",
"avsc": "^5.7.7",
"hono": "^4.6.10",
"ioredis": "^5.8.0",
"jose": "^6.0.11",
"patch-package": "^8.0.0",
"ws": "^8.18.0",
Expand Down
32 changes: 32 additions & 0 deletions settings.example.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
{
"integrations": [
{
"provider": "anthropic",
"slug": "dev_team_anthropic",
"credentials": {
"apiKey": "sk-ant-"
},
"rate_limits": [
{
"type": "requests",
"unit": "rph",
"value": 3
}
],
"usage_limits": [
{
"type": "tokens",
"credit_limit": 1000000,
"periodic_reset": "weekly"
}
],
"models": [
{
"slug": "claude-3-7-sonnet-20250219",
"status": "active",
"pricing_config": null
}
]
}
]
}
13 changes: 13 additions & 0 deletions src/globals.ts
Original file line number Diff line number Diff line change
Expand Up @@ -264,3 +264,16 @@ export enum BatchEndpoints {
COMPLETIONS = '/v1/completions',
EMBEDDINGS = '/v1/embeddings',
}

/**
 * Names of the supported atomic operations; each value is the same string as its key.
 * NOTE(review): presumably consumed by the rate-limit / usage-limit counters — confirm against callers.
 */
export const AtomicOperations = {
GET: 'GET',
RESET: 'RESET',
INCREMENT: 'INCREMENT',
DECREMENT: 'DECREMENT',
};

/**
 * String enum of the key types a rate limiter can be scoped to.
 * NOTE(review): exact scoping semantics of each member are not visible here — confirm against the rate limiter implementation.
 */
export enum RateLimiterKeyTypes {
VIRTUAL_KEY = 'VIRTUAL_KEY',
API_KEY = 'API_KEY',
INTEGRATION_WORKSPACE = 'INTEGRATION_WORKSPACE',
}
23 changes: 20 additions & 3 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import { Context, Hono } from 'hono';
import { prettyJSON } from 'hono/pretty-json';
import { HTTPException } from 'hono/http-exception';
import { compress } from 'hono/compress';
import { getRuntimeKey } from 'hono/adapter';
import { env, getRuntimeKey } from 'hono/adapter';
// import { env } from 'hono/adapter' // Have to set this up for multi-environment deployment

// Middlewares
Expand Down Expand Up @@ -37,17 +37,34 @@ import { imageEditsHandler } from './handlers/imageEditsHandler';
// Config
import conf from '../conf.json';
import modelResponsesHandler from './handlers/modelResponsesHandler';
import {
createCacheBackendsLocal,
createCacheBackendsRedis,
createCacheBackendsCF,
} from './shared/services/cache';
import { messagesCountTokensHandler } from './handlers/messagesCountTokensHandler';

// Create a new Hono server instance
const app = new Hono();
const runtime = getRuntimeKey();

// Cache backends are only created during worker or app initialization, depending on the runtime
if (getRuntimeKey() === 'workerd') {
app.use('*', (c: Context, next) => {
createCacheBackendsCF(env(c));
return next();
});
} else if (getRuntimeKey() === 'node' && process.env.REDIS_CONNECTION_STRING) {
createCacheBackendsRedis(process.env.REDIS_CONNECTION_STRING);
} else {
createCacheBackendsLocal();
}

/**
* Middleware that conditionally applies compression middleware based on the runtime.
* Compression is automatically handled for lagon and workerd runtimes
* This checks whether the runtime is neither of the two and, if so, applies the compress middleware, which avoids double compression.
*/

const runtime = getRuntimeKey();
app.use('*', (c, next) => {
const runtimesThatDontNeedCompression = ['lagon', 'workerd', 'node'];
if (runtimesThatDontNeedCompression.includes(runtime)) {
Expand Down
13 changes: 9 additions & 4 deletions src/services/realtimeLlmEventParser.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { Context } from 'hono';
import { addBackgroundTask } from '../utils/misc';

export class RealtimeLlmEventParser {
private sessionState: any;
Expand Down Expand Up @@ -48,7 +49,8 @@ export class RealtimeLlmEventParser {
this.sessionState.sessionDetails = { ...data.session };
const realtimeEventParser = c.get('realtimeEventParser');
if (realtimeEventParser) {
c.executionCtx.waitUntil(
addBackgroundTask(
c,
realtimeEventParser(
c,
sessionOptions,
Expand All @@ -69,7 +71,8 @@ export class RealtimeLlmEventParser {
this.sessionState.sessionDetails = { ...data.session };
const realtimeEventParser = c.get('realtimeEventParser');
if (realtimeEventParser) {
c.executionCtx.waitUntil(
addBackgroundTask(
c,
realtimeEventParser(
c,
sessionOptions,
Expand Down Expand Up @@ -106,7 +109,8 @@ export class RealtimeLlmEventParser {
const itemSequence = this.rebuildConversationSequence(
this.sessionState.conversation.items
);
c.executionCtx.waitUntil(
addBackgroundTask(
c,
realtimeEventParser(
c,
sessionOptions,
Expand All @@ -128,7 +132,8 @@ export class RealtimeLlmEventParser {
private handleError(c: Context, data: any, sessionOptions: any): void {
const realtimeEventParser = c.get('realtimeEventParser');
if (realtimeEventParser) {
c.executionCtx.waitUntil(
addBackgroundTask(
c,
realtimeEventParser(c, sessionOptions, {}, data, data.type)
);
}
Expand Down
Loading