Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 0 additions & 8 deletions src/state.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@ const ACTOR_CACHE_MAX_SIZE = 500;
const ACTOR_CACHE_TTL_SECS = 30 * 60; // 30 minutes
const APIFY_DOCS_CACHE_MAX_SIZE = 500;
const APIFY_DOCS_CACHE_TTL_SECS = 60 * 60; // 1 hour
const MCP_SERVER_CACHE_MAX_SIZE = 500;
const MCP_SERVER_CACHE_TTL_SECS = 30 * 60; // 30 minutes

export const actorDefinitionCache = new TTLLRUCache<ActorDefinitionWithInfo>(
ACTOR_CACHE_MAX_SIZE,
Expand All @@ -18,9 +16,3 @@ export const searchApifyDocsCache = new TTLLRUCache<ApifyDocsSearchResult[]>(
);
/** Stores processed Markdown content */
export const fetchApifyDocsCache = new TTLLRUCache<string>(APIFY_DOCS_CACHE_MAX_SIZE, APIFY_DOCS_CACHE_TTL_SECS);
/**
* Stores MCP server resolution per actor:
* - false: not an MCP server
* - string: MCP server URL
*/
export const mcpServerCache = new TTLLRUCache<boolean | string>(MCP_SERVER_CACHE_MAX_SIZE, MCP_SERVER_CACHE_TTL_SECS);
69 changes: 30 additions & 39 deletions src/utils/actor.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,30 @@
import type { ApifyClient } from '../apify_client.js';
import { getActorMCPServerPath, getActorMCPServerURL } from '../mcp/actors.js';
import { actorDefinitionCache, mcpServerCache } from '../state.js';
import { actorDefinitionCache } from '../state.js';
import { getActorDefinition } from '../tools/build.js';
import type { ActorDefinitionStorage, ActorDefinitionWithInfo, DatasetItem } from '../types.js';
import { getValuesByDotKeys } from './generic.js';
import { getUserInfoCached } from './userid_cache.js';

/**
* `actorDefinitionCache` is process-wide, so a private Actor's definition must never be served from it to
* anyone but its owner — else another token on the same process reads it with no auth check. Two invariants
* keep this gate from inverting into a leak:
* 1. `info.userId` is the platform-set OWNER, not the fetching token — so a non-owner's re-fetch can't
* overwrite the cached ownership.
* 2. The caller is identified by `user('me')` under their own token (the same identity the platform
* authorizes with) and `null` is the sole non-identity sentinel — so a hit grants no more than a bare
* re-fetch would. Don't drop the `!== null` guard or swap in a cheaper identity source.
* Trade-off: an org-owned private Actor is cached under the org's userId, so an org member
* calling with a personal token never matches and re-fetches every time.
* Fail-safe (no leak), just uncached for members - accepted over an org-membership lookup
* that would put a per-call API round trip back on * this path.
*/
Comment thread
MQ37 marked this conversation as resolved.
async function callerMaySeeCachedActor(cached: ActorDefinitionWithInfo, apifyClient: ApifyClient): Promise<boolean> {
if (cached.info.isPublic) return true;
const { userId } = await getUserInfoCached(apifyClient.token, apifyClient);
return userId !== null && userId === cached.info.userId;
}

/**
* Returns the cached Actor definition + info, fetching from the platform on miss
Expand All @@ -17,52 +38,22 @@ export async function getActorDefinitionCached(
apifyClient: ApifyClient,
): Promise<ActorDefinitionWithInfo | null> {
const cached = actorDefinitionCache.get(actorIdOrName);
if (cached) return cached;
if (cached && (await callerMaySeeCachedActor(cached, apifyClient))) return cached;
const fetched = await getActorDefinition(actorIdOrName, apifyClient);
if (fetched) actorDefinitionCache.set(actorIdOrName, fetched);
return fetched;
}

/**
* Resolve and cache the MCP server URL for the given Actor.
* - Returns a string URL when the Actor exposes an MCP server
* - Returns false when the Actor is not an MCP server
* Uses a TTL LRU cache to avoid repeated API calls.
* Resolve the Actor's MCP server URL, or `false` if it isn't an MCP server. The URL is a pure function of
* the definition (`getActorMCPServerURL` does no I/O), so this rides the authorization-gated
* `getActorDefinitionCached` instead of a separate cache that would leak a private Actor's URL across tenants.
*/
export async function getActorMcpUrlCached(actorIdOrName: string, apifyClient: ApifyClient): Promise<string | false> {
const cached = mcpServerCache.get(actorIdOrName);
if (cached !== null && cached !== undefined) {
return cached as string | false;
}

try {
const actorDefinitionWithInfo = await getActorDefinitionCached(actorIdOrName, apifyClient);
const definition = actorDefinitionWithInfo?.definition;
const mcpPath = definition && getActorMCPServerPath(definition);
if (mcpPath) {
const url = await getActorMCPServerURL(definition.id, mcpPath);
mcpServerCache.set(actorIdOrName, url);
return url;
}

mcpServerCache.set(actorIdOrName, false);
return false;
} catch (error) {
// Check if it's a "not found" error (404 or 400 status codes)
const isNotFound =
typeof error === 'object' &&
error !== null &&
'statusCode' in error &&
(error.statusCode === 404 || error.statusCode === 400);

if (isNotFound) {
// Actor doesn't exist - cache false and return false
mcpServerCache.set(actorIdOrName, false);
return false;
}
// Real server error - don't cache, let it propagate
throw error;
}
const definition = (await getActorDefinitionCached(actorIdOrName, apifyClient))?.definition;
const mcpPath = definition && getActorMCPServerPath(definition);
if (!mcpPath) return false;
return getActorMCPServerURL(definition.id, mcpPath);
}

/**
Expand Down
119 changes: 119 additions & 0 deletions tests/unit/utils.actor.cache.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
import { beforeEach, describe, expect, it, vi } from 'vitest';

import type { ApifyClient } from '../../src/apify_client.js';
import type { ActorDefinitionWithInfo } from '../../src/types.js';

vi.mock('../../src/tools/build.js', () => ({ getActorDefinition: vi.fn() }));
vi.mock('../../src/utils/userid_cache.js', () => ({ getUserInfoCached: vi.fn() }));

import { actorDefinitionCache } from '../../src/state.js';
import { getActorDefinition } from '../../src/tools/build.js';
import { getActorDefinitionCached, getActorMcpUrlCached } from '../../src/utils/actor.js';
import { getUserInfoCached } from '../../src/utils/userid_cache.js';

const getActorDefinitionMock = vi.mocked(getActorDefinition);
const getUserInfoCachedMock = vi.mocked(getUserInfoCached);

// Each test uses a unique Actor name, so the shared module-level cache never collides between cases.
function seedCache(
name: string,
isPublic: boolean,
ownerUserId: string,
opts: { id?: string; webServerMcpPath?: string } = {},
): ActorDefinitionWithInfo {
const id = opts.id ?? name;
const entry = {
definition: {
id,
actorFullName: name,
...(opts.webServerMcpPath && { webServerMcpPath: opts.webServerMcpPath }),
},
info: { id, isPublic, userId: ownerUserId },
} as unknown as ActorDefinitionWithInfo;
actorDefinitionCache.set(name, entry);
return entry;
}

const client = { token: 'caller-token' } as unknown as ApifyClient;

beforeEach(() => {
getActorDefinitionMock.mockReset();
getUserInfoCachedMock.mockReset();
});

describe('getActorDefinitionCached — tenant isolation', () => {
it('serves a cached public Actor to any caller without an ownership check', async () => {
const cached = seedCache('acme/public-1', true, 'owner-1');

const result = await getActorDefinitionCached('acme/public-1', client);

expect(result).toBe(cached);
expect(getUserInfoCachedMock).not.toHaveBeenCalled();
expect(getActorDefinitionMock).not.toHaveBeenCalled();
});

it('serves a cached private Actor to its owner', async () => {
const cached = seedCache('acme/private-owner', false, 'owner-2');
getUserInfoCachedMock.mockResolvedValue({ userId: 'owner-2', userPlanTier: 'FREE', isOrganization: false });

const result = await getActorDefinitionCached('acme/private-owner', client);

expect(result).toBe(cached);
expect(getActorDefinitionMock).not.toHaveBeenCalled();
});

it('does NOT serve a cached private Actor to a non-owner — returns the re-fetched object, never the cached one', async () => {
const cached = seedCache('acme/private-other', false, 'owner-3');
getUserInfoCachedMock.mockResolvedValue({ userId: 'intruder', userPlanTier: 'FREE', isOrganization: false });
const refetched = {
definition: {},
info: { isPublic: false, userId: 'owner-3' },
} as unknown as ActorDefinitionWithInfo;
getActorDefinitionMock.mockResolvedValue(refetched);

const result = await getActorDefinitionCached('acme/private-other', client);

expect(result).toBe(refetched);
expect(result).not.toBe(cached);
expect(getActorDefinitionMock).toHaveBeenCalledWith('acme/private-other', client);
});

it('does NOT serve a cached private Actor to an anonymous caller', async () => {
seedCache('acme/private-anon', false, 'owner-4');
getUserInfoCachedMock.mockResolvedValue({ userId: null, userPlanTier: 'FREE', isOrganization: false });
getActorDefinitionMock.mockResolvedValue(null);

const result = await getActorDefinitionCached('acme/private-anon', client);

expect(result).toBeNull();
expect(getActorDefinitionMock).toHaveBeenCalledWith('acme/private-anon', client);
});
});

describe('getActorMcpUrlCached — tenant isolation', () => {
it('derives the MCP URL from a cached Actor the caller may see', async () => {
seedCache('acme/mcp-public', true, 'owner-6', { id: 'actorpub', webServerMcpPath: '/mcp' });

const result = await getActorMcpUrlCached('acme/mcp-public', client);

expect(result).toBe('https://actorpub.apify.actor/mcp');
expect(getActorDefinitionMock).not.toHaveBeenCalled();
});

it('does NOT leak a cached private Actor MCP URL to a non-owner — re-fetches and returns false', async () => {
seedCache('acme/mcp-private', false, 'owner-7', { id: 'actorpriv', webServerMcpPath: '/mcp' });
getUserInfoCachedMock.mockResolvedValue({ userId: 'intruder', userPlanTier: 'FREE', isOrganization: false });
getActorDefinitionMock.mockResolvedValue(null); // intruder's own fetch is unauthorized

const result = await getActorMcpUrlCached('acme/mcp-private', client);

expect(result).toBe(false);
expect(getActorDefinitionMock).toHaveBeenCalledWith('acme/mcp-private', client);
});

it('returns false for a non-existent Actor without throwing', async () => {
getActorDefinitionMock.mockResolvedValue(null);

await expect(getActorMcpUrlCached('acme/missing', client)).resolves.toBe(false);
});
});
Loading