Skip to content

Commit 8b62904

Browse files
committed
fixes
1 parent 52b173e commit 8b62904

2 files changed

Lines changed: 155 additions & 14 deletions

File tree

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "firecrawl-mcp",
3-
"version": "3.20.3",
3+
"version": "3.20.4",
44
"description": "MCP server for Firecrawl — search, scrape, and interact with the web. Supports both cloud and self-hosted instances. Features include web search, scraping, page interaction, batch processing, and LLM-powered content analysis.",
55
"type": "module",
66
"mcpName": "io.github.firecrawl/firecrawl-mcp-server",

src/research.ts

Lines changed: 154 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,6 @@ type GetClient = (session?: SessionData) => unknown;
3838

3939
const BASE = '/v2/research';
4040

41-
/**
 * Serialize a value as pretty-printed JSON (2-space indent).
 *
 * `JSON.stringify` yields `undefined` — not a string — for inputs that have no
 * JSON representation (`undefined`, functions, symbols). In that case fall back
 * to `String(data)` so the declared `string` return type actually holds.
 *
 * @param data - Any value to serialize.
 * @returns The JSON text, or the value's plain string form when it has no JSON
 *   representation.
 */
function asText(data: unknown): string {
  const json: string | undefined = JSON.stringify(data, null, 2);
  return json ?? String(data);
}
44-
4541
/** Append a value (or repeated array values) to a URLSearchParams instance. */
4642
function appendParam(
4743
params: URLSearchParams,
@@ -63,6 +59,140 @@ function withQuery(path: string, params: URLSearchParams): string {
6359
return qs ? `${path}?${qs}` : path;
6460
}
6561

62+
// --- result formatting (ported from research-index-front/src/agent_eval.ts) ---
63+
64+
// Number of authors listed per paper (affiliations included). Anything past
// this is folded into a "+N more" tail so a big collaboration can't flood the
// context.
const MAX_AUTHORS = 15;

// Character budget for a single abstract, so that a full page of hits still
// fits under the MCP output-token limit.
const MAX_ABSTRACT_CHARS = 600;

// Character budget for one affiliation string; stops a single long org name
// (e.g. a complete multi-department university address) from bloating the
// authors line.
const MAX_AFFIL_CHARS = 60;

// Last-resort upper bound on the length of the entire authors line.
const MAX_AUTHORS_LINE_CHARS = 400;
74+
75+
/** A single paper record as returned in a research results list. */
interface PaperHit {
  /** Canonical paper id. */
  paper_id?: string;
  /** Lists of external ids, keyed by id scheme. */
  ids?: Record<string, string[]>;
  title?: string;
  abstract?: string;
  /**
   * Author data arrives in two shapes and both must be handled: a comma-joined
   * string of names (search/metadata responses), or a structured list whose
   * entries may carry an affiliation.
   */
  authors?: string | Array<{ name: string; affiliation?: string }>;
}
84+
85+
/**
 * Choose the id shown for a paper: the first arXiv id when one is present,
 * otherwise the canonical `paper_id`, otherwise the placeholder `'?'`.
 */
function displayId(p: PaperHit): string {
  const arxivId = p.ids?.arxiv?.[0];
  if (arxivId != null) return arxivId;

  const canonicalId = p.paper_id;
  return canonicalId != null ? canonicalId : '?';
}
89+
90+
/**
 * Build the `Authors: …` line for a paper.
 *
 * Accepts either a comma-joined string of names or a structured list of
 * `{ name, affiliation? }` entries. Entries are joined with `'; '`; an
 * affiliation is appended in parentheses, clipped to `MAX_AFFIL_CHARS`.
 *
 * At most `MAX_AUTHORS` entries are listed. If the line would still exceed
 * `MAX_AUTHORS_LINE_CHARS`, whole trailing entries are dropped (never cut
 * mid-entry) and counted in the `; +N more` tail, so the reader always sees
 * complete names and an accurate count of what was omitted.
 *
 * The data comes from an HTTP response, so structured entries are validated at
 * runtime: null entries and entries without a non-blank string name are
 * skipped instead of printing "undefined" or throwing.
 *
 * @param authors - Comma-joined names, or structured author entries.
 * @returns The formatted line, or `null` when there is no usable author.
 */
function fmtAuthors(
  authors?: string | { name: string; affiliation?: string }[]
): string | null {
  if (!authors) return null;

  let entries: string[];
  if (typeof authors === 'string') {
    entries = authors
      .split(',')
      .map((s) => s.trim())
      .filter(Boolean);
  } else if (Array.isArray(authors)) {
    entries = [];
    for (const a of authors) {
      const name = typeof a?.name === 'string' ? a.name.trim() : '';
      if (!name) continue;
      const aff = typeof a.affiliation === 'string' ? a.affiliation.trim() : '';
      entries.push(aff ? `${name} (${aff.slice(0, MAX_AFFIL_CHARS)})` : name);
    }
  } else {
    // Unexpected payload shape (neither string nor array): nothing to print.
    return null;
  }

  const total = entries.length;
  if (total === 0) return null;

  // Render the first `count` entries plus a tail reporting everything omitted.
  const render = (count: number): string => {
    const hidden = total - count;
    const tail = hidden > 0 ? `; +${hidden} more` : '';
    return 'Authors: ' + entries.slice(0, count).join('; ') + tail;
  };

  let count = Math.min(total, MAX_AUTHORS);
  let line = render(count);
  // Over budget: shed whole entries from the end until the line fits.
  while (line.length > MAX_AUTHORS_LINE_CHARS && count > 1) {
    count -= 1;
    line = render(count);
  }
  // Final guard for the degenerate case of one entry longer than the budget.
  return line.slice(0, MAX_AUTHORS_LINE_CHARS);
}
119+
120+
/**
 * Render ranked papers as text blocks separated by a blank line. Each block is:
 *
 *   [id] title
 *   Authors: …        (omitted when `fmtAuthors` returns null)
 *   abstract
 *
 * Title and abstract have runs of whitespace collapsed to single spaces and
 * are trimmed, so a title containing line breaks cannot split the header line.
 * A title or abstract that is missing or blank after normalization is replaced
 * by `(untitled)` / `(no abstract)`. Abstracts are clipped to
 * `MAX_ABSTRACT_CHARS`.
 *
 * @param results - Paper hits in rank order.
 * @returns The formatted blocks, or `(no results)` for a missing/empty list.
 */
function fmtHits(results?: PaperHit[]): string {
  if (!results || results.length === 0) return '(no results)';

  // Collapse internal whitespace and trim; tolerates null/undefined fields.
  const squash = (text?: string | null): string =>
    (text ?? '').replace(/\s+/g, ' ').trim();

  return results
    .map((r) => {
      const title = squash(r.title) || '(untitled)';
      const lines = [`[${displayId(r)}] ${title}`];

      const authors = fmtAuthors(r.authors);
      if (authors) lines.push(authors);

      const abstract = squash(r.abstract);
      lines.push(
        abstract ? abstract.slice(0, MAX_ABSTRACT_CHARS) : '(no abstract)'
      );
      return lines.join('\n');
    })
    .join('\n\n');
}
137+
138+
// Character budget for the matched content of one GitHub result, chosen so a
// page of results still fits under the MCP output-token limit. Deliberately
// larger than the abstract budget: issue/PR threads hold the signal (repro
// steps, stack traces) the agent needs in order to verify a finding.
const MAX_GITHUB_CONTENT_CHARS = 1200;
142+
143+
/** One GitHub search result: either a history page (issue/PR) or a readme. */
interface GitHubItem {
  resultType?: string;

  /** Repository in `owner/name` form. */
  repo?: string;

  url?: string;

  /** Kind of history page (e.g. `issue`, `pull`). Absent on readme results. */
  pageType?: string;

  /** Issue or PR number. Absent on readme results. */
  number?: number;

  /** How many segments/chunks matched. */
  segmentCount?: number;

  /** URL of the readme (readme results). */
  readmeUrl?: string;

  /** Short excerpt of the match. */
  snippet?: string;

  /** Full matched content, as markdown. */
  contentMd?: string;
}
161+
162+
/**
 * Render GitHub history/readme hits as text blocks separated by a blank line.
 * Each block is:
 *
 *   [repo] README                      (when resultType is `repo_readme`)
 *   [repo#number] (pageType, N segments)   (otherwise; absent parts omitted)
 *   url                                (readmeUrl if non-empty, else url)
 *   body
 *
 * The body is `contentMd`, falling back to `snippet`, falling back to
 * `(no content)`. Markdown keeps its newlines so tables and code survive.
 * Bodies are clipped to `MAX_GITHUB_CONTENT_CHARS`; if the clip lands inside a
 * fenced code block, a closing fence is appended (a few characters past the
 * cap) so the open fence cannot swallow the results that follow.
 *
 * @param results - GitHub hits in rank order.
 * @returns The formatted blocks, or `(no results)` for a missing/empty list.
 */
function fmtGithub(results?: GitHubItem[]): string {
  if (!results || results.length === 0) return '(no results)';

  // Clip markdown to the cap, re-balancing a code fence left open by the cut.
  const clip = (md: string): string => {
    if (md.length <= MAX_GITHUB_CONTENT_CHARS) return md;
    const cut = md.slice(0, MAX_GITHUB_CONTENT_CHARS);
    // Count fence lines (``` with up to 3 leading spaces); odd means unclosed.
    const fenceCount = cut.match(/^ {0,3}```/gm)?.length ?? 0;
    return fenceCount % 2 === 1 ? `${cut}\n\`\`\`` : cut;
  };

  return results
    .map((r) => {
      const lines: string[] = [];
      const repo = r.repo ?? '?';

      if (r.resultType === 'repo_readme') {
        lines.push(`[${repo}] README`);
      } else {
        const ref = r.number != null ? `#${r.number}` : '';
        const segments = r.segmentCount
          ? `${r.segmentCount} ${r.segmentCount === 1 ? 'segment' : 'segments'}`
          : '';
        const meta = [r.pageType, segments].filter(Boolean).join(', ');
        lines.push(`[${repo}${ref}]${meta ? ` (${meta})` : ''}`);
      }

      // `||` rather than `??`: an empty readmeUrl must not hide a usable url.
      const url = r.readmeUrl || r.url;
      if (url) lines.push(url);

      const body = (r.contentMd || r.snippet || '').trim();
      lines.push(body ? clip(body) : '(no content)');
      return lines.join('\n');
    })
    .join('\n\n');
}
195+
66196
/**
 * Access gate for the research tools: they are offered only when the session's
 * `research` flag is exactly `true` (a missing session or flag means no access).
 */
const canAccess = (session?: SessionData): boolean => {
  const researchFlag = session?.research;
  return researchFlag === true;
};
@@ -132,8 +262,10 @@ export function registerResearchTools(
132262
appendParam(params, 'from', from);
133263
appendParam(params, 'to', to);
134264
const client = getClient(session) as ClientLike;
135-
const res = await client.http.get(withQuery(`${BASE}/papers`, params));
136-
return asText(res.data);
265+
const res = await client.http.get<{ results?: PaperHit[] }>(
266+
withQuery(`${BASE}/papers`, params)
267+
);
268+
return fmtHits(res.data?.results);
137269
},
138270
});
139271

@@ -186,13 +318,18 @@ export function registerResearchTools(
186318
if (rerank != null) appendParam(params, 'rerank', rerank);
187319
appendParam(params, 'anchor', anchors);
188320
const client = getClient(session) as ClientLike;
189-
const res = await client.http.get(
321+
const res = await client.http.get<{
322+
results?: PaperHit[];
323+
pool_size?: number;
324+
note?: string | null;
325+
}>(
190326
withQuery(
191327
`${BASE}/papers/${encodeURIComponent(primary)}/similar`,
192328
params
193329
)
194330
);
195-
return asText(res.data);
331+
const note = res.data?.note ? `\nnote: ${res.data.note}` : '';
332+
return `${fmtHits(res.data?.results)}\n(pool_size=${res.data?.pool_size ?? 0})${note}`;
196333
},
197334
});
198335

@@ -234,15 +371,17 @@ export function registerResearchTools(
234371
appendParam(params, 'query', question);
235372
appendParam(params, 'k', k);
236373
const client = getClient(session) as ClientLike;
237-
const res = await client.http.get(
374+
const res = await client.http.get<{ passages?: { text: string }[] }>(
238375
withQuery(`${BASE}/papers/${encodeURIComponent(arxiv_id)}`, params)
239376
);
240-
return asText(res.data);
377+
const passages = res.data?.passages ?? [];
378+
return passages.length
379+
? passages.map((p) => p.text).join('\n---\n')
380+
: '(no full-text passages available for this paper)';
241381
},
242382
});
243383

244384
// --- search_github ---
245-
// TODO: description pending — the user is writing this one.
246385
server.addTool({
247386
name: 'firecrawl_research_search_github',
248387
canAccess,
@@ -267,8 +406,10 @@ export function registerResearchTools(
267406
appendParam(params, 'query', query);
268407
appendParam(params, 'k', k);
269408
const client = getClient(session) as ClientLike;
270-
const res = await client.http.get(withQuery(`${BASE}/github`, params));
271-
return asText(res.data);
409+
const res = await client.http.get<{ results?: GitHubItem[] }>(
410+
withQuery(`${BASE}/github`, params)
411+
);
412+
return fmtGithub(res.data?.results);
272413
},
273414
});
274415
}

0 commit comments

Comments
 (0)