@@ -38,10 +38,6 @@ type GetClient = (session?: SessionData) => unknown;
3838
/** Path prefix shared by every research endpoint requested from this module. */
const BASE = '/v2/research';
4040
41- function asText ( data : unknown ) : string {
42- return JSON . stringify ( data , null , 2 ) ;
43- }
44-
4541/** Append a value (or repeated array values) to a URLSearchParams instance. */
4642function appendParam (
4743 params : URLSearchParams ,
@@ -63,6 +59,140 @@ function withQuery(path: string, params: URLSearchParams): string {
6359 return qs ? `${ path } ?${ qs } ` : path ;
6460}
6561
62+ // --- result formatting (ported from research-index-front/src/agent_eval.ts) ---
63+
/**
 * Upper bound on the authors printed per paper (affiliations included).
 * Everything past it is folded into a "+N more" tail so that a large
 * collaboration cannot flood the context.
 */
const MAX_AUTHORS = 15;

/** Abstract length cap, so a page of hits stays within the MCP output-token limit. */
const MAX_ABSTRACT_CHARS = 600;

/**
 * Length cap for a single affiliation. Stops one long org string (e.g. a full
 * multi-department university address) from bloating the authors line.
 */
const MAX_AFFIL_CHARS = 60;

/** Final guard: hard ceiling on the length of the whole authors line. */
const MAX_AUTHORS_LINE_CHARS = 400;
74+
/** Shape of one paper entry in the `results` arrays this module formats. */
interface PaperHit {
  /** Canonical paper id; shown when no arXiv id is available. */
  paper_id?: string;
  /** Identifier lists keyed by name; `displayId` reads the `arxiv` list. */
  ids?: Record<string, string[]>;
  title?: string;
  abstract?: string;
  /**
   * Search/metadata responses carry a comma-joined string of names, while
   * some response shapes carry the structured list; both are accepted.
   */
  authors?: string | Array<{ name: string; affiliation?: string }>;
}
84+
/**
 * Choose the identifier printed for a paper.
 *
 * @param p - Paper entry to label.
 * @returns The first arXiv id when one is present, otherwise the canonical
 *   `paper_id`, otherwise the placeholder `?`.
 */
function displayId(p: PaperHit): string {
  const arxivId = p.ids?.arxiv?.[0];
  if (arxivId != null) return arxivId;
  return p.paper_id ?? '?';
}
89+
/**
 * Format the authors line, accepting either the string or structured form.
 *
 * At most `MAX_AUTHORS` authors are listed; the remainder is summarised as a
 * "; +N more" tail. Each affiliation is capped at `MAX_AFFIL_CHARS`, with a
 * trailing `…` when it was shortened. If the line would still exceed
 * `MAX_AUTHORS_LINE_CHARS`, whole authors are dropped from the end and counted
 * into the tail, so the line never stops mid-name and the tail stays accurate.
 *
 * @param authors - Comma-joined names, or a list of `{ name, affiliation? }`.
 *   Entries whose name is blank are ignored in both forms.
 * @returns The `Authors: …` line, or `null` when there is no author to show.
 */
function fmtAuthors(
  authors?: string | { name: string; affiliation?: string }[]
): string | null {
  if (!authors) return null;

  // Shorten `text` to at most `max` chars, marking the cut with an ellipsis.
  const clip = (text: string, max: number): string =>
    text.length <= max ? text : `${text.slice(0, max - 1).trimEnd()}…`;

  let shown: string[];
  let total: number;
  if (typeof authors === 'string') {
    const names = authors
      .split(',')
      .map((s) => s.trim())
      .filter(Boolean);
    total = names.length;
    shown = names.slice(0, MAX_AUTHORS);
  } else {
    // Response data is not validated here, so skip entries without a usable
    // name instead of printing "undefined"; this mirrors the string branch.
    const named = authors.filter(
      (a) => typeof a?.name === 'string' && a.name.trim() !== ''
    );
    total = named.length;
    shown = named.slice(0, MAX_AUTHORS).map((a) => {
      const name = a.name.trim();
      const aff = a.affiliation?.trim();
      return aff ? `${name} (${clip(aff, MAX_AFFIL_CHARS)})` : name;
    });
  }
  if (total === 0) return null;

  // Build the line from the first `count` entries plus the "+N more" tail.
  const render = (count: number): string => {
    const hidden = total - count;
    const tail = hidden > 0 ? `; +${hidden} more` : '';
    return 'Authors: ' + shown.slice(0, count).join('; ') + tail;
  };

  // Drop whole trailing authors until the line fits the ceiling.
  let count = shown.length;
  let line = render(count);
  while (count > 1 && line.length > MAX_AUTHORS_LINE_CHARS) {
    count -= 1;
    line = render(count);
  }
  // A single over-long entry can still overflow; cut it as a last resort.
  return clip(line, MAX_AUTHORS_LINE_CHARS);
}
119+
/**
 * Render ranked papers as `[id] title` / authors / abstract blocks.
 *
 * Title and abstract are collapsed onto a single line each (whitespace runs
 * become one space, ends trimmed), so a multi-line title cannot break the
 * header line. A missing or blank title prints `(untitled)`, and a missing or
 * blank abstract prints `(no abstract)`. An abstract longer than
 * `MAX_ABSTRACT_CHARS` is cut to that length and ends with `…`.
 *
 * @param results - Papers in rank order; `undefined` or empty yields
 *   `(no results)`.
 * @returns One block per paper, separated by blank lines.
 */
function fmtHits(results?: PaperHit[]): string {
  if (!results || results.length === 0) return '(no results)';

  // Collapse all whitespace runs so the value fits on one output line.
  const oneLine = (text?: string): string =>
    typeof text === 'string' ? text.replace(/\s+/g, ' ').trim() : '';

  return results
    .map((r) => {
      const lines = [`[${displayId(r)}] ${oneLine(r.title) || '(untitled)'}`];
      const authors = fmtAuthors(r.authors);
      if (authors) lines.push(authors);

      const abstract = oneLine(r.abstract);
      if (!abstract) {
        lines.push('(no abstract)');
      } else if (abstract.length > MAX_ABSTRACT_CHARS) {
        // Reserve one char for the marker so the cap still holds.
        lines.push(`${abstract.slice(0, MAX_ABSTRACT_CHARS - 1).trimEnd()}…`);
      } else {
        lines.push(abstract);
      }
      return lines.join('\n');
    })
    .join('\n\n');
}
137+
/**
 * Length cap for the matched content of one GitHub hit, so a page of results
 * stays within the MCP output-token limit. Set higher than the abstract cap
 * because issue/PR threads carry the signal (repro steps, stack traces) the
 * agent actually needs to verify.
 */
const MAX_GITHUB_CONTENT_CHARS = 1200;
142+
/** Shape of one entry in the `results` array rendered by `fmtGithub`. */
interface GitHubItem {
  /** Hit kind; `fmtGithub` special-cases the value `repo_readme`. */
  resultType?: string;
  /** Repository as `owner/name`. */
  repo?: string;
  url?: string;
  /** Page type for history hits (e.g. `issue`, `pull`); absent on readmes. */
  pageType?: string;
  /** Issue or PR number; absent on readmes. */
  number?: number;
  /** How many segments/chunks matched. */
  segmentCount?: number;
  /** URL of the readme, for readme results. */
  readmeUrl?: string;
  /** Brief matched excerpt. */
  snippet?: string;
  /** Complete matched content, as markdown. */
  contentMd?: string;
}
161+
/**
 * Render GitHub history/readme hits as `[repo#number] (kind)` / url / body
 * blocks — the same shape as `fmtHits`, but tuned for issues/PRs and readmes.
 *
 * The body is the full markdown (`contentMd`) with its newlines kept, so
 * tables/code survive. It falls back to `snippet`, then to `(no content)`.
 * A body longer than `MAX_GITHUB_CONTENT_CHARS` is cut at that length; a code
 * fence left open by the cut is closed so it cannot swallow the following
 * hits, and a final `…` line marks the truncation.
 *
 * @param results - Hits to render; `undefined` or empty yields `(no results)`.
 * @returns One block per hit, separated by blank lines.
 */
function fmtGithub(results?: GitHubItem[]): string {
  if (!results || results.length === 0) return '(no results)';

  // Cut an over-long markdown body without leaving broken markdown behind.
  const clipMarkdown = (body: string): string => {
    if (body.length <= MAX_GITHUB_CONTENT_CHARS) return body;
    let clipped = body.slice(0, MAX_GITHUB_CONTENT_CHARS).trimEnd();
    // An odd number of ``` fence lines means the cut landed inside a block.
    const fenceLines = clipped.match(/^[ \t]*```/gm)?.length ?? 0;
    if (fenceLines % 2 === 1) clipped += '\n```';
    // The marker sits on its own line so it never becomes part of a fence line.
    return `${clipped}\n…`;
  };

  return results
    .map((r) => {
      const lines: string[] = [];
      if (r.resultType === 'repo_readme') {
        lines.push(`[${r.repo ?? '?'}] README`);
      } else {
        const ref = r.number != null ? `#${r.number}` : '';
        const segments = r.segmentCount
          ? `${r.segmentCount} ${r.segmentCount === 1 ? 'segment' : 'segments'}`
          : '';
        const meta = [r.pageType, segments].filter(Boolean).join(', ');
        lines.push(`[${r.repo ?? '?'}${ref}]${meta ? ` (${meta})` : ''}`);
      }
      // `||` rather than `??`: an empty readmeUrl must not hide a usable url.
      const url = r.readmeUrl || r.url;
      if (url) lines.push(url);
      const body = (r.contentMd || r.snippet || '').trim();
      lines.push(body ? clipMarkdown(body) : '(no content)');
      return lines.join('\n');
    })
    .join('\n\n');
}
195+
/**
 * Visibility gate for these tools: they are offered only when the session has
 * `research` set to exactly `true`.
 */
const canAccess = (session?: SessionData): boolean => {
  const researchFlag = session?.research;
  return researchFlag === true;
};
@@ -132,8 +262,10 @@ export function registerResearchTools(
132262 appendParam ( params , 'from' , from ) ;
133263 appendParam ( params , 'to' , to ) ;
134264 const client = getClient ( session ) as ClientLike ;
135- const res = await client . http . get ( withQuery ( `${ BASE } /papers` , params ) ) ;
136- return asText ( res . data ) ;
265+ const res = await client . http . get < { results ?: PaperHit [ ] } > (
266+ withQuery ( `${ BASE } /papers` , params )
267+ ) ;
268+ return fmtHits ( res . data ?. results ) ;
137269 } ,
138270 } ) ;
139271
@@ -186,13 +318,18 @@ export function registerResearchTools(
186318 if ( rerank != null ) appendParam ( params , 'rerank' , rerank ) ;
187319 appendParam ( params , 'anchor' , anchors ) ;
188320 const client = getClient ( session ) as ClientLike ;
189- const res = await client . http . get (
321+ const res = await client . http . get < {
322+ results ?: PaperHit [ ] ;
323+ pool_size ?: number ;
324+ note ?: string | null ;
325+ } > (
190326 withQuery (
191327 `${ BASE } /papers/${ encodeURIComponent ( primary ) } /similar` ,
192328 params
193329 )
194330 ) ;
195- return asText ( res . data ) ;
331+ const note = res . data ?. note ? `\nnote: ${ res . data . note } ` : '' ;
332+ return `${ fmtHits ( res . data ?. results ) } \n(pool_size=${ res . data ?. pool_size ?? 0 } )${ note } ` ;
196333 } ,
197334 } ) ;
198335
@@ -234,15 +371,17 @@ export function registerResearchTools(
234371 appendParam ( params , 'query' , question ) ;
235372 appendParam ( params , 'k' , k ) ;
236373 const client = getClient ( session ) as ClientLike ;
237- const res = await client . http . get (
374+ const res = await client . http . get < { passages ?: { text : string } [ ] } > (
238375 withQuery ( `${ BASE } /papers/${ encodeURIComponent ( arxiv_id ) } ` , params )
239376 ) ;
240- return asText ( res . data ) ;
377+ const passages = res . data ?. passages ?? [ ] ;
378+ return passages . length
379+ ? passages . map ( ( p ) => p . text ) . join ( '\n---\n' )
380+ : '(no full-text passages available for this paper)' ;
241381 } ,
242382 } ) ;
243383
244384 // --- search_github ---
245- // TODO: description pending — the user is writing this one.
246385 server . addTool ( {
247386 name : 'firecrawl_research_search_github' ,
248387 canAccess,
@@ -267,8 +406,10 @@ export function registerResearchTools(
267406 appendParam ( params , 'query' , query ) ;
268407 appendParam ( params , 'k' , k ) ;
269408 const client = getClient ( session ) as ClientLike ;
270- const res = await client . http . get ( withQuery ( `${ BASE } /github` , params ) ) ;
271- return asText ( res . data ) ;
409+ const res = await client . http . get < { results ?: GitHubItem [ ] } > (
410+ withQuery ( `${ BASE } /github` , params )
411+ ) ;
412+ return fmtGithub ( res . data ?. results ) ;
272413 } ,
273414 } ) ;
274415}
0 commit comments