
Commit aea7dbf

Nick: maxResponseSize
1 parent c8f71ab commit aea7dbf

3 files changed: +70 -23 lines changed

README.md

Lines changed: 19 additions & 0 deletions
@@ -21,6 +21,25 @@ A Model Context Protocol (MCP) server implementation that integrates with [Firec
 - Automatic retries and rate limiting
 - Cloud and self-hosted support
 - SSE support
+- **Context limit support for MCP compatibility**
+
+## Context Limiting for MCP
+
+All tools now support the `maxResponseSize` parameter to limit response size for better MCP compatibility. This is especially useful for large responses that may exceed MCP context limits.
+
+**Example Usage:**
+```json
+{
+  "name": "firecrawl_scrape",
+  "arguments": {
+    "url": "https://example.com",
+    "formats": ["markdown"],
+    "maxResponseSize": 50000
+  }
+}
+```
+
+When the response exceeds the specified limit, content will be truncated with a clear message indicating truncation occurred. This parameter is optional and preserves full backward compatibility.
 
 > Play around with [our MCP Server on MCP.so's playground](https://mcp.so/playground?server=firecrawl-mcp-server) or on [Klavis AI](https://www.klavis.ai/mcp-servers).
 

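For reference, a hypothetical example of what a truncated response looks like once serialized by the server: the pretty-printed JSON is cut off mid-string and the notice added in `src/index.ts` below is appended. The payload content here is illustrative; only the notice text comes from this commit.

```text
{
  "markdown": "# Example Domain\n\nThis domain is for use in illustrative examp

[Content truncated due to size limit. Increase maxResponseSize parameter to see full content.]
```
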
package.json

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 {
   "name": "firecrawl-mcp",
-  "version": "3.3.4",
+  "version": "3.3.5",
   "description": "MCP server for Firecrawl web scraping integration. Supports both cloud and self-hosted instances. Features include web scraping, search, batch processing, structured data extraction, and LLM-powered content analysis.",
   "type": "module",
   "bin": {

src/index.ts

Lines changed: 50 additions & 22 deletions
@@ -153,8 +153,15 @@ function getClient(session?: SessionData): FirecrawlApp {
   return createClient(session?.firecrawlApiKey);
 }
 
-function asText(data: unknown): string {
-  return JSON.stringify(data, null, 2);
+function asText(data: unknown, maxResponseSize?: number): string {
+  const text = JSON.stringify(data, null, 2);
+
+  if (maxResponseSize && maxResponseSize > 0 && text.length > maxResponseSize) {
+    const truncatedText = text.substring(0, maxResponseSize - 100); // Reserve space for truncation message
+    return truncatedText + '\n\n[Content truncated due to size limit. Increase maxResponseSize parameter to see full content.]';
+  }
+
+  return text;
 }
 
 // scrape tool (v2 semantics, minimal args)
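A minimal usage sketch of the changed helper (illustrative only; `asText` is a module-local function in `src/index.ts`, so this assumes it is copied or exported for the check):

```ts
// Hypothetical check of the truncation behavior of asText above.
const payload = { results: Array.from({ length: 200 }, (_, i) => ({ i, text: 'x'.repeat(100) })) };

const full = asText(payload);          // no limit: full pretty-printed JSON
const capped = asText(payload, 5000);  // limit: cut near 4900 chars, notice appended

console.log(capped.length < full.length);               // true for a payload this large
console.log(capped.endsWith('to see full content.]'));  // truncation notice is appended
console.log(asText(payload, 0) === full);               // 0 is falsy, so no limit is applied
```
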
@@ -229,12 +236,13 @@ const scrapeParamsSchema = z.object({
     .optional(),
   storeInCache: z.boolean().optional(),
   maxAge: z.number().optional(),
+  maxResponseSize: z.number().optional(),
 });
 
 server.addTool({
   name: 'firecrawl_scrape',
   description: `
-Scrape content from a single URL with advanced options.
+Scrape content from a single URL with advanced options. 
 This is the most powerful, fastest and most reliable scraper tool, if available you should always default to using this tool for any web scraping needs.
 
 **Best for:** Single page content extraction, when you know exactly which page contains the information.
@@ -248,11 +256,13 @@ This is the most powerful, fastest and most reliable scraper tool, if available
   "arguments": {
     "url": "https://example.com",
     "formats": ["markdown"],
-    "maxAge": 172800000
+    "maxAge": 172800000,
+    "maxResponseSize": 50000
   }
 }
 \`\`\`
 **Performance:** Add maxAge parameter for 500% faster scrapes using cached data.
+**Context Limiting:** Use maxResponseSize parameter to limit response size for MCP compatibility (e.g., 50000 characters).
 **Returns:** Markdown, HTML, or other formats as specified.
 ${SAFE_MODE ? '**Safe Mode:** Read-only content extraction. Interactive actions (click, write, executeJavascript) are disabled for security.' : ''}
 `,
@@ -261,12 +271,12 @@ ${SAFE_MODE ? '**Safe Mode:** Read-only content extraction. Interactive actions
     args: unknown,
     { session, log }: { session?: SessionData; log: Logger }
   ): Promise<string> => {
-    const { url, ...options } = args as { url: string } & Record<string, unknown>;
+    const { url, maxResponseSize, ...options } = args as { url: string; maxResponseSize?: number } & Record<string, unknown>;
     const client = getClient(session);
     const cleaned = removeEmptyTopLevel(options as Record<string, unknown>);
     log.info('Scraping URL', { url: String(url) });
     const res = await client.scrape(String(url), { ...cleaned, origin: ORIGIN } as any);
-    return asText(res);
+    return asText(res, maxResponseSize);
   },
 });
 
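Every other tool handler in this commit repeats the pattern shown above: `maxResponseSize` is destructured out of `args` so it is never forwarded to the Firecrawl API, and it is applied only when the result is serialized. A condensed, hypothetical sketch of that shared pattern (assumes `getClient`, `ORIGIN`, `asText`, and `SessionData` are in scope as in this file):

```ts
// Hypothetical condensed handler illustrating the shared maxResponseSize pattern.
async function runTool(args: Record<string, unknown>, session?: SessionData): Promise<string> {
  // Separate the MCP-only knob so it never reaches the Firecrawl API call.
  const { url, maxResponseSize, ...options } = args as {
    url: string;
    maxResponseSize?: number;
  } & Record<string, unknown>;

  const client = getClient(session);
  const res = await client.scrape(String(url), { ...options, origin: ORIGIN } as any);

  // The limit only affects how the result is serialized for the MCP response.
  return asText(res, maxResponseSize);
}
```
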
@@ -278,13 +288,15 @@ Map a website to discover all indexed URLs on the site.
 **Best for:** Discovering URLs on a website before deciding what to scrape; finding specific sections of a website.
 **Not recommended for:** When you already know which specific URL you need (use scrape or batch_scrape); when you need the content of the pages (use scrape after mapping).
 **Common mistakes:** Using crawl to discover URLs instead of map.
+**Context Limiting:** Use maxResponseSize parameter to limit response size for MCP compatibility.
 **Prompt Example:** "List all URLs on example.com."
 **Usage Example:**
 \`\`\`json
 {
   "name": "firecrawl_map",
   "arguments": {
-    "url": "https://example.com"
+    "url": "https://example.com",
+    "maxResponseSize": 50000
   }
 }
 \`\`\`
@@ -297,17 +309,18 @@ Map a website to discover all indexed URLs on the site.
     includeSubdomains: z.boolean().optional(),
     limit: z.number().optional(),
     ignoreQueryParameters: z.boolean().optional(),
+    maxResponseSize: z.number().optional(),
   }),
   execute: async (
     args: unknown,
     { session, log }: { session?: SessionData; log: Logger }
   ): Promise<string> => {
-    const { url, ...options } = args as { url: string } & Record<string, unknown>;
+    const { url, maxResponseSize, ...options } = args as { url: string; maxResponseSize?: number } & Record<string, unknown>;
     const client = getClient(session);
     const cleaned = removeEmptyTopLevel(options as Record<string, unknown>);
     log.info('Mapping URL', { url: String(url) });
     const res = await client.map(String(url), { ...cleaned, origin: ORIGIN } as any);
-    return asText(res);
+    return asText(res, maxResponseSize);
   },
 });
 
@@ -366,10 +379,12 @@ The query also supports search operators, that you can use if needed to refine t
     "scrapeOptions": {
       "formats": ["markdown"],
       "onlyMainContent": true
-    }
+    },
+    "maxResponseSize": 50000
   }
 }
 \`\`\`
+**Context Limiting:** Use maxResponseSize parameter to limit response size for MCP compatibility.
 **Returns:** Array of search results (with optional scraped content).
 `,
   parameters: z.object({
@@ -382,20 +397,21 @@ The query also supports search operators, that you can use if needed to refine t
       .array(z.object({ type: z.enum(['web', 'images', 'news']) }))
       .optional(),
     scrapeOptions: scrapeParamsSchema.omit({ url: true }).partial().optional(),
+    maxResponseSize: z.number().optional(),
   }),
   execute: async (
     args: unknown,
     { session, log }: { session?: SessionData; log: Logger }
   ): Promise<string> => {
     const client = getClient(session);
-    const { query, ...opts } = args as Record<string, unknown>;
+    const { query, maxResponseSize, ...opts } = args as { query: string; maxResponseSize?: number } & Record<string, unknown>;
     const cleaned = removeEmptyTopLevel(opts as Record<string, unknown>);
     log.info('Searching', { query: String(query) });
     const res = await client.search(query as string, {
       ...(cleaned as any),
       origin: ORIGIN,
     });
-    return asText(res);
+    return asText(res, maxResponseSize);
   },
 });
 
@@ -419,10 +435,12 @@ server.addTool({
     "limit": 20,
     "allowExternalLinks": false,
     "deduplicateSimilarURLs": true,
-    "sitemap": "include"
+    "sitemap": "include",
+    "maxResponseSize": 50000
   }
 }
 \`\`\`
+**Context Limiting:** Use maxResponseSize parameter to limit response size for MCP compatibility.
 **Returns:** Operation ID for status checking; use firecrawl_check_crawl_status to check progress.
 ${SAFE_MODE ? '**Safe Mode:** Read-only crawling. Webhooks and interactive actions are disabled for security.' : ''}
 `,
@@ -453,17 +471,18 @@ server.addTool({
     deduplicateSimilarURLs: z.boolean().optional(),
     ignoreQueryParameters: z.boolean().optional(),
     scrapeOptions: scrapeParamsSchema.omit({ url: true }).partial().optional(),
+    maxResponseSize: z.number().optional(),
   }),
   execute: async (args, { session, log }) => {
-    const { url, ...options } = args as Record<string, unknown>;
+    const { url, maxResponseSize, ...options } = args as { url: string; maxResponseSize?: number } & Record<string, unknown>;
     const client = getClient(session);
     const cleaned = removeEmptyTopLevel(options as Record<string, unknown>);
     log.info('Starting crawl', { url: String(url) });
     const res = await client.crawl(String(url), {
       ...(cleaned as any),
       origin: ORIGIN,
     });
-    return asText(res);
+    return asText(res, maxResponseSize);
   },
 });
 
@@ -477,20 +496,26 @@ Check the status of a crawl job.
 {
   "name": "firecrawl_check_crawl_status",
   "arguments": {
-    "id": "550e8400-e29b-41d4-a716-446655440000"
+    "id": "550e8400-e29b-41d4-a716-446655440000",
+    "maxResponseSize": 50000
   }
 }
 \`\`\`
+**Context Limiting:** Use maxResponseSize parameter to limit response size for MCP compatibility.
 **Returns:** Status and progress of the crawl job, including results if available.
 `,
-  parameters: z.object({ id: z.string() }),
+  parameters: z.object({
+    id: z.string(),
+    maxResponseSize: z.number().optional(),
+  }),
   execute: async (
     args: unknown,
     { session }: { session?: SessionData }
   ): Promise<string> => {
+    const { id, maxResponseSize } = args as { id: string; maxResponseSize?: number };
     const client = getClient(session);
-    const res = await client.getCrawlStatus((args as any).id as string);
-    return asText(res);
+    const res = await client.getCrawlStatus(id);
+    return asText(res, maxResponseSize);
   },
 });
 
@@ -527,10 +552,12 @@ Extract structured information from web pages using LLM capabilities. Supports b
     },
     "allowExternalLinks": false,
     "enableWebSearch": false,
-    "includeSubdomains": false
+    "includeSubdomains": false,
+    "maxResponseSize": 50000
   }
 }
 \`\`\`
+**Context Limiting:** Use maxResponseSize parameter to limit response size for MCP compatibility.
 **Returns:** Extracted structured data as defined by your schema.
 `,
   parameters: z.object({
@@ -540,13 +567,14 @@ Extract structured information from web pages using LLM capabilities. Supports b
     allowExternalLinks: z.boolean().optional(),
     enableWebSearch: z.boolean().optional(),
     includeSubdomains: z.boolean().optional(),
+    maxResponseSize: z.number().optional(),
   }),
   execute: async (
     args: unknown,
     { session, log }: { session?: SessionData; log: Logger }
   ): Promise<string> => {
     const client = getClient(session);
-    const a = args as Record<string, unknown>;
+    const a = args as { maxResponseSize?: number } & Record<string, unknown>;
     log.info('Extracting from URLs', {
       count: Array.isArray(a.urls) ? a.urls.length : 0,
     });
@@ -560,7 +588,7 @@ Extract structured information from web pages using LLM capabilities. Supports b
       origin: ORIGIN,
     });
     const res = await client.extract(extractBody as any);
-    return asText(res);
+    return asText(res, a.maxResponseSize);
   },
 });
 const PORT = Number(process.env.PORT || 3000);
