
Commit aea7dbf

Nick: maxResponseSize
1 parent c8f71ab commit aea7dbf

3 files changed: +70 -23 lines changed

README.md

Lines changed: 19 additions & 0 deletions
@@ -21,6 +21,25 @@ A Model Context Protocol (MCP) server implementation that integrates with [Firec
 - Automatic retries and rate limiting
 - Cloud and self-hosted support
 - SSE support
+- **Context limit support for MCP compatibility**
+
+## Context Limiting for MCP
+
+All tools now support the `maxResponseSize` parameter to limit response size for better MCP compatibility. This is especially useful for large responses that may exceed MCP context limits.
+
+**Example Usage:**
+```json
+{
+  "name": "firecrawl_scrape",
+  "arguments": {
+    "url": "https://example.com",
+    "formats": ["markdown"],
+    "maxResponseSize": 50000
+  }
+}
+```
+
+When the response exceeds the specified limit, content will be truncated with a clear message indicating truncation occurred. This parameter is optional and preserves full backward compatibility.
 
 > Play around with [our MCP Server on MCP.so's playground](https://mcp.so/playground?server=firecrawl-mcp-server) or on [Klavis AI](https://www.klavis.ai/mcp-servers).
 

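For reference, a hypothetical example of what a truncated response looks like once serialized by the server: the pretty-printed JSON is cut off mid-string and the notice added in `src/index.ts` below is appended. The payload content here is illustrative; only the notice text comes from this commit.

```text
{
  "markdown": "# Example Domain\n\nThis domain is for use in illustrative examp

[Content truncated due to size limit. Increase maxResponseSize parameter to see full content.]
```
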
package.json

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 {
   "name": "firecrawl-mcp",
-  "version": "3.3.4",
+  "version": "3.3.5",
   "description": "MCP server for Firecrawl web scraping integration. Supports both cloud and self-hosted instances. Features include web scraping, search, batch processing, structured data extraction, and LLM-powered content analysis.",
   "type": "module",
   "bin": {

src/index.ts

Lines changed: 50 additions & 22 deletions
@@ -153,8 +153,15 @@ function getClient(session?: SessionData): FirecrawlApp {
   return createClient(session?.firecrawlApiKey);
 }
 
-function asText(data: unknown): string {
-  return JSON.stringify(data, null, 2);
+function asText(data: unknown, maxResponseSize?: number): string {
+  const text = JSON.stringify(data, null, 2);
+
+  if (maxResponseSize && maxResponseSize > 0 && text.length > maxResponseSize) {
+    const truncatedText = text.substring(0, maxResponseSize - 100); // Reserve space for truncation message
+    return truncatedText + '\n\n[Content truncated due to size limit. Increase maxResponseSize parameter to see full content.]';
+  }
+
+  return text;
 }
 
 // scrape tool (v2 semantics, minimal args)
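A minimal usage sketch of the changed helper (illustrative only; `asText` is a module-local function in `src/index.ts`, so this assumes it is copied or exported for the check):

```ts
// Hypothetical check of the truncation behavior of asText above.
const payload = { results: Array.from({ length: 200 }, (_, i) => ({ i, text: 'x'.repeat(100) })) };

const full = asText(payload);          // no limit: full pretty-printed JSON
const capped = asText(payload, 5000);  // limit: cut near 4900 chars, notice appended

console.log(capped.length < full.length);               // true for a payload this large
console.log(capped.endsWith('to see full content.]'));  // truncation notice is appended
console.log(asText(payload, 0) === full);               // 0 is falsy, so no limit is applied
```
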
@@ -229,12 +236,13 @@ const scrapeParamsSchema = z.object({
     .optional(),
   storeInCache: z.boolean().optional(),
   maxAge: z.number().optional(),
+  maxResponseSize: z.number().optional(),
 });
 
 server.addTool({
   name: 'firecrawl_scrape',
   description: `
-Scrape content from a single URL with advanced options.
+Scrape content from a single URL with advanced options. 
 This is the most powerful, fastest and most reliable scraper tool, if available you should always default to using this tool for any web scraping needs.
 
 **Best for:** Single page content extraction, when you know exactly which page contains the information.
@@ -248,11 +256,13 @@ This is the most powerful, fastest and most reliable scraper tool, if available
   "arguments": {
     "url": "https://example.com",
     "formats": ["markdown"],
-    "maxAge": 172800000
+    "maxAge": 172800000,
+    "maxResponseSize": 50000
   }
 }
 \`\`\`
 **Performance:** Add maxAge parameter for 500% faster scrapes using cached data.
+**Context Limiting:** Use maxResponseSize parameter to limit response size for MCP compatibility (e.g., 50000 characters).
 **Returns:** Markdown, HTML, or other formats as specified.
 ${SAFE_MODE ? '**Safe Mode:** Read-only content extraction. Interactive actions (click, write, executeJavascript) are disabled for security.' : ''}
 `,
@@ -261,12 +271,12 @@ ${SAFE_MODE ? '**Safe Mode:** Read-only content extraction. Interactive actions
     args: unknown,
     { session, log }: { session?: SessionData; log: Logger }
   ): Promise<string> => {
-    const { url, ...options } = args as { url: string } & Record<string, unknown>;
+    const { url, maxResponseSize, ...options } = args as { url: string; maxResponseSize?: number } & Record<string, unknown>;
     const client = getClient(session);
     const cleaned = removeEmptyTopLevel(options as Record<string, unknown>);
     log.info('Scraping URL', { url: String(url) });
     const res = await client.scrape(String(url), { ...cleaned, origin: ORIGIN } as any);
-    return asText(res);
+    return asText(res, maxResponseSize);
   },
 });
 
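Every other tool handler in this commit repeats the pattern shown above: `maxResponseSize` is destructured out of `args` so it is never forwarded to the Firecrawl API, and it is applied only when the result is serialized. A condensed, hypothetical sketch of that shared pattern (assumes `getClient`, `ORIGIN`, `asText`, and `SessionData` are in scope as in this file):

```ts
// Hypothetical condensed handler illustrating the shared maxResponseSize pattern.
async function runTool(args: Record<string, unknown>, session?: SessionData): Promise<string> {
  // Separate the MCP-only knob so it never reaches the Firecrawl API call.
  const { url, maxResponseSize, ...options } = args as {
    url: string;
    maxResponseSize?: number;
  } & Record<string, unknown>;

  const client = getClient(session);
  const res = await client.scrape(String(url), { ...options, origin: ORIGIN } as any);

  // The limit only affects how the result is serialized for the MCP response.
  return asText(res, maxResponseSize);
}
```
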
@@ -278,13 +288,15 @@ Map a website to discover all indexed URLs on the site.
 **Best for:** Discovering URLs on a website before deciding what to scrape; finding specific sections of a website.
 **Not recommended for:** When you already know which specific URL you need (use scrape or batch_scrape); when you need the content of the pages (use scrape after mapping).
 **Common mistakes:** Using crawl to discover URLs instead of map.
+**Context Limiting:** Use maxResponseSize parameter to limit response size for MCP compatibility.
 **Prompt Example:** "List all URLs on example.com."
 **Usage Example:**
 \`\`\`json
 {
   "name": "firecrawl_map",
   "arguments": {
-    "url": "https://example.com"
+    "url": "https://example.com",
+    "maxResponseSize": 50000
   }
 }
 \`\`\`
@@ -297,17 +309,18 @@ Map a website to discover all indexed URLs on the site.
     includeSubdomains: z.boolean().optional(),
     limit: z.number().optional(),
     ignoreQueryParameters: z.boolean().optional(),
+    maxResponseSize: z.number().optional(),
   }),
   execute: async (
     args: unknown,
     { session, log }: { session?: SessionData; log: Logger }
   ): Promise<string> => {
-    const { url, ...options } = args as { url: string } & Record<string, unknown>;
+    const { url, maxResponseSize, ...options } = args as { url: string; maxResponseSize?: number } & Record<string, unknown>;
     const client = getClient(session);
     const cleaned = removeEmptyTopLevel(options as Record<string, unknown>);
     log.info('Mapping URL', { url: String(url) });
     const res = await client.map(String(url), { ...cleaned, origin: ORIGIN } as any);
-    return asText(res);
+    return asText(res, maxResponseSize);
   },
 });
 
@@ -366,10 +379,12 @@ The query also supports search operators, that you can use if needed to refine t
     "scrapeOptions": {
       "formats": ["markdown"],
       "onlyMainContent": true
-    }
+    },
+    "maxResponseSize": 50000
   }
 }
 \`\`\`
+**Context Limiting:** Use maxResponseSize parameter to limit response size for MCP compatibility.
 **Returns:** Array of search results (with optional scraped content).
 `,
   parameters: z.object({
@@ -382,20 +397,21 @@ The query also supports search operators, that you can use if needed to refine t
       .array(z.object({ type: z.enum(['web', 'images', 'news']) }))
       .optional(),
     scrapeOptions: scrapeParamsSchema.omit({ url: true }).partial().optional(),
+    maxResponseSize: z.number().optional(),
   }),
   execute: async (
     args: unknown,
     { session, log }: { session?: SessionData; log: Logger }
   ): Promise<string> => {
     const client = getClient(session);
-    const { query, ...opts } = args as Record<string, unknown>;
+    const { query, maxResponseSize, ...opts } = args as { query: string; maxResponseSize?: number } & Record<string, unknown>;
     const cleaned = removeEmptyTopLevel(opts as Record<string, unknown>);
     log.info('Searching', { query: String(query) });
     const res = await client.search(query as string, {
       ...(cleaned as any),
       origin: ORIGIN,
     });
-    return asText(res);
+    return asText(res, maxResponseSize);
   },
 });
 
@@ -419,10 +435,12 @@ server.addTool({
     "limit": 20,
     "allowExternalLinks": false,
     "deduplicateSimilarURLs": true,
-    "sitemap": "include"
+    "sitemap": "include",
+    "maxResponseSize": 50000
   }
 }
 \`\`\`
+**Context Limiting:** Use maxResponseSize parameter to limit response size for MCP compatibility.
 **Returns:** Operation ID for status checking; use firecrawl_check_crawl_status to check progress.
 ${SAFE_MODE ? '**Safe Mode:** Read-only crawling. Webhooks and interactive actions are disabled for security.' : ''}
 `,
@@ -453,17 +471,18 @@ server.addTool({
     deduplicateSimilarURLs: z.boolean().optional(),
     ignoreQueryParameters: z.boolean().optional(),
     scrapeOptions: scrapeParamsSchema.omit({ url: true }).partial().optional(),
+    maxResponseSize: z.number().optional(),
   }),
   execute: async (args, { session, log }) => {
-    const { url, ...options } = args as Record<string, unknown>;
+    const { url, maxResponseSize, ...options } = args as { url: string; maxResponseSize?: number } & Record<string, unknown>;
     const client = getClient(session);
     const cleaned = removeEmptyTopLevel(options as Record<string, unknown>);
     log.info('Starting crawl', { url: String(url) });
     const res = await client.crawl(String(url), {
       ...(cleaned as any),
       origin: ORIGIN,
     });
-    return asText(res);
+    return asText(res, maxResponseSize);
   },
 });
 
@@ -477,20 +496,26 @@ Check the status of a crawl job.
 {
   "name": "firecrawl_check_crawl_status",
   "arguments": {
-    "id": "550e8400-e29b-41d4-a716-446655440000"
+    "id": "550e8400-e29b-41d4-a716-446655440000",
+    "maxResponseSize": 50000
   }
 }
 \`\`\`
+**Context Limiting:** Use maxResponseSize parameter to limit response size for MCP compatibility.
 **Returns:** Status and progress of the crawl job, including results if available.
 `,
-  parameters: z.object({ id: z.string() }),
+  parameters: z.object({
+    id: z.string(),
+    maxResponseSize: z.number().optional(),
+  }),
   execute: async (
     args: unknown,
     { session }: { session?: SessionData }
   ): Promise<string> => {
+    const { id, maxResponseSize } = args as { id: string; maxResponseSize?: number };
     const client = getClient(session);
-    const res = await client.getCrawlStatus((args as any).id as string);
-    return asText(res);
+    const res = await client.getCrawlStatus(id);
+    return asText(res, maxResponseSize);
   },
 });
 
@@ -527,10 +552,12 @@ Extract structured information from web pages using LLM capabilities. Supports b
     },
     "allowExternalLinks": false,
     "enableWebSearch": false,
-    "includeSubdomains": false
+    "includeSubdomains": false,
+    "maxResponseSize": 50000
   }
 }
 \`\`\`
+**Context Limiting:** Use maxResponseSize parameter to limit response size for MCP compatibility.
 **Returns:** Extracted structured data as defined by your schema.
 `,
   parameters: z.object({
@@ -540,13 +567,14 @@ Extract structured information from web pages using LLM capabilities. Supports b
     allowExternalLinks: z.boolean().optional(),
     enableWebSearch: z.boolean().optional(),
     includeSubdomains: z.boolean().optional(),
+    maxResponseSize: z.number().optional(),
   }),
   execute: async (
     args: unknown,
     { session, log }: { session?: SessionData; log: Logger }
   ): Promise<string> => {
     const client = getClient(session);
-    const a = args as Record<string, unknown>;
+    const a = args as { maxResponseSize?: number } & Record<string, unknown>;
     log.info('Extracting from URLs', {
       count: Array.isArray(a.urls) ? a.urls.length : 0,
     });
@@ -560,7 +588,7 @@ Extract structured information from web pages using LLM capabilities. Supports b
       origin: ORIGIN,
     });
     const res = await client.extract(extractBody as any);
-    return asText(res);
+    return asText(res, a.maxResponseSize);
   },
 });
 const PORT = Number(process.env.PORT || 3000);
