Export pages and assets

Export a full page snapshot (HTML, stylesheets, scripts, images, and fonts) in a single request.

Prerequisites

A Browserless API token from your account dashboard

Steps

REST API
Frameworks

POST to the /export endpoint with includeResources: true. The response is a ZIP file containing the page HTML and its linked assets (same-origin resources are always included; cross-origin resources depend on server configuration).

View Full Code on GitHub

1. Build the request

Append your token to the export endpoint:

https://production-sfo.browserless.io/export?token=YOUR_API_TOKEN_HERE

2. Send the request and save the ZIP

# Export the page as a ZIP archive. --fail makes curl exit non-zero on an HTTP
# error, and chaining with && keeps the success message from printing when the
# request did not succeed.
curl --fail -X POST \
  "https://production-sfo.browserless.io/export?token=YOUR_API_TOKEN_HERE" \
  -H "Content-Type: application/json" \
  -d '{"url":"https://scraping-sandbox.netlify.app/harvest-direct","includeResources":true}' \
  --output page.zip \
  && echo "Saved page.zip"

View Full Code on GitHub

1. Export the page and save the ZIP

import fs from 'fs';

// API token from the Browserless account dashboard.
const TOKEN = 'YOUR_API_TOKEN_HERE';

// Export endpoint plus the JSON payload; includeResources asks for a ZIP that
// bundles the page HTML with its linked assets.
const endpoint = `https://production-sfo.browserless.io/export?token=${TOKEN}`;
const payload = JSON.stringify({
  url: 'https://scraping-sandbox.netlify.app/harvest-direct',
  includeResources: true,
});

const res = await fetch(endpoint, {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: payload,
});

// Stop before anything is written when the server reports an error.
if (!res.ok) {
  throw new Error(`Export failed: ${res.status} ${res.statusText}`);
}

// The response body is the archive itself; write the bytes out unchanged.
const zipBytes = Buffer.from(await res.arrayBuffer());
fs.writeFileSync('page.zip', zipBytes);
console.log('Saved page.zip');

2. Check the output

Run with node images.mjs. The ZIP is saved as page.zip.

View Full Code on GitHub

1. Install dependencies

pip install requests

2. Export the page and save the ZIP

import requests

# API token from the Browserless account dashboard.
TOKEN = 'YOUR_API_TOKEN_HERE'

# includeResources asks /export for a ZIP holding the page and its linked assets.
export_url = f'https://production-sfo.browserless.io/export?token={TOKEN}'
payload = {
    'url': 'https://scraping-sandbox.netlify.app/harvest-direct',
    'includeResources': True,
}

reply = requests.post(export_url, json=payload)
# Raises requests.HTTPError on a 4xx/5xx status, before any file is written.
reply.raise_for_status()

with open('page.zip', 'wb') as zip_file:
    zip_file.write(reply.content)

print('Saved page.zip')

3. Check the output

Run with python images.py. The ZIP is saved as page.zip.

View Full Code on GitHub

1. Dependencies

java.net.http.HttpClient ships with the JDK (Java 11+). No extra packages needed.

2. Export the page and save the ZIP

import java.net.URI;
import java.net.http.*;
import java.nio.file.*;

/**
 * Posts an export request to Browserless and saves the returned ZIP archive
 * as page.zip in the working directory.
 */
public class DownloadImages {
    public static void main(String[] args) throws Exception {
        String token = "YOUR_API_TOKEN_HERE";
        URI endpoint = URI.create("https://production-sfo.browserless.io/export?token=" + token);
        String payload =
            "{\"url\":\"https://scraping-sandbox.netlify.app/harvest-direct\",\"includeResources\":true}";

        HttpRequest request = HttpRequest.newBuilder()
            .uri(endpoint)
            .header("Content-Type", "application/json")
            .POST(HttpRequest.BodyPublishers.ofString(payload))
            .build();

        // Read the whole body as bytes: the response is a binary ZIP archive.
        HttpClient client = HttpClient.newHttpClient();
        HttpResponse<byte[]> response = client.send(request, HttpResponse.BodyHandlers.ofByteArray());

        int status = response.statusCode();
        if (status >= 400) {
            throw new RuntimeException("Export failed: " + status);
        }

        Files.write(Path.of("page.zip"), response.body());
        System.out.println("Saved page.zip");
    }
}

3. Check the output

Compile with javac DownloadImages.java and run with java DownloadImages. The ZIP is saved as page.zip.

View Full Code on GitHub

1. Dependencies

System.Net.Http.HttpClient is part of the .NET standard library. No packages needed.

2. Export the page and save the ZIP

using System;
using System.IO;
using System.Net.Http;
using System.Text;
using System.Threading.Tasks;

/// <summary>
/// Requests a page export from Browserless and writes the returned ZIP archive
/// to page.zip in the working directory.
/// </summary>
class DownloadImages
{
    static async Task Main()
    {
        const string token = "YOUR_API_TOKEN_HERE";
        const string payload =
            "{\"url\":\"https://scraping-sandbox.netlify.app/harvest-direct\",\"includeResources\":true}";
        string endpoint = $"https://production-sfo.browserless.io/export?token={token}";

        using var http = new HttpClient();
        var content = new StringContent(payload, Encoding.UTF8, "application/json");

        var reply = await http.PostAsync(endpoint, content);
        // Throws HttpRequestException when the status code is not a success code.
        reply.EnsureSuccessStatusCode();

        byte[] zip = await reply.Content.ReadAsByteArrayAsync();
        await File.WriteAllBytesAsync("page.zip", zip);
        Console.WriteLine("Saved page.zip");
    }
}

3. Check the output

Run with dotnet run. The ZIP is saved as page.zip.

View Full Code on GitHub

1. Dependencies

net/http and io are part of Go's standard library. No extra packages needed.

2. Export the page and save the ZIP

package main

import (
    "bytes"
    "fmt"
    "io"
    "net/http"
    "os"
)

// main posts an export request to Browserless and writes the returned ZIP
// archive to page.zip. Every step that can fail is checked, so the success
// message is printed only after the file has actually been written.
func main() {
    token := "YOUR_API_TOKEN_HERE"

    body := `{"url":"https://scraping-sandbox.netlify.app/harvest-direct","includeResources":true}`
    req, err := http.NewRequest(http.MethodPost,
        "https://production-sfo.browserless.io/export?token="+token,
        bytes.NewBufferString(body),
    )
    if err != nil {
        panic(err)
    }
    req.Header.Set("Content-Type", "application/json")

    resp, err := http.DefaultClient.Do(req)
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()

    if resp.StatusCode >= 400 {
        panic(fmt.Sprintf("Export failed: %d", resp.StatusCode))
    }

    // Read the full archive before touching the disk so a dropped connection
    // never leaves a truncated page.zip behind.
    data, err := io.ReadAll(resp.Body)
    if err != nil {
        panic(err)
    }
    if err := os.WriteFile("page.zip", data, 0644); err != nil {
        panic(err)
    }
    fmt.Println("Saved page.zip")
}

3. Check the output

Run with go run main.go. The ZIP is saved as page.zip.

View Full Code on GitHub

1. Dependencies

This example uses PHP's built-in curl. No Composer packages needed.

2. Export the page and save the ZIP

<?php

// API token from the Browserless account dashboard.
$token = 'YOUR_API_TOKEN_HERE';

$ch = curl_init('https://production-sfo.browserless.io/export?token=' . $token);
curl_setopt_array($ch, [
    CURLOPT_POST           => true,
    CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
    CURLOPT_POSTFIELDS     => json_encode([
        'url'              => 'https://scraping-sandbox.netlify.app/harvest-direct',
        'includeResources' => true,
    ]),
    // Hand the response body back from curl_exec() instead of printing it.
    CURLOPT_RETURNTRANSFER => true,
]);
$body = curl_exec($ch);
// Capture the error text before the handle is closed.
$error = curl_error($ch);
$status = curl_getinfo($ch, CURLINFO_HTTP_CODE);
curl_close($ch);

// curl_exec() returns false on transport failures (DNS, TLS, refused
// connection). The status code is 0 in that case, so the HTTP check below
// would not catch it.
if ($body === false) {
    die("Export failed: $error\n");
}

if ($status >= 400) {
    die("Export failed: HTTP $status\n");
}

// Only report success when the archive was actually written.
if (file_put_contents('page.zip', $body) === false) {
    die("Export failed: could not write page.zip\n");
}
echo "Saved page.zip\n";

3. Check the output

Run with php images.php. The ZIP is saved as page.zip.

View Full Code on GitHub

1. Dependencies

net/http, json, and uri are part of Ruby's standard library. No gems required.

2. Export the page and save the ZIP

require 'net/http'
require 'json'
require 'uri'

# API token from the Browserless account dashboard.
TOKEN = 'YOUR_API_TOKEN_HERE'

endpoint = URI("https://production-sfo.browserless.io/export?token=#{TOKEN}")
payload = JSON.generate({ url: 'https://scraping-sandbox.netlify.app/harvest-direct', includeResources: true })

# Build the POST first, then open a TLS connection to send it.
post = Net::HTTP::Post.new(endpoint)
post['Content-Type'] = 'application/json'
post.body = payload

client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

result = client.request(post)
# Anything outside the 2xx family aborts before a file is written.
unless result.is_a?(Net::HTTPSuccess)
  raise "Export failed: #{result.code}"
end

File.binwrite('page.zip', result.body)
puts 'Saved page.zip'

3. Check the output

Run with ruby images.rb. The ZIP is saved as page.zip.

Intercept network responses during page load to capture linked resources: stylesheets, scripts, images, and fonts. Puppeteer and Playwright use response interception; the Go tab extracts resource URLs from the DOM instead and downloads each one over HTTP, covering stylesheets, scripts, and images. The HTML is saved via page.content() (chromedp.OuterHTML in the Go tab), and each captured asset is written alongside it.

Puppeteer
Playwright
Go (chromedp)

View Full Code on GitHub

1. Install dependencies

npm install puppeteer-core

2. Connect, intercept responses, and save assets

import fs from 'fs';
import path from 'path';
import puppeteer from 'puppeteer-core';

// API token from the Browserless account dashboard.
const TOKEN = 'YOUR_API_TOKEN_HERE';

// Resource types that are written to disk next to the HTML.
const SAVED_TYPES = new Set(['stylesheet', 'script', 'image', 'font']);

const browser = await puppeteer.connect({
  browserWSEndpoint: `wss://production-sfo.browserless.io?token=${TOKEN}`,
});

try {
  const page = await browser.newPage();
  const pending = [];

  // Read one response body. Resolves to null when the body cannot be read,
  // so a single unreadable response does not abort the whole export.
  const readAsset = async (response) => {
    try {
      const buf = await response.buffer();
      return { url: response.url(), buf };
    } catch {
      return null;
    }
  };

  // Start reading each matching body as soon as its response arrives.
  page.on('response', (response) => {
    if (SAVED_TYPES.has(response.request().resourceType())) {
      pending.push(readAsset(response));
    }
  });

  await page.goto('https://scraping-sandbox.netlify.app/harvest-direct', { waitUntil: 'networkidle2' });

  const html = await page.content();
  const settled = await Promise.all(pending);
  const assets = settled.filter((entry) => entry !== null);

  fs.mkdirSync('page', { recursive: true });
  fs.writeFileSync('page/index.html', html);
  console.log('Saved page/index.html');

  // Assets are numbered in capture order; the extension comes from the URL path.
  assets.forEach(({ url, buf }, index) => {
    const extension = path.extname(new URL(url).pathname);
    const target = `page/asset-${index}${extension}`;
    fs.writeFileSync(target, buf);
    console.log(`Saved ${target}`);
  });
} finally {
  await browser.close();
}

3. Check the output

Run with node images.mjs. The page/ directory contains index.html and all intercepted assets.

JavaScript
Python
Java
C#

View Full Code on GitHub

1. Install dependencies

npm install playwright-core

2. Connect, intercept responses, and save assets

import fs from 'fs';
import path from 'path';
import { chromium } from 'playwright-core';

// API token from the Browserless account dashboard.
const TOKEN = 'YOUR_API_TOKEN_HERE';

// Resource types that are written to disk next to the HTML.
const SAVED_TYPES = new Set(['stylesheet', 'script', 'image', 'font']);

const browser = await chromium.connectOverCDP(
  `wss://production-sfo.browserless.io?token=${TOKEN}`
);

try {
  // Open the page in the existing default context: browser.newPage() would
  // create a fresh context that does not inherit proxy, profile, or launch
  // settings.
  const [defaultContext] = browser.contexts();
  const page = await defaultContext.newPage();
  const pending = [];

  // Read one response body. Resolves to null when the body cannot be read,
  // so a single unreadable response does not abort the whole export.
  const readAsset = async (response) => {
    try {
      const buf = await response.body();
      return { url: response.url(), buf };
    } catch {
      return null;
    }
  };

  // Start reading each matching body as soon as its response arrives.
  page.on('response', (response) => {
    if (SAVED_TYPES.has(response.request().resourceType())) {
      pending.push(readAsset(response));
    }
  });

  await page.goto('https://scraping-sandbox.netlify.app/harvest-direct', { waitUntil: 'networkidle' });

  const html = await page.content();
  const settled = await Promise.all(pending);
  const assets = settled.filter((entry) => entry !== null);

  fs.mkdirSync('page', { recursive: true });
  fs.writeFileSync('page/index.html', html);
  console.log('Saved page/index.html');

  // Assets are numbered in capture order; the extension comes from the URL path.
  assets.forEach(({ url, buf }, index) => {
    const extension = path.extname(new URL(url).pathname);
    const target = `page/asset-${index}${extension}`;
    fs.writeFileSync(target, buf);
    console.log(`Saved ${target}`);
  });
} finally {
  await browser.close();
}

3. Check the output

Run with node images.mjs. The page/ directory contains index.html and all intercepted assets.

View Full Code on GitHub

1. Install dependencies

pip install playwright
playwright install chromium

2. Connect, intercept responses, and save assets

import os
from urllib.parse import urlparse
from playwright.sync_api import sync_playwright

# API token from the Browserless account dashboard.
TOKEN = 'YOUR_API_TOKEN_HERE'
WS_ENDPOINT = f'wss://production-sfo.browserless.io?token={TOKEN}'

# Resource types that are written to disk next to the HTML.
ASSET_TYPES = ('stylesheet', 'script', 'image', 'font')

# Filled by handle_response while the page loads; one dict per captured asset.
assets = []


def handle_response(response):
    """Record the URL and body of a stylesheet, script, image, or font response.

    Capture is best-effort: a response whose body cannot be read is skipped.
    """
    if response.request.resource_type not in ASSET_TYPES:
        return
    try:
        body = response.body()
    except Exception:
        return
    assets.append({'url': response.url, 'buf': body})


with sync_playwright() as playwright:
    browser = playwright.chromium.connect_over_cdp(WS_ENDPOINT)
    try:
        # Open the page in the existing default context: browser.new_page()
        # would create a fresh context that does not inherit proxy, profile,
        # or launch settings.
        default_context = browser.contexts[0]
        page = default_context.new_page()
        page.on('response', handle_response)
        page.goto('https://scraping-sandbox.netlify.app/harvest-direct')
        page.wait_for_load_state('networkidle')

        markup = page.content()

        os.makedirs('page', exist_ok=True)
        with open('page/index.html', 'w', encoding='utf-8') as html_file:
            html_file.write(markup)
        print('Saved page/index.html')

        # Assets are numbered in capture order; the extension comes from the URL path.
        for index, asset in enumerate(assets):
            _, extension = os.path.splitext(urlparse(asset['url']).path)
            target = f'page/asset-{index}{extension}'
            with open(target, 'wb') as asset_file:
                asset_file.write(asset['buf'])
            print(f'Saved {target}')
    finally:
        browser.close()

3. Check the output

Run with python images.py. The page/ directory contains index.html and all intercepted assets.

View Full Code on GitHub

1. Install dependencies

Add the Playwright dependency to your pom.xml:

<dependency>
  <groupId>com.microsoft.playwright</groupId>
  <artifactId>playwright</artifactId>
  <version>1.44.0</version>
</dependency>

2. Connect, intercept responses, and save assets

import com.microsoft.playwright.*;
import java.nio.file.*;
import java.net.URI;
import java.util.ArrayList;
import java.util.List;

/**
 * Connects to a remote Browserless browser over CDP, loads a page, and saves
 * its HTML plus every intercepted stylesheet, script, image, and font into the
 * page/ directory.
 */
public class DownloadImages {
    /** A captured response: the URL it was served from and its raw body. */
    record Asset(String url, byte[] buf) {}

    public static void main(String[] args) throws Exception {
        String TOKEN = "YOUR_API_TOKEN_HERE";
        String WS_ENDPOINT = "wss://production-sfo.browserless.io?token=" + TOKEN;

        try (Playwright playwright = Playwright.create()) {
            Browser browser = playwright.chromium().connectOverCDP(WS_ENDPOINT);
            try {
                // Open the page in the browser's first existing context
                // rather than creating a new one.
                BrowserContext context = browser.contexts().get(0);
                Page page = context.newPage();
                List<Asset> assets = new ArrayList<>();

                page.onResponse(response -> {
                    String type = response.request().resourceType();
                    if (List.of("stylesheet", "script", "image", "font").contains(type)) {
                        try {
                            assets.add(new Asset(response.url(), response.body()));
                        } catch (Exception ignored) {
                            // Best-effort capture: skip responses whose body cannot be read.
                        }
                    }
                });

                page.navigate("https://scraping-sandbox.netlify.app/harvest-direct");
                page.waitForLoadState(LoadState.NETWORKIDLE);

                String html = page.content();

                Files.createDirectories(Path.of("page"));
                Files.writeString(Path.of("page/index.html"), html);
                System.out.println("Saved page/index.html");

                for (int i = 0; i < assets.size(); i++) {
                    Asset asset = assets.get(i);
                    Path dest = Path.of("page/asset-" + i + extensionOf(asset.url()));
                    Files.write(dest, asset.buf());
                    System.out.println("Saved " + dest);
                }
            } finally {
                browser.close();
            }
        }
    }

    /**
     * Returns the file extension (including the dot) of the last path segment
     * of {@code url}, or an empty string when there is none.
     *
     * Only the last segment is inspected, so a dot in a directory name
     * (for example {@code /v1.2/font}) cannot leak a "/" into the file name.
     * Unparseable URLs and URIs without a path (such as {@code data:}) yield
     * an empty extension instead of failing the export.
     */
    private static String extensionOf(String url) {
        String path;
        try {
            path = new URI(url).getPath();
        } catch (java.net.URISyntaxException e) {
            return "";
        }
        if (path == null) {
            return "";
        }
        String name = path.substring(path.lastIndexOf('/') + 1);
        int dot = name.lastIndexOf('.');
        return dot >= 0 ? name.substring(dot) : "";
    }
}

3. Check the output

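Compile with mvn compile and run with mvn exec:java -Dexec.mainClass=DownloadImages (the main class can be omitted if it is already configured for the exec plugin in your pom.xml). The page/ directory contains index.html and all intercepted assets.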

View Full Code on GitHub

1. Install dependencies

dotnet add package Microsoft.Playwright
playwright install chromium

2. Connect, intercept responses, and save assets

using Microsoft.Playwright;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Threading.Tasks;

/// <summary>
/// Connects to a remote Browserless browser over CDP, loads a page, and saves
/// its HTML plus every intercepted stylesheet, script, image, and font into
/// the page/ directory.
/// </summary>
class DownloadImages
{
    static async Task Main()
    {
        const string TOKEN = "YOUR_API_TOKEN_HERE";
        string WS_ENDPOINT = $"wss://production-sfo.browserless.io?token={TOKEN}";

        using var playwright = await Playwright.CreateAsync();
        var browser = await playwright.Chromium.ConnectOverCDPAsync(WS_ENDPOINT);

        try
        {
            // Open the page in the browser's first existing context.
            var defaultContext = browser.Contexts[0];
            var page = await defaultContext.NewPageAsync();
            var pending = new List<Task<(string url, byte[] buf)?>>();

            page.Response += (_, response) =>
            {
                var kind = response.Request.ResourceType;
                if (kind is not ("stylesheet" or "script" or "image" or "font"))
                {
                    return;
                }

                // Start reading the body right away; a failed read becomes
                // null and is filtered out when the files are written.
                var assetUrl = response.Url;
                var read = response.BodyAsync().ContinueWith(done =>
                    done.IsCompletedSuccessfully
                        ? ((string, byte[])?) (assetUrl, done.Result)
                        : null);
                pending.Add(read);
            };

            await page.GotoAsync("https://scraping-sandbox.netlify.app/harvest-direct",
                new() { WaitUntil = WaitUntilState.NetworkIdle });

            var html = await page.ContentAsync();
            var results = await Task.WhenAll(pending);

            Directory.CreateDirectory("page");
            await File.WriteAllTextAsync("page/index.html", html);
            Console.WriteLine("Saved page/index.html");

            // Assets are numbered in capture order, counting only readable ones.
            int index = 0;
            foreach (var result in results)
            {
                if (!result.HasValue)
                {
                    continue;
                }

                var (url, buf) = result.Value;
                string extension = Path.GetExtension(new Uri(url).AbsolutePath);
                string target = $"page/asset-{index}{extension}";
                await File.WriteAllBytesAsync(target, buf);
                Console.WriteLine($"Saved {target}");
                index++;
            }
        }
        finally
        {
            await browser.CloseAsync();
        }
    }
}

3. Check the output

Run with dotnet run. The page/ directory contains index.html and all intercepted assets.

View Full Code on GitHub

1. Install dependencies

go get github.com/chromedp/chromedp

2. Connect, extract HTML and resource URLs, and download each asset

package main

import (
    "context"
    "fmt"
    "io"
    "net/http"
    "os"
    "path/filepath"
    "strings"

    "github.com/chromedp/chromedp"
)

// main connects to a remote Browserless browser, saves the page's HTML to
// page/index.html, and downloads every stylesheet, script, and image URL found
// in the DOM into the same directory.
//
// Failing to fetch the page or to write index.html is fatal. Individual assets
// are best-effort: a failed download is reported on stderr and skipped.
func main() {
    token := "YOUR_API_TOKEN_HERE"
    ws := fmt.Sprintf("wss://production-sfo.browserless.io?token=%s", token)

    allocCtx, cancel := chromedp.NewRemoteAllocator(context.Background(), ws, chromedp.NoModifyURL)
    defer cancel()

    ctx, cancel := chromedp.NewContext(allocCtx)
    defer cancel()

    var html string
    var resourceURLs []string

    if err := chromedp.Run(ctx,
        chromedp.Navigate("https://scraping-sandbox.netlify.app/harvest-direct"),
        chromedp.WaitReady("body"),
        chromedp.OuterHTML("html", &html),
        // el.href / el.src are read as properties so relative URLs arrive
        // already resolved; anything that is not http(s) is dropped.
        chromedp.Evaluate(`[
            ...Array.from(document.querySelectorAll('link[href]'), el => el.href),
            ...Array.from(document.querySelectorAll('script[src]'), el => el.src),
            ...Array.from(document.querySelectorAll('img[src]'), el => el.src),
        ].filter(url => url.startsWith('http'))`, &resourceURLs),
    ); err != nil {
        panic(err)
    }

    if err := os.MkdirAll("page", 0755); err != nil {
        panic(err)
    }
    if err := os.WriteFile("page/index.html", []byte(html), 0644); err != nil {
        panic(err)
    }
    fmt.Println("Saved page/index.html")

    for i, url := range resourceURLs {
        // Drop the query string and fragment before looking for an extension.
        clean := url
        if cut := strings.IndexAny(clean, "?#"); cut >= 0 {
            clean = clean[:cut]
        }
        filename := fmt.Sprintf("page/asset-%d%s", i, filepath.Ext(clean))

        if err := saveAsset(url, filename); err != nil {
            fmt.Fprintf(os.Stderr, "Skipped %s: %v\n", url, err)
            continue
        }
        fmt.Println("Saved", filename)
    }
}

// saveAsset downloads url and writes the response body to filename. It returns
// an error for transport failures, HTTP status codes of 400 and above, and
// read or write failures, so error pages are never saved as assets.
func saveAsset(url, filename string) error {
    resp, err := http.Get(url)
    if err != nil {
        return err
    }
    defer resp.Body.Close()

    if resp.StatusCode >= 400 {
        return fmt.Errorf("unexpected status %d", resp.StatusCode)
    }

    data, err := io.ReadAll(resp.Body)
    if err != nil {
        return err
    }
    return os.WriteFile(filename, data, 0644)
}

3. Check the output

Run with go run main.go. The page/ directory contains index.html and all downloaded assets.

Export pages and assets

Steps

Next steps

Scrape Structured Data

Take a Screenshot

Steps

Next steps

Scrape Structured Data

Take a Screenshot

Steps

Next steps