node.js, docker, selenium-webdriver

Docker Selenium server terminated early with status 255


I am attempting to create a containerised express app that launches a selenium webdriver instance and does some scraping.

The following app works perfectly when run locally. However, when I try to containerise it, the Express server starts as expected, but after a request to / it hangs for ~30 seconds and then crashes.

Could someone point me in the right direction?

My config:

app.js

import 'chromedriver';

import express from 'express';
import webdriver from 'selenium-webdriver';

// Express application exposing a scraping endpoint (/) and a health check (/health).
const app = express()
const port = 8080

// GET /: starts a Chrome WebDriver session, loads https://google.com,
// quits the driver and replies with the fixed string 'Hello World!'.
app.get('/', async (req, res) => {
    console.log('Starting the browser')
    const chromeCapabilities = webdriver.Capabilities.chrome();
    // Chrome launch flags: test mode, maximized window, new headless mode, GPU disabled.
    // NOTE(review): no '--no-sandbox' flag is passed; Chrome running as root inside a
    // container usually needs it — confirm against the container's user.
    const chromeOptions = {
        'args': ['--test-type', '--start-maximized', '--headless=new', '--disable-gpu']
    };
    // NOTE(review): the options are stored under the 'chromeOptions' capability key;
    // presumably the W3C-prefixed 'goog:chromeOptions' is what the driver reads — verify.
    chromeCapabilities.set('chromeOptions', chromeOptions);
    const driver = new webdriver.Builder()
        .forBrowser('chrome')
        .withCapabilities(chromeCapabilities)
        .build();

    // console.log('Driver started, going to the website')
    await driver.get('https://google.com');


    // NOTE(review): quit() is not awaited and there is no try/finally around the
    // session, so a failure in get() above skips this cleanup and the response.
    driver.quit();

    res.send('Hello World!')
})

// Health check: always answers 'OK' without touching the browser.
app.get('/health', (req, res) => {
    res.send('OK')
});


// Start the HTTP server on the fixed port declared above.
app.listen(port, () => {
    console.log(`Example app listening on port ${port}`)
})

dockerfile

# Base image: Node.js 18.
FROM node:18

# Install Chrome
# The URL fetches the *current* stable .deb, so the Chrome version is not pinned.
RUN wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb
# `;` (not `&&`): apt-get -fy install runs even if dpkg exits non-zero, so it can
# pull in whatever dependencies dpkg reported as missing.
RUN dpkg -i google-chrome-stable_current_amd64.deb; apt-get -fy install


# Application directory inside the image; the manifests are copied first so the
# dependency layers below can be cached independently of the source code.
WORKDIR /usr/src/app
COPY package*.json ./

# Install production dependencies.
RUN npm install --only=production
# Installs the npm chromedriver package separately and forces a driver download.
# NOTE(review): nothing here ties the downloaded driver version to the Chrome
# version installed above — confirm they are compatible.
RUN npm install chromedriver --chromedriver-force-download

# Copy local code to the container image.
COPY . ./

# Run the web service on container startup.
CMD [ "npm", "start" ]

package.json

  "dependencies": {
    "express": "^4.18.2",
    "selenium-webdriver": "^4.18.1",
    "supabase": "^1.145.4"
  }

logs

docker run --rm -p 8080:8080 -e PORT=8080 hw

> cloud-run@1.0.0 start
> node app.js

Example app listening on port 8080
Starting the browser
/usr/src/app/node_modules/selenium-webdriver/remote/index.js:256
              let cancelToken = earlyTermination.catch((e) => reject(Error(e.message)))
                                                                     ^

Error: Server terminated early with status 255
    at /usr/src/app/node_modules/selenium-webdriver/remote/index.js:256:70
    at process.processTicksAndRejections (node:internal/process/task_queues:95:5)

Node.js v18.19.1
make: *** [build] Error 1
(.venv) -------------------------------------------------------------------------------------------------------------------------------------------------

Solution

  • The key is to get compatible versions of Chrome and ChromeDriver installed on the image.

    🗎 Dockerfile

    FROM node:18

    # Chrome and ChromeDriver must be the same version, so both downloads below
    # are driven by this single build argument.
    # Override with: docker build --build-arg CHROME_VERSION=<version> ...
    ARG CHROME_VERSION=121.0.6167.85

    # Install the shared libraries Chrome needs plus the download/extract tools,
    # drop the apt package lists to keep the layer small, then install matching
    # "Chrome for Testing" and ChromeDriver builds onto the PATH.
    RUN apt-get update -qq -y && \
        apt-get install -y \
            libasound2 \
            libatk-bridge2.0-0 \
            libgtk-4-1 \
            libnss3 \
            xdg-utils \
            unzip \
            wget && \
        rm -rf /var/lib/apt/lists/* && \
        wget -q -O chrome-linux64.zip "https://edgedl.me.gvt1.com/edgedl/chrome/chrome-for-testing/${CHROME_VERSION}/linux64/chrome-linux64.zip" && \
        unzip chrome-linux64.zip && \
        rm chrome-linux64.zip && \
        mv chrome-linux64 /opt/chrome/ && \
        ln -s /opt/chrome/chrome /usr/local/bin/ && \
        wget -q -O chromedriver-linux64.zip "https://edgedl.me.gvt1.com/edgedl/chrome/chrome-for-testing/${CHROME_VERSION}/linux64/chromedriver-linux64.zip" && \
        unzip -j chromedriver-linux64.zip chromedriver-linux64/chromedriver && \
        rm chromedriver-linux64.zip && \
        mv chromedriver /usr/local/bin/

    WORKDIR /usr/src/app
    # Copy only the manifests first so the npm layers are cached across code changes.
    COPY package*.json .

    # The driver binary is already in /usr/local/bin; stop the npm chromedriver
    # package from downloading its own (possibly mismatched) copy.
    ENV CHROMEDRIVER_SKIP_DOWNLOAD=true

    RUN npm install --omit=dev
    # Only needed when the application does `import 'chromedriver'`.
    RUN npm install chromedriver

    COPY . .

    CMD [ "npm", "start" ]
    

    🗎 app.js (I updated the / endpoint so that it returns the content downloaded via Chrome rather than the string 'Hello World!'. Just seems to be a better demonstration that it all works!).

    import { Builder } from 'selenium-webdriver';
    import chrome from 'selenium-webdriver/chrome.js';
    
    import express from "express";
    
    const app = express();
    // Honour a PORT supplied by the hosting environment (e.g. `docker run -e PORT=...`);
    // an unset, empty or non-numeric value falls back to the original 8080.
    const port = Number(process.env.PORT) || 8080;

    /**
     * GET / — starts a headless Chrome session, loads https://google.com and
     * responds with the page's HTML source.
     *
     * Responds with HTTP 500 if the browser cannot be started or the page
     * cannot be loaded. The WebDriver session is always shut down afterwards,
     * so a failed request does not leave Chrome/ChromeDriver processes behind.
     */
    app.get("/", async (req, res) => {
        console.log("Start the browser.");

        const chromeOptions = new chrome.Options();
        // --no-sandbox: Chrome's sandbox does not start when running as root,
        // which is the default user inside the container.
        chromeOptions.addArguments('--headless', '--disable-gpu', '--no-sandbox');

        let driver;
        try {
            // Await the build so a failure to create the session is caught here
            // and "Done!" is only logged once the browser is really up.
            driver = await new Builder()
                .forBrowser('chrome')
                .setChromeOptions(chromeOptions)
                .build();
            console.log("Done!");

            console.log("Open Google.");
            await driver.get("https://google.com");
            console.log("Done!");

            const html = await driver.getPageSource();
            res.send(html);
        } catch (err) {
            // Express 4 does not catch rejections from async handlers; without
            // this the request would hang and the rejection could crash the process.
            console.error("Scraping failed:", err);
            if (!res.headersSent) {
                res.status(500).send("Failed to load the page.");
            }
        } finally {
            if (driver) {
                try {
                    await driver.quit();
                } catch (quitErr) {
                    // Cleanup is best-effort: the response has already been sent.
                    console.error("Failed to quit the browser:", quitErr);
                }
            }
        }
    });

    /** GET /health — liveness probe; never touches the browser. */
    app.get("/health", (req, res) => {
        res.send("OK");
    });

    app.listen(port, () => {
        console.log(`Example app listening on port ${port}.`);
    });
    

    🗎 package.json

    {
      "name": "selenium-scraper",
      "type": "module",
      "version": "1.0.0",
      "description": "",
      "main": "index.js",
      "scripts": {
        "start": "node app.js"
      },
      "author": "",
      "license": "ISC",
      "dependencies": {
        "express": "^4.18.2",
        "selenium-webdriver": "^4.18.1",
        "supabase": "^1.145.4"
      }
    }
    

    Build and run.

    docker build -t express-chrome . && docker run -it -p 8080:8080 express-chrome
    

    enter image description here

    See this for more details.