Using CasperJS to scrape website data
1 min read · Nov 3, 2017
CasperJS can be used for navigation scripting, scraping and testing. In this tutorial we will see how to scrape data using CasperJS. To run CasperJS you will need a headless browser such as PhantomJS or SlimerJS. The latest versions of CasperJS need PhantomJS 1.9+.
Installing PhantomJS:
# Install the fontconfig library (presumably a runtime dependency of the PhantomJS binary — confirm).
sudo apt-get install libfontconfig1
# Download, unpack and clean up the PhantomJS 1.9.1 Linux x86_64 archive under /opt.
# NOTE(review): the commands below write to /opt without sudo, while the first and last
# commands use sudo — they may need elevated permissions; confirm for your system.
# NOTE(review): verify that this download URL is still reachable before relying on it.
cd /opt
wget https://phantomjs.googlecode.com/files/phantomjs-1.9.1-linux-x86_64.tar.bz2
tar xjf phantomjs-1.9.1-linux-x86_64.tar.bz2
rm -f phantomjs-1.9.1-linux-x86_64.tar.bz2
# Give the unpacked directory a version-independent name (/opt/phantomjs).
ln -s phantomjs-1.9.1-linux-x86_64 phantomjs
# Expose the binary as /usr/bin/phantomjs.
sudo ln -s /opt/phantomjs/bin/phantomjs /usr/bin/phantomjs
Installing CasperJS:
# Clone CasperJS into /opt and link its launcher into /usr/local/bin.
cd /opt/
# Clone over HTTPS: GitHub no longer serves the unauthenticated git:// protocol.
git clone https://github.com/n1k0/casperjs.git
cd casperjs
# Link the launcher script from the cloned checkout; -f replaces any existing link.
# NOTE(review): writing to /usr/local/bin may require elevated permissions — confirm.
ln -sf "$(pwd)/bin/casperjs" /usr/local/bin/casperjs
Simple JS script to log in and print the page title:
// Log in to a website with CasperJS and print the title of the page that
// loads afterwards. Replace every '<...>' placeholder string below with real
// values before running the script.

phantom.casperTest = true;

var fs = require('fs');
var utils = require('utils');

var casper = require('casper').create({
    pageSettings: {
        // The WebPage instance used by Casper will use these settings.
        loadImages: false,
        loadPlugins: false,
        userAgent: 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36'
    }
});

// Declared with `var` so the script does not create an implicit global.
var url = '<url-of-login-page>';

// Step 1: open the login page, fill in the login form and submit it.
casper.start(url, function() {
    // Replace the selector using the format 'form.<class-name>' or 'form#<form-id>'.
    // The keys of the values object must match the form fields' name attributes.
    this.fill('form.<form-class>', {
        email: '<enter-email-id-here>',
        password: '<enter-password-here>'
    }, true); // third argument `true` submits the form after filling it
});

// Step 2: runs after the previous step has completed; print the page title.
casper.then(function() {
    this.echo(this.getTitle());
});

// Execute the queued steps.
casper.run();
This script will log in to the website and print the page title. In this script we performed both navigation and scraping.