webpage模块

PhantomJS提供了webpage模块,用于操作网页。

导入模块,并创建一个实例:

var page = require('webpage').create();

在交互模式下,先导入此模块,再输入page,会打印出此对象的详细构造:

{
   "addCookie": "[Function]",
   "canGoBack": false,
   "canGoForward": false,
   "childFramesCount": "[Function]",
   "childFramesName": "[Function]",
   "clearCookies": "[Function]",
   "clearMemoryCache": "[Function]",
   "clipRect": {
      "height": 0,
      "left": 0,
      "top": 0,
      "width": 0
   },
   "close": "[Function]",
   "closing": "[Function]",
   "content": "",
   "cookieJar": {
      "objectName": "",
      "cookies": [],
      "destroyed(QObject*)": "[Function]",
      "destroyed()": "[Function]",
      "objectNameChanged(QString)": "[Function]",
      "deleteLater()": "[Function]",
      "addCookie(QVariantMap)": "[Function]",
      "addCookieFromMap(QVariantMap,QString)": "[Function]",
      "addCookieFromMap(QVariantMap)": "[Function]",
      "addCookiesFromMap(QVariantList,QString)": "[Function]",
      "addCookiesFromMap(QVariantList)": "[Function]",
      "cookiesToMap(QString)": "[Function]",
      "cookiesToMap()": "[Function]",
      "cookieToMap(QString,QString)": "[Function]",
      "cookieToMap(QString)": "[Function]",
      "deleteCookie(QString,QString)": "[Function]",
      "deleteCookie(QString)": "[Function]",
      "clearCookies()": "[Function]",
      "close()": "[Function]"
   },
   "cookies": [],
   "currentFrameName": "[Function]",
   "customHeaders": {},
   "deleteCookie": "[Function]",
   "evaluateJavaScript": "[Function]",
   "finish": "[Function]",
   "focusedFrameName": "",
   "frameContent": "",
   "frameName": "",
   "framePlainText": "",
   "frameTitle": "",
   "frameUrl": "",
   "framesCount": 0,
   "framesName": [],
   "getPage": "[Function]",
   "go": "[Function]",
   "goBack": "[Function]",
   "goForward": "[Function]",
   "handleCurrentFrameDestroyed": "[Function]",
   "handleRepaintRequested": "[Function]",
   "handleUrlChanged": "[Function]",
   "initialized": "[Function]",
   "injectJs": "[Function]",
   "javaScriptAlertSent": "[Function]",
   "javaScriptConsoleMessageSent": "[Function]",
   "javaScriptErrorSent": "[Function]",
   "libraryPath": "/Users/leleliu008/git/document",
   "loadFinished": "[Function]",
   "loadStarted": "[Function]",
   "loading": false,
   "loadingProgress": 0,
   "navigationLocked": false,
   "navigationRequested": "[Function]",
   "offlineStoragePath": "/Users/leleliu008/Library/Application Support/Ofi Labs/PhantomJS",
   "offlineStorageQuota": 5242880,
   "openUrl": "[Function]",
   "ownsPages": true,
   "pages": [],
   "pagesWindowName": [],
   "paperSize": {},
   "plainText": "",
   "rawPageCreated": "[Function]",
   "release": "[Function]",
   "reload": "[Function]",
   "render": "[Function]",
   "renderBase64": "[Function]",
   "repaintRequested": "[Function]",
   "resourceError": "[Function]",
   "resourceReceived": "[Function]",
   "resourceRequested": "[Function]",
   "resourceTimeout": "[Function]",
   "scrollPosition": {
      "left": 0,
      "top": 0
   },
   "sendEvent": "[Function]",
   "setContent": "[Function]",
   "setCookieJar": "[Function]",
   "setCookieJarFromQObject": "[Function]",
   "setCookies": "[Function]",
   "setProxy": "[Function]",
   "setupFrame": "[Function]",
   "stop": "[Function]",
   "stopJavaScript": "[Function]",
   "switchToChildFrame": "[Function]",
   "switchToFocusedFrame": "[Function]",
   "switchToFrame": "[Function]",
   "switchToMainFrame": "[Function]",
   "switchToParentFrame": "[Function]",
   "title": "",
   "updateLoadingProgress": "[Function]",
   "url": "",
   "urlChanged": "[Function]",
   "viewportSize": {
      "height": 300,
      "width": 400
   },
   "windowName": "",
   "zoomFactor": 1
}
1.1、page.open(String uri, Function callback)

GET请求给定的网址。

callback的原型是function(String status),status只有success和fail两种取值。

示例:

var page = require('webpage').create();
page.open('http://slashdot.org', function(status) {
    if (status === 'success') {
        //TODO
    }
    phantom.exit();
});
1.2、page.open(String uri, String method, String body, Function callback)

指定使用的HTTP请求方式。

method是请求方法,比如:GET、POST、PUT、DELETE、HEAD等。

body是请求体的内容(form)。

callback的原型是function(String status),status只有success和fail两种取值。

示例:

var page = require('webpage').create();
page.open('http://slashdot.org', 'POST', 'xxx=xxxx&yyyy=yyyy', function(status) {
    if (status === 'success') {
        //TODO
    }
    phantom.exit();
});
1.3、page.open(String uri, Object options, Function callback)

详细定制请求。

callback的原型是function(String status),status只有success和fail两种取值。

options对象包含下面的字段:

| 字段 | 类型 | 说明 |
| --- | --- | --- |
| operation | String | 请求方法 |
| encoding | String | 编码格式 |
| headers | Object | 请求头 |
| data | String | 请求体 |

示例:

var page = require('webpage').create();
var options = {
    operation: "POST",
    encoding: "utf8",
    headers: {
        "Content-Type": "application/json"
    },
    data: JSON.stringify({
        some: "data",
        another: ["custom", "data"]
    })
};
page.open('http://slashdot.org', options, function(status) {
    if (status === 'success') {
        //TODO
    }
    phantom.exit();
});
1.4、page.includeJs(String uri, Function callback)

该方法用于加载网页外部脚本。比如,网页中没有使用jQuery,而你想使用jQuery操作DOM,那么就可以使用该方法加载jQuery。

示例:

var page = require('webpage').create();
page.open('http://slashdot.org', function(status) {
    if (status === 'success') {
        page.includeJs('http://path/to/jquery.min.js', function(err) {
            if (!err) {
                //TODO
            }
            phantom.exit();
        });
    } else{
        console.log('fail');
        phantom.exit();
    }
});
1.5、page.evaluate(Function codeBlock)

该方法用于打开网页以后,在页面中执行JavaScript代码。

示例1:

var page = require('webpage').create();
page.open('http://slashdot.org', function(status) {
    if (status === 'success') {
        page.includeJs('http://path/to/jquery.min.js', function(err) {
            if (!err) {
                var title = page.evaluate(function() {
                    $("button").click();
                });
            }
            phantom.exit();
        });
    } else{
        console.log('fail');
        phantom.exit();
    }
});

codeBlock函数如果有返回值,那么会被page.evaluate(Function codeBlock)返回。

示例2:

var page = require('webpage').create();
page.open('http://slashdot.org', function(status) {
    if (status === 'success') {
        var title = page.evaluate(function() {
            return document.title;
        });
        console.log('title = ' + title);
    }
    phantom.exit();
});
1.6、page.onConsoleMessage(String msg)

网页内部的console语句,以及调用page.evaluate(Function codeBlock)方法时候的codeBlock函数内部的console语句,默认不会显示在命令行。

我们需要使用此回调函数,把msg打印出来。

这是一个回调函数,既然是回调函数,就需要由我们来给它赋值。

示例:

var page = require('webpage').create();
page.onConsoleMessage = function(msg) {
    console.log(msg);
};
page.open('http://slashdot.org', function(status) {
    if (status === 'success') {
        page.evaluate(function() {
            console.log('title = ' + document.title);
        });
    } else{
        console.log('fail');
    }
    phantom.exit();
});
1.7、page.viewportSize对象

视口的大小。

此对象包含width和height两个属性。

注意:在page.open()方法之前进行设置才有效。

1.8、page.zoomFactor

初始界面的缩放因子,范围在[0, 1]。默认是1。

注意:在page.open()方法之前进行设置才有效。

1.9、page.render(String destFile [, Object options])

将网页保存成图片。该方法根据后缀名,将网页保存成不同的格式,目前支持PNG、GIF、JPEG、PDF。

示例1:

var page = require('webpage').create();
page.viewportSize = {
    width: 1920,
    height: 1080
};
page.open('http://slashdot.org', function(status) {
    if (status === 'success') {
        page.render('slashdot.org.png');
    }
    console.log(status);
    phantom.exit();
});

示例2:

var page = require('webpage').create();
page.viewportSize = {
    width: 1920,
    height: 1080
};
page.zoomFactor = 0.75;
page.open('http://slashdot.org', function(status) {
    if (status === 'success') {
        page.render('slashdot.org.jpg', { format: 'jpeg', quality: '100' });
    }
    console.log(status);
    phantom.exit();
});
1.10、page.onResourceRequested(Object requestInfo, Object request)

当页面请求一个资源时,会触发这个回调函数。

1.11、page.onResourceReceived(Object response)

当网页收到所请求的资源时,就会执行该回调函数。