PhantomJS 持久化 Cookie 与 JavaScript
我为这个问题折腾了大半天。简而言之,我想通过 Node.js 的 PhantomJS 模块登录亚马逊。问题的简短版本是:亚马逊返回一条消息,提示必须启用 cookie 才能使用该网站。
这里是我目前的资源......
Working example of logging into Amazon using PhantomJS
SO question addressing persistent cookies in PhantomJS
Another SO question about cookies set by Javascript
最后这个问题特别有意思:第一个回答针对的是用户代理(我至少试过三四次,结果都一样),而第二个回答指出的情况,我认为很可能正是我的问题所在。简而言之,亚马逊可能会通过 JavaScript 设置一个测试 cookie,然后检查它是否设置成功,以此判断用户是否允许使用 cookie。我可以确认我的 cookie 文件确实被创建了,而且亚马逊也已经向文件中写入了 cookie,但提交登录表单时这显然还不够,因为在下一个页面我就被 cookie 警告拦住了。这让我相信最后那个问题中的用户是对的——尽管我已设法确保启用 JavaScript,但页面中的 JavaScript 并没有被执行。
最后,我的page.render显示了一条亚马逊消息,说我需要启用cookie才能继续。这里是我的代码...
'use strict';
/**
* Module dependencies.
*/
var mongoose = require('mongoose'),
phantom = require('phantom'),
// Admin = mongoose.model('Admin'),
Item = mongoose.model('Item'),
config = require('../config/config');
/**
 * Logs in to Amazon through PhantomJS (phantomjs-node) and then requests the
 * affiliate orders report.
 *
 * The run is modelled as an ordered list of step functions. A 50 ms poller
 * starts the next step whenever no page load is in flight, renders a
 * screenshot to config/images/stepN.png, and shuts PhantomJS down 5 s after
 * the last step has been started.
 *
 * Takes no arguments and returns nothing; progress and failures are written
 * to the console.
 */
module.exports.check = function() {
    // Required locally so this function is self-contained.
    var path = require('path');

    var loadInProgress = false,
        interval = null,
        testindex = 0,
        // path.join supplies the separator; plain concatenation produced
        // "<dir>cookies.txt" next to the directory instead of inside it.
        cookiePath = path.join(__dirname, 'cookies.txt'),
        url = 'https://www.amazon.com/ap/signin?_encoding=UTF8&openid.assoc_handle=usflex&openid.claimed_id=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.identity=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.mode=checkid_setup&openid.ns=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0&openid.ns.pape=http%3A%2F%2Fspecs.openid.net%2Fextensions%2Fpape%2F1.0&openid.pape.max_auth_age=0&openid.return_to=https%3A%2F%2Faffiliate%2Dprogram.amazon.com%2Fhome',
        // url2 is for order data
        url2 = 'https://affiliate-program.amazon.com/home/reports/table.json?query%5Btype%5D=orders&query%5Bstart_date%5D=2016-05-28&query%5Bend_date%5D=2016-06-26&query%5Btag_id%5D=189318233&query%5Bdevice_type%5D=all&query%5Blast_accessed_row_index%5D=0&query%5Bcolumns%5D=title%2Casin%2Ccategory%2Cclicks%2Cconversion%2Cseller%2Cdqty%2Cnqty%2Cqty&query%5Bskip%5D=0&query%5Bsort%5D=asin&query%5Blimit%5D=25&store_id=XXXX';

    // Builds a rejection handler for a step: log the failure and release the
    // poller so a rejected call cannot leave it waiting forever.
    function stepFailed(label) {
        return function(err) {
            console.error(label + ' failed: ' + err);
            loadInProgress = false;
        };
    }

    phantom.create([/* '--debug=true', */ '--ignore-ssl-errors=true', '--ssl-protocol=any', '--web-security=false', '--cookies-file=' + cookiePath]).then(function(ph) {
        return ph.createPage().then(function(page) {
            page.on('onLoadStarted', function() {
                loadInProgress = true;
            });

            page.on('onLoadFinished', function(response) {
                // The load is over whether or not it succeeded. Leaving the
                // flag set on failure would stall the poller indefinitely and
                // ph.exit() would never be reached.
                loadInProgress = false;
                if (response !== 'success') {
                    console.log('Phantom page failed to load.');
                }
            });

            page.on('onError', function(msg, trace) {
                var msgStack = ['ERROR: ' + msg];
                if (trace && trace.length) {
                    msgStack.push('TRACE:');
                    trace.forEach(function(t) {
                        msgStack.push(' -> ' + t.file + ': ' + t.line + (t.function ? ' (in function "' + t.function + '")' : ''));
                    });
                }
                console.error(msgStack.join('\n'));
            });

            page.on('onResourceError', function(resourceError) {
                console.log('= onResourceError()');
                console.log(' - unable to load url: "' + resourceError.url + '"');
                console.log(' - error code: ' + resourceError.errorCode + ', description: ' + resourceError.errorString);
                loadInProgress = false;
            });

            var steps = [
                // Step 1: configure the page and load the initial login page.
                function() {
                    console.log('--- JAVASCRIPT ---');
                    // Raise the flag synchronously: the settings calls below
                    // are asynchronous, and the poller must not move on to
                    // step 2 before page.open has actually been issued.
                    loadInProgress = true;
                    page.setting('javascriptEnabled').then(function(val) {
                        console.log('val: ' + val);
                        // The settings key is 'userAgent'; the previous
                        // 'settings.userAgent' key did not address the real
                        // setting, so the user agent was never applied.
                        return page.setting('userAgent', 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36');
                    }).then(function() {
                        return page.open(url);
                    }).catch(stepFailed('Step 1'));
                },
                // Step 2: fill in username/password.
                function() {
                    page.evaluate(function() {
                        document.getElementById('ap_email').value = 'XXXX';
                        document.getElementById('ap_password').value = 'XXXX';
                    }).catch(stepFailed('Step 2'));
                },
                // Step 3: submit the login form.
                function() {
                    loadInProgress = true;
                    page.evaluate(function() {
                        document.forms['signIn'].submit();
                    }).catch(stepFailed('Step 3'));
                },
                // Step 4: request the orders report.
                function() {
                    loadInProgress = true;
                    page.open(url2).catch(stepFailed('Step 4'));
                }
            ];

            interval = setInterval(function() {
                if (!loadInProgress && typeof steps[testindex] === 'function') {
                    steps[testindex]();
                    console.log('Test Index: ' + (testindex + 1));
                    // Rendered right after the step is started, so the image
                    // shows the page as it is at that moment.
                    page.render('config/images/step' + (testindex + 1) + '.png');
                    testindex++;
                }
                if (typeof steps[testindex] !== 'function') {
                    clearInterval(interval);
                    // Give the last step time to settle before shutting down.
                    setTimeout(function() {
                        ph.exit();
                    }, 5000);
                }
            }, 50);
        }).catch(function(err) {
            // Page creation or wiring failed: do not leave PhantomJS running.
            console.error('Phantom page setup failed: ' + err);
            ph.exit();
        });
    }).catch(function(err) {
        console.error('Unable to start PhantomJS: ' + err);
    });
};
运行后我得到的输出如下:
--- JAVASCRIPT ---
Test Index: 1
val: true
Test Index: 2
Test Index: 3
Test Index: 4
= onResourceError()
- unable to load url: "https://sentry.amazon.com/SSO/redirect?response_typ
e=id_token&client_id=affiliate-program.amazon.com%3A443&redirect_uri=https%3A%2F
%2Faffiliate-program.amazon.com%3A443%2Fhome%2Freports%2Ftable.json%3Fquery%255B
type%255D%3Dorders%26query%255Bstart_date%255D%3D2016-05-28%26query%255Bend_date
%255D%3D2016-06-26%26query%255Btag_id%255D%3D189318233%26query%255Bdevice_type%2
55D%3Dall%26query%255Blast_accessed_row_index%255D%3D0%26query%255Bcolumns%255D%
3Dtitle%252Casin%252Ccategory%252Cclicks%252Cconversion%252Cseller%252Cdqty%252C
nqty%252Cqty%26query%255Bskip%255D%3D0%26query%255Bsort%255D%3Dasin%26query%255B
limit%255D%3D25%26store_id%3XXXX&scope=openid&nonce=5d8a3f10bb3746c799
a05a927b0204f3c0629d5c8c5646bb49ccdcd93f07247e&sentry_handler_version=TomcatSSOF
ilter-1.1-1"
- error code: 5, description: Operation canceled
Phantom page failed to load.
有谁能指点一下,我可能遗漏了什么?
这似乎是 PhantomJS 2.1.1(即该 NPM 模块所使用的版本)或 NPM 模块本身的问题。
我用 Horseman 和 PhantomJS 2.0.0 完全重写了这个脚本,它立刻就能正常工作了。为方便后来者,下面是可用的实现。我接触 Horseman 才一天,但已经很喜欢它的链式执行——比我用过的其他任何 Phantom 封装都更简洁。
'use strict';
/**
* Module dependencies.
*/
var mongoose = require('mongoose'),
Horseman = require('node-horseman'),
phPath = __dirname + '\\phantomjs-2.0.0-windows\\bin\\phantomjs.exe',
Item = mongoose.model('Item'),
config = require('../config/config');
/**
 * Logs in to Amazon with Horseman (driving the PhantomJS binary at phPath)
 * and prints the plain-text responses of the orders and earnings report URLs.
 *
 * A screenshot is written to config/images/stepN.png after each stage.
 * Takes no arguments and returns nothing; results and failures are written
 * to the console.
 */
module.exports.updateItems = function() {
    // Required locally so this function is self-contained.
    var path = require('path');

    // path.join supplies the separator; plain concatenation produced
    // "<dir>cookies.txt" next to the directory instead of inside it.
    var cookiePath = path.join(__dirname, 'cookies.txt'),
        url = 'https://www.amazon.com/ap/signin?_encoding=UTF8&openid.assoc_handle=usflex&openid.claimed_id=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.identity=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.mode=checkid_setup&openid.ns=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0&openid.ns.pape=http%3A%2F%2Fspecs.openid.net%2Fextensions%2Fpape%2F1.0&openid.pape.max_auth_age=0&openid.return_to=https%3A%2F%2Faffiliate%2Dprogram.amazon.com%2Fhome',
        // url2 is for order data
        url2 = 'https://affiliate-program.amazon.com/home/reports/table.json?query%5Btype%5D=orders&query%5Bstart_date%5D=2016-05-28&query%5Bend_date%5D=2016-06-26&query%5Btag_id%5D=189318233&query%5Bdevice_type%5D=all&query%5Blast_accessed_row_index%5D=0&query%5Bcolumns%5D=title%2Casin%2Ccategory%2Cclicks%2Cconversion%2Cseller%2Cdqty%2Cnqty%2Cqty&query%5Bskip%5D=0&query%5Bsort%5D=asin&query%5Blimit%5D=25&store_id=XXXX',
        // url3 is for earnings data
        url3 = 'https://affiliate-program.amazon.com/home/reports/table.json?query%5Btype%5D=earnings&query%5Bstart_date%5D=2016-05-28&query%5Bend_date%5D=2016-06-26&query%5Btag_id%5D=189318233&query%5Bdevice_type%5D=all&query%5Blast_accessed_row_index%5D=0&query%5Bcolumns%5D=title%2Casin%2Cseller%2Cprice%2Crate%2Cqty%2Crevenue%2Cearnings%2Cdevicetype&query%5Bskip%5D=0&query%5Bsort%5D=asin&query%5Blimit%5D=25&store_id=XXXX';

    var horseman = new Horseman({
        cookiesFile: cookiePath,
        ignoreSSLErrors: true,
        sslProtocol: 'any',
        webSecurity: false,
        timeout: 15000,
        phantomPath: phPath
    });

    horseman
        .userAgent('Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36')
        // NOTE(review): the sign-in below goes through the page form; confirm
        // whether these credentials are actually needed.
        .authentication('XXXX', 'XXXX')
        // Forward console output from the page to the Node console.
        .on('consoleMessage', function(msg) {
            console.log(msg);
        })
        // Print page errors together with their stack trace, if any.
        .on('error', function(msg, trace) {
            var msgStack = ['ERROR: ' + msg];
            if (trace && trace.length) {
                msgStack.push('TRACE:');
                trace.forEach(function(t) {
                    msgStack.push(' -> ' + t.file + ': ' + t.line + (t.function ? ' (in function "' + t.function + '")' : ''));
                });
            }
            console.error(msgStack.join('\n'));
        })
        // Sign in through the login form.
        .open(url)
        .screenshot('config/images/step1.png')
        .waitForSelector('#ap_email')
        .value('#ap_email', 'XXXX')
        .waitForSelector('#ap_password')
        .value('#ap_password', 'XXXX')
        .screenshot('config/images/step2.png')
        .click('#signInSubmit')
        .waitForNextPage()
        .screenshot('config/images/step3.png')
        // Orders report.
        .open(url2)
        .screenshot('config/images/step4.png')
        .plainText()
        .then(function(txt) {
            console.log('Page results: ');
            console.dir(txt);
        })
        // Earnings report.
        .open(url3)
        .screenshot('config/images/step5.png')
        .plainText()
        .then(function(txt) {
            console.log('Page results: ');
            console.dir(txt);
        })
        // Report any failed or timed-out action instead of leaving an
        // unhandled rejection.
        .catch(function(err) {
            console.error('Horseman run failed: ' + err);
        })
        // Close in finally so the PhantomJS process is released on failure
        // too; a trailing .close() in the chain is skipped once an earlier
        // action rejects.
        .finally(function() {
            return horseman.close();
        });
};
祝你好运!
我最近也遇到了同样的问题,简单的解决办法是在新创建的页面上设置用户代理(user agent)。如果您使用的是 phantomjs-node 模块,代码如下。
// Set the page's "userAgent" setting; replace the placeholder string with a real user-agent value.
page.setting("userAgent", "your user agent here");
有趣!如果脚本中必须包含条件判断/分支逻辑,那么用 Horseman 的链式写法该如何处理? – Vaviloff
@Vaviloff 问得好。Horseman 提供了一个 `do` 函数(https://github.com/johntitus/node-horseman#dofn),它允许你在不打断调用链的情况下运行任意函数。据我了解,其结果会传递给链中的下一个函数,所以你可以这样写:`.do(function(){return stuff;}).then(function(stuffFromDo){return moreStuff;});`。要我说,这相当巧妙。 – aikorei