比特币源码情景分析之script脚本验证(2)
通过上一篇的分析,我们应该已经对script有了一定的理解,这章节我们以源码分析的方式来了解下脚本验证执行流程
bitcoin节点在处理一条交易时就需要验证交易的txin,由于一条交易可能包含多个txin,因而需要执行多个脚本验证,自然需要并行化,因而系统允许定义多个脚本执行线程以加速验证过程。有了这个思考,我们从脚本执行线程出发剥茧抽丝掀开脚本执行的面纱。
先看脚本执行线程的初始化,这个在init.cpp的AppInitMain里
脚本验证执行线程
// Excerpt from AppInitMain: log the configured script-check thread count and start the workers.
LogPrintf("Using %u threads for script verification\n", nScriptCheckThreads);
if (nScriptCheckThreads) {
// Spawn the script-verification worker threads. Note the loop bound: this starts
// nScriptCheckThreads-1 threads running ThreadScriptCheck, not nScriptCheckThreads.
// NOTE(review): presumably the thread that submits the checks performs the remaining share of the work — confirm.
for (int i=0; i<nScriptCheckThreads-1; i++)
threadGroup.create_thread(&ThreadScriptCheck);
}
线程的具体实现函数在 validation.cpp
// File-local queue of pending CScriptCheck objects shared by the script-check threads; constructed with 128 as its argument.
static CCheckQueue<CScriptCheck> scriptcheckqueue(128);
// Entry point of a script-verification thread: renames the thread, then runs the queue's Thread() routine.
void ThreadScriptCheck() {
RenameThread("bitcoin-scriptch");
scriptcheckqueue.Thread();
}
// Abridged excerpt of the CCheckQueue class template: the template header, the data members
// (queue, nBatchSize, nTotal, nIdle, fAllOk, condMaster, condWorker) and the closing brace are not shown.
// T is the check type; the queue used for script verification is CCheckQueue<CScriptCheck>.
class CCheckQueue
{
//! Worker thread
// Runs Loop() with its default argument, i.e. with fMaster == false.
void Thread() //CCheckQueue.Thread()
{
Loop();
}
// Processing loop: repeatedly moves a batch of checks from the shared queue into a local
// vector and executes them. fMaster selects which condition variable is used.
// NOTE(review): the elided section below presumably contains the locking, waiting and the exit/return path — confirm.
bool Loop(bool fMaster = false)
{
// The master and the workers each use their own condition variable.
boost::condition_variable& cond = fMaster ? condMaster : condWorker;
// Local batch of checks, pre-sized to the maximum batch size.
std::vector<T> vChecks;
vChecks.reserve(nBatchSize);
unsigned int nNow = 0;
bool fOk = true;
do {
{
……………..
// Decide how many work units to process now.
// * Do not try to do everything at once, but aim for increasingly smaller batches so
// all workers finish approximately simultaneously.
// * Try to account for idle jobs which will instantly start helping.
// * Don't do batches smaller than 1 (duh), or larger than nBatchSize.
nNow = std::max(1U, std::min(nBatchSize, (unsigned int)queue.size() / (nTotal + nIdle + 1)));
vChecks.resize(nNow);
for (unsigned int i = 0; i < nNow; i++) {
// We want the lock on the mutex to be as short as possible, so swap jobs from the global
// queue to the local batch vector instead of copying.
// This thread takes nNow pending check objects off the back of the shared queue.
vChecks[i].swap(queue.back());
queue.pop_back();
}
// Check whether we need to do work at all
fOk = fAllOk;
}
// execute work
// Once one check fails (fOk becomes false) the remaining checks in this batch are not executed.
for (T& check : vChecks)
if (fOk)
// check() invokes T::operator()(); for the script-check queue that is CScriptCheck::operator()().
fOk = check();
vChecks.clear();
} while (true);
}
添加脚本验证对象
// Scripts are verified when a newly found block is connected and its transactions are processed.
// NOTE: abridged excerpt — the per-transaction loop and the declarations of tx, i, flags, txdata
// and fScriptChecks are not shown, and neither is the function's return path.
bool CChainState::ConnectBlock(const CBlock& block, CValidationState& state, CBlockIndex* pindex,
CCoinsViewCache& view, const CChainParams& chainparams, bool fJustCheck)
{
AssertLockHeld(cs_main);
// The control object wraps the shared script-check queue only when script checks are enabled and
// check threads are configured; otherwise it wraps nullptr.
CCheckQueueControl<CScriptCheck> control(fScriptChecks && nScriptCheckThreads ? &scriptcheckqueue : nullptr);
std::vector<CScriptCheck> vChecks;
bool fCacheResults = fJustCheck; /* Don't cache results if we're actually connecting blocks (still consult the cache, though) */
// CheckInputs receives &vChecks only when nScriptCheckThreads is non-zero.
// NOTE(review): presumably it then collects deferred checks into vChecks instead of running them inline — confirm.
if (!CheckInputs(tx, state, view, fScriptChecks, flags, fCacheResults, fCacheResults, txdata[i], nScriptCheckThreads ? &vChecks : nullptr))
return error("ConnectBlock(): CheckInputs on %s failed with %s",
tx.GetHash().ToString(), FormatStateMessage(state));
// control forwards the collected check objects to the queue it wraps (CCheckQueueControl::Add does nothing when it wraps nullptr).
control.Add(vChecks);
}
// Abridged excerpt of the CCheckQueueControl class template: the template header, the pqueue
// member and the remaining members are not shown.
class CCheckQueueControl
{
// Forwards a batch of checks to the wrapped queue; does nothing when no queue is attached (pqueue == nullptr).
void Add(std::vector<T>& vChecks)
{
if (pqueue != nullptr)
pqueue->Add(vChecks);
}
}
脚本验证执行函数
上面可知脚本验证线程最终会执行CScriptCheck::operator()()
/**
 * Run the script check for input nIn of the spending transaction ptxTo.
 *
 * The input's scriptSig and witness are verified against the scriptPubKey of the
 * output being spent (m_tx_out), using a caching signature checker. Any script
 * error is reported through the `error` member.
 *
 * @return the result of VerifyScript for this input.
 */
bool CScriptCheck::operator()() {
    // The input being checked supplies both the unlocking script and the witness.
    const auto& txin = ptxTo->vin[nIn];
    const CachingTransactionSignatureChecker sigChecker(ptxTo, nIn, m_tx_out.nValue, cacheStore, *txdata);
    return VerifyScript(txin.scriptSig, m_tx_out.scriptPubKey, &txin.scriptWitness, nFlags, sigChecker, &error);
}
P2SH脚本验证源码分析
为了更完整地分析该过程,我以最复杂的脚本模板P2SH为例分析,从上一章节的介绍我们知道,P2SH的脚本如下:
<Sig1> <Sig2> <2 PK1 PK2 PK3 PK4 PK5 5 OP_CHECKMULTISIG> OP_HASH160 8ac1d7a2fa204a16dc984fa81cfdf86a2a4e1731 OP_EQUAL
这里的scriptSig是 <Sig1> <Sig2> <2 PK1 PK2 PK3 PK4 PK5 5 OP_CHECKMULTISIG>, scriptPubKey是
OP_HASH160 8ac1d7a2fa204a16dc984fa81cfdf86a2a4e1731 OP_EQUAL
// Verifies that scriptSig (and, for P2SH, the redeem script it carries) satisfies scriptPubKey.
// Returns false with *serror set on failure, or set_success(serror) on success.
// NOTE: abridged excerpt — the witness parameter is not used by any code shown here.
// The comments follow the article's P2SH example:
//   scriptSig    = <Sig1> <Sig2> <2 PK1 PK2 PK3 PK4 PK5 5 OP_CHECKMULTISIG>
//   scriptPubKey = OP_HASH160 <hash> OP_EQUAL
bool VerifyScript(const CScript& scriptSig, const CScript& scriptPubKey, const CScriptWitness* witness, unsigned int flags, const BaseSignatureChecker& checker, ScriptError* serror)
{
std::vector<std::vector<unsigned char> > stack, stackCopy;
// Run the unlocking script. In the example the stack then holds three data items:
// <Sig1>, <Sig2> and the serialized redeem script <2 PK1 PK2 PK3 PK4 PK5 5 OP_CHECKMULTISIG>.
if (!EvalScript(stack, scriptSig, flags, checker, SigVersion::BASE, serror))
// serror is set
return false;
// State after the unlocking script has run.
if (flags & SCRIPT_VERIFY_P2SH)
// The next EvalScript call modifies stack, but the current contents are needed again
// further down for the P2SH step, so keep a copy.
stackCopy = stack;
// Run the locking script OP_HASH160 <hash> OP_EQUAL: it hashes the serialized redeem script
// on top of the stack with HASH160 and compares the result with the hash embedded in the locking script.
if (!EvalScript(stack, scriptPubKey, flags, checker, SigVersion::BASE, serror))
// serror is set
return false;
// According to the article, at this point the hash comparison has passed and <Sig1> <Sig2> remain on the stack.
// NOTE(review): no check of the value left on top of the stack is visible in this excerpt — confirm against upstream.
// Additional validation for spend-to-script-hash transactions:
// As described in the previous article, P2SH needs a second script evaluation (the redeem script); that is done below.
if ((flags & SCRIPT_VERIFY_P2SH) && scriptPubKey.IsPayToScriptHash())
{
// scriptSig must be literals-only or validation fails
if (!scriptSig.IsPushOnly())
return set_error(serror, SCRIPT_ERR_SIG_PUSHONLY);
// Restore stack.
// After the hash check the working stack no longer contains the serialized redeem script, which is still needed.
// Swapping in the copy saved earlier restores it: the stack is again the redeem script's scriptSig
// followed by the serialized redeem script, i.e. <Sig1> <Sig2> <2 PK1 PK2 PK3 PK4 PK5 5 OP_CHECKMULTISIG>.
swap(stack, stackCopy);
// stack cannot be empty here, because if it was the
// P2SH HASH <> EQUAL scriptPubKey would be evaluated with
// an empty stack and the EvalScript above would return false.
assert(!stack.empty());
// Take the serialized redeem script from the top of the stack and build a CScript object from its bytes.
const valtype& pubKeySerialized = stack.back();
CScript pubKey2(pubKeySerialized.begin(), pubKeySerialized.end());
// Together with the remaining stack items this gives the complete multisig script:
// <Sig1> <Sig2> 2 PK1 PK2 PK3 PK4 PK5 5 OP_CHECKMULTISIG.
// Pop the serialized redeem script; what stays on the stack (<Sig1> <Sig2>) is exactly what
// evaluating the redeem script's own scriptSig would have produced.
popstack(stack);
// Now evaluate the redeem script itself against those items, to check that the
// provided signatures unlock it.
if (!EvalScript(stack, pubKey2, flags, checker, SigVersion::BASE, serror))
// serror is set
return false;
}
return set_success(serror);
}
// Executes `script` against `stack`, reporting failures through set_error(serror, ...).
// NOTE: heavily abridged excerpt — lines consisting only of dots mark elided code, the
// declarations of nOpCount and fRequireMinimal are not shown, and neither is the success return path.
bool EvalScript(std::vector<std::vector<unsigned char> >& stack, const CScript& script, unsigned int flags, const BaseSignatureChecker& checker, SigVersion sigversion, ScriptError* serror)
{
CScript::const_iterator pc = script.begin();
CScript::const_iterator pend = script.end();
CScript::const_iterator pbegincodehash = script.begin();
opcodetype opcode;
valtype vchPushValue;
std::vector<bool> vfExec;
std::vector<valtype> altstack;
try
{
while (pc < pend)
{
// fExec is true only when vfExec contains no false entry.
// NOTE(review): presumably vfExec records the conditions of the enclosing OP_IF/OP_ELSE branches — confirm.
bool fExec = !count(vfExec.begin(), vfExec.end(), false);
// Script execution is a loop of "GetOp, then handle the opcode in a switch/case".
if (!script.GetOp(pc, opcode, vchPushValue))
return set_error(serror, SCRIPT_ERR_BAD_OPCODE);
// Note how OP_RESERVED does not count towards the opcode limit.
if (opcode > OP_16 && ++nOpCount > MAX_OPS_PER_SCRIPT)
return set_error(serror, SCRIPT_ERR_OP_COUNT);
if (opcode == OP_CAT ||
opcode == OP_SUBSTR ||
......
opcode == OP_RSHIFT)
return set_error(serror, SCRIPT_ERR_DISABLED_OPCODE); // Disabled opcodes.
// Data pushes (opcodes up to OP_PUSHDATA4): push the bytes that GetOp extracted.
if (fExec && 0 <= opcode && opcode <= OP_PUSHDATA4) {
if (fRequireMinimal && !CheckMinimalPush(vchPushValue, opcode)) {
return set_error(serror, SCRIPT_ERR_MINIMALDATA);
}
stack.push_back(vchPushValue);
// Other opcodes are dispatched when executing; opcodes in the OP_IF..OP_ENDIF range are dispatched even when fExec is false.
} else if (fExec || (OP_IF <= opcode && opcode <= OP_ENDIF))
switch (opcode)
{
//
// Push value
//
case OP_1NEGATE:
case OP_1:
case OP_2:
.....
case OP_15:
case OP_16:
{
// ( -- value)
// The pushed number is the opcode's offset from OP_1 - 1 (so OP_1 pushes 1).
CScriptNum bn((int)opcode - (int)(OP_1 - 1));
stack.push_back(bn.getvch());
// The result of these opcodes should always be the minimal way to push the data
// they push, so no need for a CheckMinimalPush here.
}
break;
…..
}
// Any exception thrown during evaluation is reported as SCRIPT_ERR_UNKNOWN_ERROR.
catch (...)
{
return set_error(serror, SCRIPT_ERR_UNKNOWN_ERROR);
}
}
从上可知,目前实现的多次脚本验证并不是一个通用方案,而是通过检测脚本类型(P2SH)所做的特殊处理,只支持P2SH这一种脚本类型
P2SH解锁脚本scriptSig生成
下面来看看p2sh的scriptSig怎么生成的
// Builds the scriptSig that spends fromPubKey and stores it in sigdata.scriptSig.
// NOTE: abridged excerpt — no return statement is shown, so the rest of the function is presumably elided.
bool ProduceSignature(const BaseSignatureCreator& creator, const CScript& fromPubKey, SignatureData& sigdata)
{
std::vector<valtype> result;
txnouttype whichType;
// For a P2SH fromPubKey this first SignStep reports TX_SCRIPTHASH and returns the matching
// redeem script in result.
bool solved = SignStep(creator, fromPubKey, result, whichType, SigVersion::BASE);
bool P2SH = false;
CScript subscript;
sigdata.scriptWitness.stack.clear();
// After the first SignStep, whichType == TX_SCRIPTHASH for a P2SH output
// and result[0] holds the serialized redeem script.
if (solved && whichType == TX_SCRIPTHASH)
{
// Solver returns the subscript that needs to be evaluated;
// the final scriptSig is the signatures from that
// and then the serialized subscript:
// Deserialize the redeem script bytes back into the multisig locking script
// 2 PK1 PK2 PK3 PK4 PK5 5 OP_CHECKMULTISIG
subscript = CScript(result[0].begin(), result[0].end());
// The second SignStep treats subscript as the scriptPubKey, recognises it as multisig and
// returns the signatures (<Sig1> <Sig2> in the example) in result. Per SignStep's TX_MULTISIG
// case, an empty element is pushed ahead of the signatures.
solved = solved && SignStep(creator, subscript, result, whichType, SigVersion::BASE) && whichType != TX_SCRIPTHASH;
P2SH = true;
}
// result already held the subscript after the first SignStep, so why push it again?
// Because SignStep clears its output vector on every call (ret.clear()), the serialized
// subscript must be appended again after the signatures.
if (P2SH) {
result.push_back(std::vector<unsigned char>(subscript.begin(), subscript.end()));
}
// Store the final scriptSig, built from all elements of result.
sigdata.scriptSig = PushAll(result);
}
/**
 * Abridged excerpt of SignStep showing only the P2SH (TX_SCRIPTHASH) case; the
 * complete switch is listed in the appendix of this article.
 *
 * Classifies scriptPubKey with Solver and, for a P2SH output, looks up the redeem
 * script by the script hash Solver extracted.
 *
 * @param creator       signature creator; its Provider() is asked for the redeem script.
 * @param scriptPubKey  the locking script to produce unlocking data for.
 * @param ret           output elements; cleared at the start of every call.
 * @param whichTypeRet  receives the script type determined by Solver.
 * @param sigversion    signature version (not used by the case shown here).
 * @return true when the redeem script was found and appended to ret; false when
 *         Solver fails or the redeem script is unknown to the provider.
 */
static bool SignStep(const BaseSignatureCreator& creator, const CScript& scriptPubKey,
                     std::vector<valtype>& ret, txnouttype& whichTypeRet, SigVersion sigversion)
{
    // scriptRet is declared exactly once; the original excerpt repeated this
    // declaration, which is a redefinition error.
    CScript scriptRet;
    uint160 h160;
    ret.clear();

    std::vector<valtype> vSolutions;
    if (!Solver(scriptPubKey, whichTypeRet, vSolutions))
        return false;

    CKeyID keyID;
    switch (whichTypeRet)
    {
    case TX_SCRIPTHASH:
        // Return the serialized redeem script as the unlocking data for the P2SH output.
        if (creator.Provider().GetCScript(uint160(vSolutions[0]), scriptRet)) {
            ret.push_back(std::vector<unsigned char>(scriptRet.begin(), scriptRet.end()));
            return true;
        }
        return false;
    }
}
/**
 * Look up a redeem script by its script ID.
 *
 * @param hash             script ID used as the key into mapScripts.
 * @param redeemScriptOut  receives the stored script when one is found; left untouched otherwise.
 * @return true when a script is stored under `hash`, false otherwise.
 */
bool CBasicKeyStore::GetCScript(const CScriptID &hash, CScript& redeemScriptOut) const
{
    LOCK(cs_KeyStore);
    const auto found = mapScripts.find(hash);
    if (found == mapScripts.end()) {
        return false;
    }
    redeemScriptOut = found->second;
    return true;
}
其实,还有一个疑问点,就是怎么根据P2SH的锁定脚本数据找到解锁脚本Redeemscript脚本的呢?
P2SH的锁定脚本数据有用的数据只有hash值,也即是如何通过hash找到Redeemscript脚本的呢?
仔细看的话,应该能看出一些端倪,就是mapScripts维护了一个hash和具体Redeemscript类型的scriptSig脚本的map关系.那你可能会说,整个链上这么多P2SH交易,hash就会很多,那这个mapScripts得多大啊。由于P2SH这个hash是<2 PK1 PK2 PK3 PK4 PK5 5 OP_CHECKMULTISIG>生成的,所以只有属于本地节点多签名账号生成的P2SH才需要添加到mapScripts.且肯定是先有多签名账号才有P2SH交易,因此只需要在创建多签名账号的时候生成对应的Redeemscript并保存在mapScripts即可,事实上确实如此,流程如下。
// RPC handler excerpt: creates the multisig redeem script and registers it with the wallet.
// NOTE: abridged — request parsing, the definitions of `required`, `pubkeys` and `pwallet`,
// and the construction of the returned value are not shown.
UniValue addmultisigaddress(const JSONRPCRequest& request)
{
    // Construct using pay-to-script-hash:
    CScript inner = CreateMultisigRedeemscript(required, pubkeys);
    // Store the redeem script in the wallet.
    // NOTE(review): presumably this ends up in CBasicKeyStore::AddCScript, which keys it by CScriptID — confirm.
    pwallet->AddCScript(inner);
}
/**
 * Build the multisig redeem script for the given public keys.
 *
 * @param required  number of signatures required.
 * @param pubkeys   public keys taking part in the multisig.
 * @return the script produced by GetScriptForMultisig(required, pubkeys).
 */
CScript CreateMultisigRedeemscript(const int required, const std::vector<CPubKey>& pubkeys)
{
    return GetScriptForMultisig(required, pubkeys);
}
/**
 * Assemble a multisig script of the form
 *   <nRequired> <key 1> ... <key n> <n> OP_CHECKMULTISIG
 *
 * @param nRequired  number of required signatures, encoded with CScript::EncodeOP_N.
 * @param keys       public keys, pushed in the order given.
 * @return the assembled script.
 */
CScript GetScriptForMultisig(int nRequired, const std::vector<CPubKey>& keys)
{
    CScript multisig;

    // Required-signature count first.
    multisig << CScript::EncodeOP_N(nRequired);

    // Then every public key as a data push.
    for (auto it = keys.begin(); it != keys.end(); ++it) {
        multisig << ToByteVector(*it);
    }

    // Finally the key count and the multisig opcode.
    multisig << CScript::EncodeOP_N(keys.size());
    multisig << OP_CHECKMULTISIG;
    return multisig;
}
/**
 * Store a redeem script so it can later be found by its script ID.
 *
 * Scripts longer than MAX_SCRIPT_ELEMENT_SIZE are rejected: the call returns the
 * value of error(...) and nothing is stored.
 *
 * @param redeemScript  script to store under CScriptID(redeemScript).
 * @return true once the script has been stored.
 */
bool CBasicKeyStore::AddCScript(const CScript& redeemScript)
{
    const bool tooLarge = redeemScript.size() > MAX_SCRIPT_ELEMENT_SIZE;
    if (tooLarge) {
        return error("CBasicKeyStore::AddCScript(): redeemScripts > %i bytes are invalid", MAX_SCRIPT_ELEMENT_SIZE);
    }

    // Take the keystore lock only after the size check has passed.
    LOCK(cs_KeyStore);
    mapScripts[CScriptID(redeemScript)] = redeemScript;
    return true;
}
Solver函数
上面在分析P2SH的scriptSig生成时就提到过Solver,那Solver函数究竟是做什么用的?它是用来解释scriptPubKey的:分析出scriptPubKey是什么类型的脚本(比如判断一个scriptPubKey是否是P2SH),检验数据的合法性,并取出其中的数据(如公钥、公钥哈希或脚本哈希)供后续签名使用。
// Classifies scriptPubKey by matching it against a set of script templates.
// On a match, typeRet receives the template's type, vSolutionsRet the data extracted from the
// script, and the function returns true. When nothing matches, vSolutionsRet is cleared,
// typeRet is set to TX_NONSTANDARD and the function returns false.
// NOTE(review): this excerpt contains no P2SH (TX_SCRIPTHASH) detection and no branch for the
// OP_SMALLINTEGER placeholder, so those template positions fall through to the exact-match
// comparison. Upstream presumably handles both — confirm.
bool Solver(const CScript& scriptPubKey, txnouttype& typeRet, std::vector<std::vector<unsigned char> >& vSolutionsRet)
{
// Templates
// Built once, on the first call that finds the container empty.
static std::multimap<txnouttype, CScript> mTemplates;
if (mTemplates.empty())
{
// Standard tx, sender provides pubkey, receiver adds signature
mTemplates.insert(std::make_pair(TX_PUBKEY, CScript() << OP_PUBKEY << OP_CHECKSIG));
// Bitcoin address tx, sender provides hash of pubkey, receiver provides signature and pubkey
mTemplates.insert(std::make_pair(TX_PUBKEYHASH, CScript() << OP_DUP << OP_HASH160 << OP_PUBKEYHASH << OP_EQUALVERIFY << OP_CHECKSIG));
// Sender provides N pubkeys, receivers provides M signatures
mTemplates.insert(std::make_pair(TX_MULTISIG, CScript() << OP_SMALLINTEGER << OP_PUBKEYS << OP_SMALLINTEGER << OP_CHECKMULTISIG));
}
vSolutionsRet.clear();
// Interpreting a scriptPubKey means comparing it with each template while extracting
// the data it embeds (for example public keys or a public-key hash).
// Scan templates
const CScript& script1 = scriptPubKey;
for (const std::pair<txnouttype, CScript>& tplate : mTemplates)
{
const CScript& script2 = tplate.second;
// Discard anything collected while trying the previous template.
vSolutionsRet.clear();
opcodetype opcode1, opcode2;
std::vector<unsigned char> vch1, vch2;
// Compare
// pc1 walks the script under test, pc2 walks the template.
CScript::const_iterator pc1 = script1.begin();
CScript::const_iterator pc2 = script2.begin();
while (true)
{
// Both scripts consumed at the same time: every element matched.
if (pc1 == script1.end() && pc2 == script2.end())
{
// Found a match
typeRet = tplate.first;
if (typeRet == TX_MULTISIG)
{
// Additional checks for TX_MULTISIG:
// m and n are read from the first byte of the first and last solution entries;
// the entries in between must number exactly n.
unsigned char m = vSolutionsRet.front()[0];
unsigned char n = vSolutionsRet.back()[0];
if (m < 1 || n < 1 || m > n || vSolutionsRet.size()-2 != n)
return false;
}
return true;
}
// A failed read on either side ends the attempt for this template.
if (!script1.GetOp(pc1, opcode1, vch1))
break;
if (!script2.GetOp(pc2, opcode2, vch2))
break;
// Template matching opcodes:
if (opcode2 == OP_PUBKEYS)
{
// Collect consecutive pushes whose size is in the 33..65 byte range accepted for public keys.
while (vch1.size() >= 33 && vch1.size() <= 65)
{
vSolutionsRet.push_back(vch1);
if (!script1.GetOp(pc1, opcode1, vch1))
break;
}
// Advance the template past the OP_PUBKEYS placeholder.
if (!script2.GetOp(pc2, opcode2, vch2))
break;
// Normal situation is to fall through
// to other if/else statements
}
if (opcode2 == OP_PUBKEY)
{
// A single public key: the push must be 33..65 bytes long.
if (vch1.size() < 33 || vch1.size() > 65)
break;
vSolutionsRet.push_back(vch1);
}
else if (opcode2 == OP_PUBKEYHASH)
{
// A public-key hash: the push must be exactly sizeof(uint160) bytes.
if (vch1.size() != sizeof(uint160))
break;
vSolutionsRet.push_back(vch1);
}
else if (opcode1 != opcode2 || vch1 != vch2)
{
// Others must match exactly
break;
}
}
}
// No template matched.
vSolutionsRet.clear();
typeRet = TX_NONSTANDARD;
return false;
}
附录:
脚本指令解释过程分析
上面在分析到EvalScript执行函数时提到脚本执行就是不停GetOp然后case处理的过程,下面就来分析下GetOp
// Reads the next opcode at pc without returning any push data: delegates to GetOp2
// with a null data pointer. Returns whatever GetOp2 returns.
bool GetOp(const_iterator& pc, opcodetype& opcodeRet) const
{
return GetOp2(pc, opcodeRet, nullptr);
}
/**
 * Decode one instruction starting at pc.
 *
 * @param pc         in/out read position; on success it points just past the instruction
 *                   (opcode, optional length bytes and pushed data).
 * @param opcodeRet  receives the decoded opcode; set to OP_INVALIDOPCODE on failure.
 * @param pvchRet    optional; when non-null it is cleared and, for data pushes,
 *                   filled with the pushed bytes.
 * @return false when no bytes remain or the instruction is truncated, true otherwise.
 */
bool GetOp2(const_iterator& pc, opcodetype& opcodeRet, std::vector<unsigned char>* pvchRet) const
{
    // Put the outputs into a defined state before anything can fail.
    opcodeRet = OP_INVALIDOPCODE;
    if (pvchRet != nullptr) {
        pvchRet->clear();
    }

    // Nothing left to read.
    if (pc >= end() || end() - pc < 1) {
        return false;
    }

    // The first byte is the opcode itself.
    const unsigned int opcode = *pc++;

    // Opcodes up to OP_PUSHDATA4 carry immediate data.
    if (opcode <= OP_PUSHDATA4) {
        unsigned int nSize = 0;
        switch (opcode) {
        case OP_PUSHDATA1:
            // One length byte follows.
            if (end() - pc < 1) {
                return false;
            }
            nSize = *pc++;
            break;
        case OP_PUSHDATA2:
            // Two little-endian length bytes follow.
            if (end() - pc < 2) {
                return false;
            }
            nSize = ReadLE16(&pc[0]);
            pc += 2;
            break;
        case OP_PUSHDATA4:
            // Four little-endian length bytes follow: OP_PUSHDATA4 [xx][xx][xx][xx] <data>
            if (end() - pc < 4) {
                return false;
            }
            nSize = ReadLE32(&pc[0]);
            pc += 4;
            break;
        default:
            // Direct push: the opcode value itself is the number of bytes to push.
            if (opcode < OP_PUSHDATA1) {
                nSize = opcode;
            }
            break;
        }

        // The announced payload must fit in what is left of the script.
        const bool truncated = end() - pc < 0 || static_cast<unsigned int>(end() - pc) < nSize;
        if (truncated) {
            return false;
        }

        // Copy out the actual data bytes when the caller asked for them.
        if (pvchRet != nullptr) {
            pvchRet->assign(pc, pc + nSize);
        }
        pc += nSize;
    }

    opcodeRet = static_cast<opcodetype>(opcode);
    return true;
}
其他脚本的scriptSig生成
/**
 * Produce the unlocking data for one scriptPubKey.
 *
 * Solver classifies scriptPubKey; depending on the type, the elements needed to
 * satisfy it are appended to ret:
 *  - TX_PUBKEY:     whatever Sign1 produces for the embedded public key's ID.
 *  - TX_PUBKEYHASH: Sign1's output followed by the public key.
 *  - TX_SCRIPTHASH: the serialized redeem script looked up by script hash.
 *  - TX_MULTISIG:   an empty element followed by SignN's output.
 *
 * @param creator       signature creator; its Provider() supplies keys and scripts.
 * @param scriptPubKey  the locking script to satisfy.
 * @param ret           output elements; cleared at the start of every call.
 * @param whichTypeRet  receives the script type determined by Solver.
 * @param sigversion    signature version forwarded to Sign1 / SignN.
 * @return true when the required elements were produced; false when Solver fails,
 *         the type cannot be signed for, or signing / lookup fails.
 */
static bool SignStep(const BaseSignatureCreator& creator, const CScript& scriptPubKey,
                     std::vector<valtype>& ret, txnouttype& whichTypeRet, SigVersion sigversion)
{
    CScript scriptRet;
    uint160 h160;
    ret.clear();

    std::vector<valtype> vSolutions;
    if (!Solver(scriptPubKey, whichTypeRet, vSolutions))
        return false;

    CKeyID keyID;
    switch (whichTypeRet)
    {
    case TX_NONSTANDARD:
    case TX_NULL_DATA:
    case TX_WITNESS_UNKNOWN:
        return false;
    case TX_PUBKEY:
        // P2PK template: sign for the public key embedded in the script.
        keyID = CPubKey(vSolutions[0]).GetID();
        return Sign1(keyID, creator, scriptPubKey, ret, sigversion);
    case TX_PUBKEYHASH:
        // P2PKH: sign, then append the public key that matches the hash.
        keyID = CKeyID(uint160(vSolutions[0]));
        if (!Sign1(keyID, creator, scriptPubKey, ret, sigversion))
            return false;
        else
        {
            CPubKey vch;
            // The lookup result is not checked; vch is pushed as-is.
            creator.Provider().GetPubKey(keyID, vch);
            ret.push_back(ToByteVector(vch));
        }
        return true;
    case TX_SCRIPTHASH:
        // P2SH: return the serialized redeem script registered under the script hash.
        if (creator.Provider().GetCScript(uint160(vSolutions[0]), scriptRet)) {
            ret.push_back(std::vector<unsigned char>(scriptRet.begin(), scriptRet.end()));
            return true;
        }
        return false;
    case TX_MULTISIG:
        // Multisig: leading empty element, then the signatures.
        ret.push_back(valtype()); // workaround CHECKMULTISIG bug
        return (SignN(vSolutions, creator, scriptPubKey, ret, sigversion));
    default:
        return false;
    }
}
/********************************
* 本文来自****博主"爱踢门"
******************************************/