Commit 69e7b23f authored by caopingcp

improve tendermint stability

parent 8f8ae28f
......@@ -200,7 +200,7 @@ func (node *Node) Start() {
ip, _ := splitHostPort(addr)
_, ok := node.localIPs[ip]
if ok {
tendermintlog.Info("find our ip ", "ourip", ip)
tendermintlog.Info("find our ip ", "ourIP", ip)
node.IP = ip
return
}
......@@ -325,6 +325,8 @@ func (node *Node) UnicastRoutine() {
}
for _, peer := range node.peerSet.List() {
if peer.ID() == msg.PeerID {
peerIP, _ := peer.RemoteIP()
msg.PeerIP = peerIP.String()
success := peer.Send(msg)
if !success {
tendermintlog.Error("send failure in UnicastRoutine")
......@@ -400,6 +402,7 @@ func (node *Node) addPeer(pc *peerConn) error {
ID: node.ID,
Network: node.Network,
Version: node.Version,
IP: node.IP,
}
// Exchange NodeInfo on the conn
peerNodeInfo, err := pc.HandshakeTimeout(nodeinfo, handshakeTimeout*time.Second)
......@@ -460,6 +463,9 @@ func (node *Node) addPeer(pc *peerConn) error {
}
tendermintlog.Info("Added peer", "peer", pc.ip)
stateMsg := MsgInfo{TypeID: ttypes.NewRoundStepID, Msg: node.state.RoundStateMessage(), PeerID: pc.id, PeerIP: pc.ip.String()}
pc.Send(stateMsg)
tendermintlog.Info("Send state msg", "msg", stateMsg, "ourIP", node.IP, "ourID", node.ID)
return nil
}
......@@ -472,6 +478,9 @@ func (node *Node) Broadcast(msg MsgInfo) chan bool {
wg.Add(1)
go func(peer Peer) {
defer wg.Done()
msg.PeerID = peer.ID()
peerIP, _ := peer.RemoteIP()
msg.PeerIP = peerIP.String()
success := peer.Send(msg)
successChan <- success
}(peer)
......
This diff is collapsed.
......@@ -9,6 +9,7 @@ import (
"fmt"
"math/rand"
"os"
"sync/atomic"
"time"
"github.com/33cn/chain33/common/crypto"
......@@ -47,12 +48,13 @@ var (
preExec = false
createEmptyBlocksInterval int32 // second
validatorNodes = []string{"127.0.0.1:46656"}
peerGossipSleepDuration int32 = 200
peerGossipSleepDuration int32 = 100
peerQueryMaj23SleepDuration int32 = 2000
zeroHash [32]byte
random *rand.Rand
signName = "ed25519"
useAggSig = false
gossipVotes atomic.Value
)
func init() {
......@@ -150,6 +152,7 @@ func applyConfig(sub []byte) {
signName = subcfg.SignName
}
useAggSig = subcfg.UseAggregateSignature
gossipVotes.Store(true)
}
// DefaultDBProvider returns a database using the DBBackend and DBDir
......@@ -306,12 +309,11 @@ OuterLoop:
}
tendermintlog.Info("Save state from block")
}
tendermintlog.Debug("Load state finish", "state", state)
// start
tendermintlog.Info("StartConsensus",
"privValidator", fmt.Sprintf("%X", ttypes.Fingerprint(client.privValidator.GetAddress())),
"Validators", state.Validators.String())
"state", state)
// Log whether this node is a validator or an observer
if state.Validators.HasAddress(client.privValidator.GetAddress()) {
tendermintlog.Info("This node is a validator")
......@@ -424,7 +426,6 @@ func (client *Client) ProcEvent(msg *queue.Message) bool {
// CreateBlock a routine monitor whether some transactions available and tell client by available channel
func (client *Client) CreateBlock() {
issleep := true
for {
if client.IsClosed() {
tendermintlog.Info("CreateBlock quit")
......@@ -432,23 +433,18 @@ func (client *Client) CreateBlock() {
}
if !client.csState.IsRunning() {
tendermintlog.Info("consensus not running")
time.Sleep(time.Second)
time.Sleep(500 * time.Millisecond)
continue
}
if issleep {
time.Sleep(time.Second)
}
height, err := client.getLastHeight()
if err != nil {
issleep = true
continue
}
if !client.CheckTxsAvailable(height) {
issleep = true
time.Sleep(500 * time.Millisecond)
continue
}
issleep = false
client.txsAvailable <- height + 1
time.Sleep(time.Duration(timeoutTxAvail) * time.Millisecond)
......@@ -590,7 +586,7 @@ func (client *Client) QueryValidatorsByHeight(height int64) (*tmtypes.ValNodes,
if height < 1 {
return nil, ttypes.ErrHeightLessThanOne
}
req := &tmtypes.ReqNodeInfo{Height: height}
req := &tmtypes.ReqValNodes{Height: height}
param, err := proto.Marshal(req)
if err != nil {
tendermintlog.Error("QueryValidatorsByHeight marshal", "err", err)
......@@ -670,22 +666,43 @@ func (client *Client) Query_IsHealthy(req *types.ReqNil) (types.Message, error)
// Query_NodeInfo reports one entry per validator in the current round state.
// The req argument is not read. For each validator the node ID is derived
// from its public key and the node IP is resolved either from this node's own
// IP or from the peer set; values that cannot be resolved keep the "UNKOWN"
// placeholder. The returned error is always nil.
func (client *Client) Query_NodeInfo(req *types.ReqNil) (types.Message, error) {
nodes := client.csState.GetRoundState().Validators.Validators
validators := make([]*tmtypes.Validator, 0)
for _, node := range nodes {
if node == nil {
validators = append(validators, &tmtypes.Validator{})
vals := client.csState.GetRoundState().Validators.Validators
nodes := make([]*tmtypes.ValNodeInfo, 0)
for _, val := range vals {
if val == nil {
nodes = append(nodes, &tmtypes.ValNodeInfo{})
} else {
item := &tmtypes.Validator{
Address: node.Address,
PubKey: node.PubKey,
VotingPower: node.VotingPower,
Accum: node.Accum,
// Placeholders used when the public key cannot be parsed or no IP is known.
// NOTE(review): "UNKOWN" looks like a misspelling of "UNKNOWN"; it is part of
// the query output, so changing it would change what clients receive.
ipstr, idstr := "UNKOWN", "UNKOWN"
pub, err := ttypes.ConsensusCrypto.PubKeyFromBytes(val.PubKey)
if err != nil {
tendermintlog.Error("Query_NodeInfo invalid pubkey", "err", err)
} else {
id := GenIDByPubKey(pub)
idstr = string(id)
// The validator is this node itself: use our own IP instead of a peer lookup.
if id == client.node.ID {
ipstr = client.node.IP
} else {
ip := client.node.peerSet.GetIP(id)
if ip == nil {
tendermintlog.Error("Query_NodeInfo nil ip", "id", idstr)
} else {
ipstr = ip.String()
}
}
}
// Address and PubKey are rendered as upper-case hex strings.
item := &tmtypes.ValNodeInfo{
NodeIP: ipstr,
NodeID: idstr,
Address: fmt.Sprintf("%X", val.Address),
PubKey: fmt.Sprintf("%X", val.PubKey),
VotingPower: val.VotingPower,
Accum: val.Accum,
}
validators = append(validators, item)
nodes = append(nodes, item)
}
}
return &tmtypes.ValidatorSet{Validators: validators, Proposer: &tmtypes.Validator{}}, nil
return &tmtypes.ValNodeInfoSet{Nodes: nodes}, nil
}
// CmpBestBlock 比较newBlock是不是最优区块
......
......@@ -272,7 +272,7 @@ func CheckState(t *testing.T, client *Client) {
assert.Equal(t, client.csState.Prevote(0), 1000*time.Millisecond)
assert.Equal(t, client.csState.Precommit(0), 1000*time.Millisecond)
assert.Equal(t, client.csState.PeerGossipSleep(), 200*time.Millisecond)
assert.Equal(t, client.csState.PeerGossipSleep(), 100*time.Millisecond)
assert.Equal(t, client.csState.PeerQueryMaj23Sleep(), 2000*time.Millisecond)
assert.Equal(t, client.csState.IsProposer(), true)
assert.Nil(t, client.csState.GetPrevotesState(state.LastBlockHeight, 0, nil))
......@@ -286,7 +286,7 @@ func CheckState(t *testing.T, client *Client) {
msg2, err := client.Query_NodeInfo(&types.ReqNil{})
assert.Nil(t, err)
tvals := msg2.(*vty.ValidatorSet).Validators
tvals := msg2.(*vty.ValNodeInfoSet).Nodes
assert.Len(t, tvals, 1)
err = client.CommitBlock(client.GetCurrentBlock())
......
......@@ -201,7 +201,7 @@ func (h *Header) StringIndented(indent string) string {
%s LastCommit: %v
%s Validators: %v
%s App: %v
%s Conensus: %v
%s Consensus: %v
%s Results: %v
%s}#%v`,
indent, h.ChainID,
......@@ -304,7 +304,7 @@ func (commit *Commit) IsCommit() bool {
// GetAggVote ...
func (commit *Commit) GetAggVote() *AggVote {
if commit == nil {
if commit == nil || commit.AggVote == nil {
return nil
}
aggVote := &AggVote{commit.AggVote}
......
......@@ -22,14 +22,16 @@ var (
// step and message id define
const (
RoundStepNewHeight = RoundStepType(0x01) // Wait til CommitTime + timeoutCommit
RoundStepNewRound = RoundStepType(0x02) // Setup new round and go to RoundStepPropose
RoundStepPropose = RoundStepType(0x03) // Did propose, gossip proposal
RoundStepPrevote = RoundStepType(0x04) // Did prevote, gossip prevotes
RoundStepPrevoteWait = RoundStepType(0x05) // Did receive any +2/3 prevotes, start timeout
RoundStepPrecommit = RoundStepType(0x06) // Did precommit, gossip precommits
RoundStepPrecommitWait = RoundStepType(0x07) // Did receive any +2/3 precommits, start timeout
RoundStepCommit = RoundStepType(0x08) // Entered commit state machine
RoundStepNewHeight = RoundStepType(0x01) // Wait til CommitTime + timeoutCommit
RoundStepNewRound = RoundStepType(0x02) // Setup new round and go to RoundStepPropose
RoundStepPropose = RoundStepType(0x03) // Did propose, gossip proposal
RoundStepPrevote = RoundStepType(0x04) // Did prevote, gossip prevotes
RoundStepAggPrevoteWait = RoundStepType(0x05) // Did send prevote for aggregate, start timeout
RoundStepPrevoteWait = RoundStepType(0x06) // Did receive any +2/3 prevotes, start timeout
RoundStepPrecommit = RoundStepType(0x07) // Did precommit, gossip precommits
RoundStepAggPrecommitWait = RoundStepType(0x08) // Did send precommit for aggregate, start timeout
RoundStepPrecommitWait = RoundStepType(0x09) // Did receive any +2/3 precommits, start timeout
RoundStepCommit = RoundStepType(0x10) // Entered commit state machine
// NOTE: RoundStepNewHeight acts as RoundStepCommitWait.
NewRoundStepID = byte(0x01)
......@@ -84,6 +86,10 @@ func (rs RoundStepType) String() string {
return "RoundStepPrecommitWait"
case RoundStepCommit:
return "RoundStepCommit"
case RoundStepAggPrevoteWait:
return "RoundStepAggPrevoteWait"
case RoundStepAggPrecommitWait:
return "RoundStepAggPrecommitWait"
default:
return "RoundStepUnknown" // Cannot panic.
}
......@@ -188,6 +194,7 @@ type PeerRoundState struct {
LastCommit *BitArray // All commit precommits of commit for last height.
CatchupCommitRound int // Round that we have commit for. Not necessarily unique. -1 if none.
CatchupCommit *BitArray // All commit precommits peer has for this height & CatchupCommitRound
AggPrevote bool // True if peer has aggregate prevote for this round
AggPrecommit bool // True if peer has aggregate precommit for this round
}
......@@ -208,6 +215,7 @@ func (prs PeerRoundState) StringIndented(indent string) string {
%s Precommits %v
%s LastCommit %v (round %v)
%s CatchupCommit %v (round %v)
%s AggPrevote %v
%s AggPrecommit %v
%s}`,
indent, prs.Height, prs.Round, prs.Step, prs.StartTime,
......@@ -219,6 +227,7 @@ func (prs PeerRoundState) StringIndented(indent string) string {
indent, prs.Precommits,
indent, prs.LastCommit, prs.LastCommitRound,
indent, prs.CatchupCommit, prs.CatchupCommitRound,
indent, prs.AggPrevote,
indent, prs.AggPrecommit,
indent)
}
......
......@@ -40,6 +40,7 @@ func ValCmd() *cobra.Command {
IsSyncCmd(),
GetBlockInfoCmd(),
GetNodeInfoCmd(),
GetPerfStatCmd(),
AddNodeCmd(),
CreateCmd(),
)
......@@ -75,7 +76,7 @@ func GetNodeInfoCmd() *cobra.Command {
// getNodeInfo reads the rpc_laddr flag and issues the "valnode.GetNodeInfo"
// JSON-RPC call with nil parameters, passing &res as the result destination.
// Presumably this is the Run handler of GetNodeInfoCmd; the wiring is not
// visible here. args is not used.
func getNodeInfo(cmd *cobra.Command, args []string) {
// The error from the flag lookup is discarded.
rpcLaddr, _ := cmd.Flags().GetString("rpc_laddr")
var res []*vt.Validator
var res *vt.ValNodeInfoSet
ctx := jsonclient.NewRPCCtx(rpcLaddr, "valnode.GetNodeInfo", nil, &res)
ctx.Run()
}
......@@ -113,6 +114,41 @@ func getBlockInfo(cmd *cobra.Command, args []string) {
ctx.Run()
}
// GetPerfStatCmd returns the "stat" subcommand, which reports tendermint
// performance statistics. The command runs getPerfStat and takes the flags
// registered by addGetPerfStatFlags.
func GetPerfStatCmd() *cobra.Command {
	statCmd := new(cobra.Command)
	statCmd.Use = "stat"
	statCmd.Short = "Get tendermint performance statistics"
	statCmd.Run = getPerfStat

	addGetPerfStatFlags(statCmd)
	return statCmd
}
// addGetPerfStatFlags registers the --start/-s and --end/-e int64 flags on
// cmd. Both default to 0.
func addGetPerfStatFlags(cmd *cobra.Command) {
	flagSet := cmd.Flags()
	flagSet.Int64P("start", "s", 0, "start block height")
	flagSet.Int64P("end", "e", 0, "end block height")
}
// getPerfStat is the Run handler of the "stat" subcommand. It reads the
// rpc_laddr, start and end flags, wraps start/end in a ReqPerfStat and sends
// it through "Chain33.Query" to the ValNodeX executor's "GetPerfState" query,
// with a PerfStat as the result destination. args is not used.
func getPerfStat(cmd *cobra.Command, args []string) {
	flagSet := cmd.Flags()
	// Errors from the flag lookups are discarded.
	rpcLaddr, _ := flagSet.GetString("rpc_laddr")
	startHeight, _ := flagSet.GetInt64("start")
	endHeight, _ := flagSet.GetInt64("end")

	query := rpctypes.Query4Jrpc{
		Execer:   vt.ValNodeX,
		FuncName: "GetPerfState",
		Payload: types.MustPBToJSON(&vt.ReqPerfStat{
			Start: startHeight,
			End:   endHeight,
		}),
	}

	var stat vt.PerfStat
	rpcCtx := jsonclient.NewRPCCtx(rpcLaddr, "Chain33.Query", query, &stat)
	rpcCtx.Run()
}
// AddNodeCmd add validator node
func AddNodeCmd() *cobra.Command {
cmd := &cobra.Command{
......
......@@ -10,7 +10,7 @@ import (
)
// Query_GetValNodeByHeight method
func (val *ValNode) Query_GetValNodeByHeight(in *pty.ReqNodeInfo) (types.Message, error) {
func (val *ValNode) Query_GetValNodeByHeight(in *pty.ReqValNodes) (types.Message, error) {
height := in.GetHeight()
if height <= 0 {
......@@ -60,3 +60,60 @@ func (val *ValNode) Query_GetBlockInfoByHeight(in *pty.ReqBlockInfo) (types.Mess
}
return reply, nil
}
// Query_GetPerfState computes throughput statistics over the block range
// [start, end] from the TendermintBlockInfo records kept in the local DB.
//
// A zero start means height 1 and a zero end means the current height
// (val.GetHeight()). The defaults are resolved before the range is compared,
// so a request such as {start: 5, end: 0} covers height 5 up to the current
// height instead of being rejected.
//
// Errors:
//   - types.ErrInvalidParam for a negative bound, an end above the current
//     height, or a resolved start greater than the resolved end.
//   - types.ErrNotFound when a boundary record is missing or has no header.
//   - any error returned by the local DB lookup or by decoding.
//
// TotalTx is the difference of the two headers' TotalTxs counters, TotalBlock
// is the height difference plus one and TotalSecond is the header time
// difference plus one. NOTE(review): this assumes header times are expressed
// in seconds — confirm against the block producer.
// TxPerBlock and TxPerSecond use integer division and are left at 0 when
// their divisor is not positive, so inconsistent stored headers cannot cause
// a divide-by-zero panic.
func (val *ValNode) Query_GetPerfState(in *pty.ReqPerfStat) (types.Message, error) {
	start, end := in.GetStart(), in.GetEnd()
	height := val.GetHeight()
	if start < 0 || end < 0 || end > height {
		return nil, types.ErrInvalidParam
	}
	if start == 0 {
		start = 1
	}
	if end == 0 {
		end = height
	}
	// Compare only after defaults are applied: end == 0 stands for the
	// current height, not for height zero.
	if start > end {
		return nil, types.ErrInvalidParam
	}

	startInfo, err := val.blockInfoAtHeight(start)
	if err != nil {
		return nil, err
	}
	endInfo, err := val.blockInfoAtHeight(end)
	if err != nil {
		return nil, err
	}

	startHeader := startInfo.Block.Header
	endHeader := endInfo.Block.Header
	totalTx := endHeader.TotalTxs - startHeader.TotalTxs
	totalBlock := endHeader.Height - startHeader.Height + 1
	totalSecond := endHeader.Time - startHeader.Time + 1

	stat := &pty.PerfStat{
		TotalTx:     totalTx,
		TotalBlock:  totalBlock,
		TotalSecond: totalSecond,
	}
	if totalBlock > 0 {
		stat.TxPerBlock = totalTx / totalBlock
	}
	if totalSecond > 0 {
		stat.TxPerSecond = totalTx / totalSecond
	}
	return stat, nil
}

// blockInfoAtHeight loads and decodes the TendermintBlockInfo stored in the
// local DB under CalcValNodeBlockInfoHeightKey(height). It returns
// types.ErrNotFound when the stored value is empty or the decoded record has
// no block header, so callers may dereference Block.Header safely.
func (val *ValNode) blockInfoAtHeight(height int64) (*pty.TendermintBlockInfo, error) {
	value, err := val.GetLocalDB().Get(CalcValNodeBlockInfoHeightKey(height))
	if err != nil {
		return nil, err
	}
	if len(value) == 0 {
		return nil, types.ErrNotFound
	}
	info := &pty.TendermintBlockInfo{}
	if err := types.Decode(value, info); err != nil {
		return nil, err
	}
	if info.Block == nil || info.Block.Header == nil {
		return nil, types.ErrNotFound
	}
	return info, nil
}
......@@ -21,7 +21,7 @@ message ValNodeAction {
int32 Ty = 3;
}
message ReqNodeInfo {
message ReqValNodes {
int64 height = 1;
}
......@@ -29,7 +29,33 @@ message ReqBlockInfo {
int64 height = 1;
}
// ValNodeInfo describes a single validator node. The notes on nodeIP, nodeID,
// address and pubKey reflect how Query_NodeInfo fills these fields.
message ValNodeInfo {
// IP of the node as a string, or a placeholder when it could not be resolved.
string nodeIP = 1;
// Node ID derived from the validator's public key, or a placeholder when the
// key could not be parsed.
string nodeID = 2;
// Validator address rendered as upper-case hex.
string address = 3;
// Validator public key rendered as upper-case hex.
string pubKey = 4;
// Voting power copied from the validator entry.
int64 votingPower = 5;
// Accum value copied from the validator entry.
int64 accum = 6;
}
// ValNodeInfoSet is the list of validator node entries returned by the
// GetNodeInfo query.
message ValNodeInfoSet {
// One entry per validator.
repeated ValNodeInfo nodes = 1;
}
// PerfStat is the result of the GetPerfState query: throughput figures for a
// range of block heights. The per-field notes describe how Query_GetPerfState
// computes them.
message PerfStat {
// Difference between the TotalTxs counters of the end and start headers.
int64 totalTx = 1;
// Height difference between the end and start headers, plus one.
int64 totalBlock = 2;
// totalTx divided by totalBlock (integer division).
int64 txPerBlock = 3;
// Time difference between the end and start headers, plus one.
// NOTE(review): the name implies seconds — confirm the header time unit.
int64 totalSecond = 4;
// totalTx divided by totalSecond (integer division).
int64 txPerSecond = 5;
}
// ReqPerfStat selects the block height range for the GetPerfState query.
message ReqPerfStat {
// First block height of the range; 0 is treated as height 1.
int64 start = 1;
// Last block height of the range; 0 is treated as the current height.
int64 end = 2;
}
// valnode declares the RPC methods for querying validator node status.
service valnode {
// IsSync takes no arguments and returns an IsHealthy message.
rpc IsSync(ReqNil) returns (IsHealthy) {}
// GetNodeInfo takes no arguments and returns the validator node list.
rpc GetNodeInfo(ReqNil) returns (ValidatorSet) {}
rpc GetNodeInfo(ReqNil) returns (ValNodeInfoSet) {}
}
\ No newline at end of file
......@@ -34,12 +34,12 @@ func (c *Jrpc) IsSync(req *types.ReqNil, result *interface{}) error {
}
// GetNodeInfo query node info
func (c *channelClient) GetNodeInfo(ctx context.Context, req *types.ReqNil) (*vt.ValidatorSet, error) {
func (c *channelClient) GetNodeInfo(ctx context.Context, req *types.ReqNil) (*vt.ValNodeInfoSet, error) {
data, err := c.QueryConsensusFunc("tendermint", "NodeInfo", &types.ReqNil{})
if err != nil {
return nil, err
}
if resp, ok := data.(*vt.ValidatorSet); ok {
if resp, ok := data.(*vt.ValNodeInfoSet); ok {
return resp, nil
}
return nil, types.ErrDecode
......@@ -51,6 +51,6 @@ func (c *Jrpc) GetNodeInfo(req *types.ReqNil, result *interface{}) error {
if err != nil {
return err
}
*result = data.Validators
*result = data
return nil
}
......@@ -63,15 +63,16 @@ func TestChannelClient_GetNodeInfo(t *testing.T) {
client := newGrpc(api)
client.Init("valnode", nil, nil, nil)
req := &types.ReqNil{}
node := &vt.Validator{
Address: []byte("aaa"),
PubKey: []byte("bbb"),
node := &vt.ValNodeInfo{
NodeIP: "127.0.0.1",
NodeID: "001",
Address: "aaa",
PubKey: "bbb",
VotingPower: 10,
Accum: -1,
}
set := &vt.ValidatorSet{
Validators: []*vt.Validator{node},
Proposer: node,
set := &vt.ValNodeInfoSet{
Nodes: []*vt.ValNodeInfo{node},
}
api.On("QueryConsensusFunc", "tendermint", "NodeInfo", req).Return(set, nil)
result, err := client.GetNodeInfo(context.Background(), req)
......@@ -84,18 +85,19 @@ func TestJrpc_GetNodeInfo(t *testing.T) {
J := newJrpc(api)
req := &types.ReqNil{}
var result interface{}
node := &vt.Validator{
Address: []byte("aaa"),
PubKey: []byte("bbb"),
node := &vt.ValNodeInfo{
NodeIP: "127.0.0.1",
NodeID: "001",
Address: "aaa",
PubKey: "bbb",
VotingPower: 10,
Accum: -1,
}
set := &vt.ValidatorSet{
Validators: []*vt.Validator{node},
Proposer: node,
set := &vt.ValNodeInfoSet{
Nodes: []*vt.ValNodeInfo{node},
}
api.On("QueryConsensusFunc", "tendermint", "NodeInfo", req).Return(set, nil)
err := J.GetNodeInfo(req, &result)
assert.Nil(t, err)
assert.EqualValues(t, set.Validators, result)
assert.EqualValues(t, set, result)
}
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment