
tendermint/tendermint 4337

⟁ Tendermint Core (BFT Consensus) in Go

tychoish/.emacs.d 27

Tycho Emacs Config Kit

tychoish/buildcloth 7

a lightweight python module for generating makefiles.

tychoish/cl-grip 3

Common Lisp pluggable logging system

tychoish/buildbot 1

Python-based continuous integration testing framework; send pull requests for your patches!

tychoish/ablog 0

ABlog for blogging with Sphinx

tychoish/amboy 0

Go-lang distributed task queue interfaces.

Pull request review comment tendermint/tendermint

rpc: implement BroadcastTxCommit without event subscriptions

 func (env *Environment) BroadcastTxSync(ctx *rpctypes.Context, tx types.Tx) (*co
 // BroadcastTxCommit returns with the responses from CheckTx and DeliverTx.
 // More: https://docs.tendermint.com/master/rpc/#/Tx/broadcast_tx_commit
 func (env *Environment) BroadcastTxCommit(ctx *rpctypes.Context, tx types.Tx) (*coretypes.ResultBroadcastTxCommit, error) { //nolint:lll
-	subscriber := ctx.RemoteAddr()
-
-	if env.EventBus.NumClients() >= env.Config.MaxSubscriptionClients {
-		return nil, fmt.Errorf("max_subscription_clients %d reached", env.Config.MaxSubscriptionClients)
-	} else if env.EventBus.NumClientSubscriptions(subscriber) >= env.Config.MaxSubscriptionsPerClient {
-		return nil, fmt.Errorf("max_subscriptions_per_client %d reached", env.Config.MaxSubscriptionsPerClient)
-	}
-
-	// Subscribe to tx being committed in block.
-	subCtx, cancel := context.WithTimeout(ctx.Context(), SubscribeTimeout)
-	defer cancel()
-	q := types.EventQueryTxFor(tx)
-	deliverTxSub, err := env.EventBus.Subscribe(subCtx, subscriber, q)
-	if err != nil {
-		err = fmt.Errorf("failed to subscribe to tx: %w", err)
-		env.Logger.Error("Error on broadcast_tx_commit", "err", err)
-		return nil, err
-	}
-	defer func() {
-		args := tmpubsub.UnsubscribeArgs{Subscriber: subscriber, Query: q}
-		if err := env.EventBus.Unsubscribe(context.Background(), args); err != nil {
-			env.Logger.Error("Error unsubscribing from eventBus", "err", err)
-		}
-	}()
-
-	// Broadcast tx and wait for CheckTx result
-	checkTxResCh := make(chan *abci.Response, 1)
-	err = env.Mempool.CheckTx(
+	resCh := make(chan *abci.Response, 1)
+	err := env.Mempool.CheckTx(
 		ctx.Context(),
 		tx,
-		func(res *abci.Response) { checkTxResCh <- res },
+		func(res *abci.Response) { resCh <- res },
 		mempool.TxInfo{},
 	)
 	if err != nil {
-		env.Logger.Error("Error on broadcastTxCommit", "err", err)
-		return nil, fmt.Errorf("error on broadcastTxCommit: %v", err)
+		return nil, err
 	}
 
-	checkTxResMsg := <-checkTxResCh
-	checkTxRes := checkTxResMsg.GetCheckTx()
+	r := (<-resCh).GetCheckTx()
 
-	if checkTxRes.Code != abci.CodeTypeOK {
+	if !indexer.KVSinkEnabled(env.EventSinks) {
 		return &coretypes.ResultBroadcastTxCommit{
-			CheckTx:   *checkTxRes,
-			DeliverTx: abci.ResponseDeliverTx{},
-			Hash:      tx.Hash(),
-		}, nil
+				CheckTx: *r,
+				Hash:    tx.Hash(),
+			},
+			errors.New("cannot wait for commit because kvEventSync is not enabled")
 	}
 
-	// Wait for the tx to be included in a block or timeout.
-	select {
-	case msg := <-deliverTxSub.Out(): // The tx was included in a block.
-		deliverTxRes := msg.Data().(types.EventDataTx)
-		return &coretypes.ResultBroadcastTxCommit{
-			CheckTx:   *checkTxRes,
-			DeliverTx: deliverTxRes.Result,
-			Hash:      tx.Hash(),
-			Height:    deliverTxRes.Height,
-		}, nil
-	case <-deliverTxSub.Canceled():
-		var reason string
-		if deliverTxSub.Err() == nil {
-			reason = "Tendermint exited"
-		} else {
-			reason = deliverTxSub.Err().Error()
+	startAt := time.Now()
+	timer := time.NewTimer(0)
+	defer timer.Stop()
+
+	for {
+		select {
+		case <-ctx.Context().Done():
+			env.Logger.Error("Error on broadcastTxCommit",
+				"duration", time.Since(startAt),
+				"err", err)
+			return &coretypes.ResultBroadcastTxCommit{
+					CheckTx: *r,
+					Hash:    tx.Hash(),
+				}, fmt.Errorf("timeout waiting for commit of tx %s (%s)",
+					tx.String(), time.Since(startAt))
+		case <-timer.C:
+			txres, err := env.Tx(ctx, tx.Hash(), false)
+			if err != nil {
+				jitter := 100*time.Millisecond + time.Duration(rand.Int63n(int64(time.Second))) // nolint: gosec

I added a 100ms backoff per attempt plus a random jitter of up to 1s, so each delay falls somewhere between 0.1s and 1.1s... I suspect the details don't matter a lot; a consistent 250ms would probably also be fine.
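The hunk above is cut off inside this branch; roughly, the rest of the retry body amounts to the sketch below, picking up the names (env, ctx, tx, r, timer) from the hunk. The reset and success-return details are my reconstruction, not necessarily the PR's exact code.

		case <-timer.C:
			// Look the tx up via the event sink; until it has been indexed
			// this returns an error, so back off and retry.
			txres, err := env.Tx(ctx, tx.Hash(), false)
			if err != nil {
				// 100ms floor plus up to 1s of random jitter between attempts.
				jitter := 100*time.Millisecond + time.Duration(rand.Int63n(int64(time.Second))) // nolint: gosec
				timer.Reset(jitter)
				continue
			}
			return &coretypes.ResultBroadcastTxCommit{
				CheckTx:   *r,
				DeliverTx: txres.TxResult,
				Hash:      tx.Hash(),
				Height:    txres.Height,
			}, nil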

tychoish

comment created time in 8 hours

PullRequestReviewEvent
PullRequestReviewEvent

push event tychoish/tendermint

tycho garen

commit sha 697a08f49d0728dc86ba1db22381d4716241eb23

fix type

view details

push time in 12 hours

pull request comment tendermint/tendermint

rpc: implement BroadcastTxCommit without event subscriptions

This seems good. Am I correct, however, in believing that the old solution would work even with a null or psql indexer hooked up (since it happens off the event stream rather than depending on the event sink's search functionality)? Maybe that's not a problem in practice, though.

Are there cases where no indexers are enabled? I'm not sure we can make the null indexer do this, exactly, but you'd know better than me...

Otherwise, I like this implementation a lot better. We could also implement Tx lookup on the psql indexer to support this case; it doesn't require a complex query.

For sure! That'd probably be a good separate patch.
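For illustration only, a by-hash lookup against a Postgres sink could be about this simple. This is a hypothetical sketch: the table and column names are assumptions, not the actual psql indexer schema.

package main // illustrative placement; in practice this would live next to the psql sink

import (
	"context"
	"database/sql"
	"fmt"
)

// lookupTxByHash is a hypothetical helper that fetches an indexed transaction
// result by hash from a Postgres event sink. Table/column names are assumed.
func lookupTxByHash(ctx context.Context, db *sql.DB, hash []byte) (height int64, result []byte, err error) {
	err = db.QueryRowContext(ctx,
		`SELECT height, tx_result FROM tx_results WHERE tx_hash = $1`,
		fmt.Sprintf("%X", hash),
	).Scan(&height, &result)
	return height, result, err
}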

tychoish

comment created time in 12 hours

push event tychoish/tendermint

Sam Kleinman

commit sha 5ccd668c788c93dc6bc0998e4590efc1094d1b2a

e2e: load should be proportional to network (#6983)

view details

tycho garen

commit sha 9b0c40831cc292bba828ab1e8854920c4023a792

add backoff

view details

tycho garen

commit sha 7e6c35318d04802325a2198dffac2a8ca2753acf

Merge remote-tracking branch 'origin/master' into rpc-broadcast-tx-commit-experiement

view details

push time in 12 hours

Pull request review comment tendermint/tendermint

rpc: implement BroadcastTxCommit without event subscriptions

 func (env *Environment) BroadcastTxSync(ctx *rpctypes.Context, tx types.Tx) (*co
 // BroadcastTxCommit returns with the responses from CheckTx and DeliverTx.
 // More: https://docs.tendermint.com/master/rpc/#/Tx/broadcast_tx_commit
 func (env *Environment) BroadcastTxCommit(ctx *rpctypes.Context, tx types.Tx) (*coretypes.ResultBroadcastTxCommit, error) { //nolint:lll
-	subscriber := ctx.RemoteAddr()
-
-	if env.EventBus.NumClients() >= env.Config.MaxSubscriptionClients {
-		return nil, fmt.Errorf("max_subscription_clients %d reached", env.Config.MaxSubscriptionClients)
-	} else if env.EventBus.NumClientSubscriptions(subscriber) >= env.Config.MaxSubscriptionsPerClient {
-		return nil, fmt.Errorf("max_subscriptions_per_client %d reached", env.Config.MaxSubscriptionsPerClient)
-	}
-
-	// Subscribe to tx being committed in block.
-	subCtx, cancel := context.WithTimeout(ctx.Context(), SubscribeTimeout)
-	defer cancel()
-	q := types.EventQueryTxFor(tx)
-	deliverTxSub, err := env.EventBus.Subscribe(subCtx, subscriber, q)
-	if err != nil {
-		err = fmt.Errorf("failed to subscribe to tx: %w", err)
-		env.Logger.Error("Error on broadcast_tx_commit", "err", err)
-		return nil, err
-	}
-	defer func() {
-		args := tmpubsub.UnsubscribeArgs{Subscriber: subscriber, Query: q}
-		if err := env.EventBus.Unsubscribe(context.Background(), args); err != nil {
-			env.Logger.Error("Error unsubscribing from eventBus", "err", err)
-		}
-	}()
-
-	// Broadcast tx and wait for CheckTx result
-	checkTxResCh := make(chan *abci.Response, 1)
-	err = env.Mempool.CheckTx(
+	resCh := make(chan *abci.Response, 1)
+	err := env.Mempool.CheckTx(
 		ctx.Context(),
 		tx,
-		func(res *abci.Response) { checkTxResCh <- res },
+		func(res *abci.Response) { resCh <- res },
 		mempool.TxInfo{},
 	)
 	if err != nil {
-		env.Logger.Error("Error on broadcastTxCommit", "err", err)
-		return nil, fmt.Errorf("error on broadcastTxCommit: %v", err)
+		return nil, err
 	}
 
-	checkTxResMsg := <-checkTxResCh
-	checkTxRes := checkTxResMsg.GetCheckTx()
+	r := (<-resCh).GetCheckTx()
 
-	if checkTxRes.Code != abci.CodeTypeOK {
+	if !indexer.KVSinkEnabled(env.EventSinks) {
 		return &coretypes.ResultBroadcastTxCommit{
-			CheckTx:   *checkTxRes,
-			DeliverTx: abci.ResponseDeliverTx{},
-			Hash:      tx.Hash(),
-		}, nil
+				CheckTx: *r,
+				Hash:    tx.Hash(),
+			},
+			errors.New("cannot wait for commit because kvEventSync is not enabled")
 	}
 
-	// Wait for the tx to be included in a block or timeout.
-	select {
-	case msg := <-deliverTxSub.Out(): // The tx was included in a block.
-		deliverTxRes := msg.Data().(types.EventDataTx)
-		return &coretypes.ResultBroadcastTxCommit{
-			CheckTx:   *checkTxRes,
-			DeliverTx: deliverTxRes.Result,
-			Hash:      tx.Hash(),
-			Height:    deliverTxRes.Height,
-		}, nil
-	case <-deliverTxSub.Canceled():
-		var reason string
-		if deliverTxSub.Err() == nil {
-			reason = "Tendermint exited"
-		} else {
-			reason = deliverTxSub.Err().Error()
+	startAt := time.Now()
+	timer := time.NewTimer(0)
+	defer timer.Stop()
+
+	for {
+		select {
+		case <-ctx.Context().Done():
+			env.Logger.Error("Error on broadcastTxCommit",
+				"duration", time.Since(startAt),
+				"err", err)
+			return &coretypes.ResultBroadcastTxCommit{
+					CheckTx: *r,
+					Hash:    tx.Hash(),
+				}, fmt.Errorf("timeout waiting for commit of tx %s (%s)",
+					tx.String(), time.Since(startAt))
+		case <-timer.C:
+			txres, err := env.Tx(ctx, tx.Hash(), false)
+			if err != nil {
+				jitter := 100*time.Millisecond + time.Duration(rand.Int63n(int64(time.Second))) // nolint: gosec

the one wrinkle is that we don't actually have that much time to work with because the context gets canceled pretty quickly.
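To make the constraint concrete, one option (a sketch, not something from the PR) is to cap each backoff delay at whatever time remains on the request context, so the final retry isn't silently swallowed by cancellation. Names (ctx, timer, jitter) follow the hunk above.

			// Bound the per-attempt delay by the request context's remaining time.
			delay := jitter
			if deadline, ok := ctx.Context().Deadline(); ok {
				if remaining := time.Until(deadline); remaining < delay {
					delay = remaining
				}
			}
			timer.Reset(delay)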

tychoish

comment created time in 12 hours

PullRequestReviewEvent

push event tychoish/tendermint

Sam Kleinman

commit sha 5ccd668c788c93dc6bc0998e4590efc1094d1b2a

e2e: load should be proportional to network (#6983)

view details

Sam Kleinman

commit sha f59814950bf61bec1a57bf7167102783c540f370

Merge branch 'master' into e2e-apphash-check

view details

push time in 12 hours

push event tendermint/tendermint

Sam Kleinman

commit sha 5ccd668c788c93dc6bc0998e4590efc1094d1b2a

e2e: load should be proportional to network (#6983)

view details

push time in 12 hours

PR merged tendermint/tendermint

Reviewers
e2e: load should be proportional to network

Historically, the workload in the e2e test suite was inversely proportional to the number of nodes in the network (more threads for smaller networks), but the generator also has a lot of sleeps/rests to ensure that the workload doesn't overwhelm the network.

This is all somewhat complicated because the e2e tests all run on a single system, so not only does the workload have to avoid overwhelming the capability of a node, it also has to avoid overwhelming the capability of the system it's running on, which can be an issue on the CI infrastructure.

Also, while the e2e tests need some workload to validate that the network works, the main goal of these tests is to make sure the system works: proving that it's possible to generate a workload that can overwhelm the system is not particularly interesting. As a result, we've spent a lot of time finding a workload that fits in the context of our tests. If the workload is too active, the nodes can get overwhelmed and the network doesn't succeed; if the workload is too small and no transactions land successfully, the network can fail as well.

Therefore, this change attempts to make the workload more consistent and to give it a more rational/predictable model:

  • the size of the workload should be proportional to the number of nodes in the network

  • in general the workload generator should sleep mostly for jitter purposes, and not as a means of controlling the size of the workload.

+12 -8

0 comment

1 changed file

tychoish

pr closed time in 12 hours

Pull request review comment tendermint/tendermint

e2e: load should be proportional to network

 func Load(ctx context.Context, testnet *e2e.Testnet) error {
 	// CPU. This gives high-throughput small networks and low-throughput large ones.
 	// This also limits the number of TCP connections, since each worker has
 	// a connection to all nodes.
-	concurrency := 64 / len(testnet.Nodes)
-	if concurrency == 0 {
-		concurrency = 1
+	concurrency := len(testnet.Nodes) * 8
+	if concurrency > 64 {

the load generation isn't really CPU intensive, and I think it generally makes sense to have it scale with the number of nodes.
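The hunk above is cut off at the cap check; the calculation it describes amounts to something like the following, using the hunk's testnet variable. The body of the cap isn't shown, so that part is my assumption.

	// Scale workers with network size, but cap them so the single machine
	// running the whole e2e network isn't overwhelmed.
	concurrency := len(testnet.Nodes) * 8
	if concurrency > 64 {
		concurrency = 64
	}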

tychoish

comment created time in 12 hours

PullRequestReviewEvent

push event tychoish/tendermint

tycho garen

commit sha c06f7a6fa59181790d31f7e7476c83c938514134

change order of assertions

view details

push time in 13 hours

PR opened tendermint/tendermint

Please add a description of the changes that this PR introduces and the files that are the most critical to review.

If this PR fixes an open Issue, please include "Closes #XXX" (where "XXX" is the Issue number) so that GitHub will automatically close the Issue when this PR is merged.

+1 -1

0 comment

1 changed file

pr created time in 13 hours

push event tychoish/tendermint

tycho garen

commit sha 59d3b73422e9489d025488a946dd1dd2abcd6170

e2e: make app hash check more stable

view details

push time in 13 hours

create branch tychoish/tendermint

branch : e2e-apphash-check

created branch time in 13 hours

push event tychoish/tendermint

tycho garen

commit sha ff5e900cc40091f70bf3bdb9fae0633c588d6a7b

fix lint

view details

push time in 13 hours

PR opened tendermint/tendermint

rpc: implement BroadcastTxCommit without event subscriptions

This is 60% an excuse to run all the tests and see what sticks, and 40% an excuse to get other people to take a look at this.

+40 -71

0 comment

1 changed file

pr created time in 13 hours

create branch tychoish/tendermint

branch : rpc-broadcast-tx-commit-experiement

created branch time in 13 hours

push event tychoish/tendermint

tycho garen

commit sha 8f60f575a64a6308c04363a5f172c48759139863

cap concurrency

view details

push time in 14 hours

PR opened tendermint/tendermint

e2e: load should be proportional to network

Historically, the workload in the e2e test suite was inversely proportional to the number of nodes in the network (more threads for smaller networks), but the generator also has a lot of sleeps/rests to ensure that the workload doesn't overwhelm the network.

This is all somewhat complicated because the e2e tests all run on a single system, so not only does the workload have to avoid overwhelming the capability of a node, it also has to avoid overwhelming the capability of the system it's running on, which can be an issue on the CI infrastructure.

Also, while the e2e tests need some workload to validate that the network works, the main goal of these tests is to make sure the system works: proving that it's possible to generate a workload that can overwhelm the system is not particularly interesting. As a result, we've spent a lot of time finding a workload that fits in the context of our tests. If the workload is too active, the nodes can get overwhelmed and the network doesn't succeed; if the workload is too small and no transactions land successfully, the network can fail as well.

Therefore, this change attempts to make the workload more consistent and to give it a more rational/predictable model:

  • the size of the workload should be proportional to the number of nodes in the network

  • in general the workload generator should sleep mostly for jitter purposes, and not as a means of controlling the size of the workload.

+10 -9

0 comment

1 changed file

pr created time in 14 hours

create branch tychoish/tendermint

branch : e2e-proportional-load

created branch time in 14 hours

push event tendermint/tendermint

Sam Kleinman

commit sha e94c418ad91ed92e02532b57fc327f9caab73168

e2e: always preserve failed networks (#6981)

view details

push time in 14 hours

PR merged tendermint/tendermint

Reviewers
e2e: always preserve failed networks

I think this is closer to what we actually want.

+15 -12

0 comment

2 changed files

tychoish

pr closed time in 14 hours

push event tychoish/tendermint

Sam Kleinman

commit sha 3d410e4a6ba3369332099b280a895b1848c79d8b

e2e: only check validator sets after statesync (#6980)

view details

Sam Kleinman

commit sha 466a185bd6e4cab88278357c36cfff6d31dda21e

Merge branch 'master' into e2e-always-preserve-failures

view details

push time in 15 hours

push event tendermint/tendermint

Sam Kleinman

commit sha 3d410e4a6ba3369332099b280a895b1848c79d8b

e2e: only check validator sets after statesync (#6980)

view details

push time in 15 hours

PR merged tendermint/tendermint

Reviewers
e2e: only check validator sets after statesync

This should avoid errors in cases where backfill hasn't finished/succeeded.

+8 -0

0 comment

1 changed file

tychoish

pr closed time in 15 hours

push event tychoish/tendermint

tycho garen

commit sha e4e4fccf7fef51f6a4cdf71581c92bdaf3999374

add comment

view details

tycho garen

commit sha cacbb9f70175409e9ec52a091ce61a8eb4ee6293

Merge remote-tracking branch 'tychoish/e2e-validator-set-test' into e2e-validator-set-test

view details

push time in 15 hours

PullRequestReviewEvent