@@ -126,7 +126,7 @@ func Test_Disconnect(t *testing.T) {
126
126
go func () {
127
127
c .Disconnect (250 )
128
128
cli := c .(* client )
129
- cli .status = connected
129
+ cli .status . forceConnectionStatus ( connected )
130
130
c .Disconnect (250 )
131
131
close (disconnectC )
132
132
}()
@@ -1191,29 +1191,36 @@ func Test_cleanUpMids_2(t *testing.T) {
1191
1191
ops .SetKeepAlive (10 * time .Second )
1192
1192
1193
1193
c := NewClient (ops )
1194
+ cl := c .(* client )
1194
1195
1195
1196
if token := c .Connect (); token .Wait () && token .Error () != nil {
1196
1197
t .Fatalf ("Error on Client.Connect(): %v" , token .Error ())
1197
1198
}
1198
1199
1199
1200
token := c .Publish ("/test/cleanUP" , 2 , false , "cleanup test 2" )
1200
- if len (c .(* client ).messageIds .index ) == 0 {
1201
+ cl .messageIds .mu .Lock ()
1202
+ mq := len (c .(* client ).messageIds .index )
1203
+ cl .messageIds .mu .Unlock ()
1204
+ if mq == 0 {
1201
1205
t .Fatalf ("Should be a token in the messageIDs, none found" )
1202
1206
}
1203
- fmt .Println ("Disconnecting" , len (c .( * client ) .messageIds .index ))
1207
+ // fmt.Println("Disconnecting", len(cl .messageIds.index))
1204
1208
c .Disconnect (0 )
1205
1209
1206
1210
fmt .Println ("Wait on Token" )
1207
1211
// We should be able to wait on this token without any issue
1208
1212
token .Wait ()
1209
1213
1210
- if len (c .(* client ).messageIds .index ) > 0 {
1214
+ cl .messageIds .mu .Lock ()
1215
+ mq = len (c .(* client ).messageIds .index )
1216
+ cl .messageIds .mu .Unlock ()
1217
+ if mq > 0 {
1211
1218
t .Fatalf ("Should have cleaned up messageIDs, have %d left" , len (c .(* client ).messageIds .index ))
1212
1219
}
1213
1220
if token .Error () == nil {
1214
1221
t .Fatal ("token should have received an error on connection loss" )
1215
1222
}
1216
- fmt .Println (token .Error ())
1223
+ // fmt.Println(token.Error())
1217
1224
}
1218
1225
1219
1226
func Test_ConnectRetry (t * testing.T ) {
@@ -1339,7 +1346,6 @@ func Test_ResumeSubs(t *testing.T) {
1339
1346
t .Fatalf ("Expected 1 packet to be in store" )
1340
1347
}
1341
1348
packet := subMemStore .Get (ids [0 ])
1342
- fmt .Println ("packet" , packet )
1343
1349
if packet == nil {
1344
1350
t .Fatal ("Failed to retrieve packet from store" )
1345
1351
}
@@ -1471,11 +1477,12 @@ func Test_ResumeSubsWithReconnect(t *testing.T) {
1471
1477
c .Disconnect (250 )
1472
1478
}
1473
1479
1474
- // Issue 209 - occasional deadlock when connections are lost unexpectedly
1480
+ // Issue 509 - occasional deadlock when connections are lost unexpectedly
1475
1481
// This was quite a nasty deadlock which occurred in very rare circumstances; I could not come up with a reliable way of
1476
1482
// replicating this but the below would cause it to happen fairly consistently (when the test was run a decent number
1477
1483
// of times). Following the fix it ran 10,000 times without issue.
1478
- // go test -count 10000 -run DisconnectWhileProcessingIncomingPublish
1484
+ //
1485
+ // go test -count 10000 -run DisconnectWhileProcessingIncomingPublish
1479
1486
func Test_DisconnectWhileProcessingIncomingPublish (t * testing.T ) {
1480
1487
topic := "/test/DisconnectWhileProcessingIncomingPublish"
1481
1488
@@ -1487,11 +1494,11 @@ func Test_DisconnectWhileProcessingIncomingPublish(t *testing.T) {
1487
1494
1488
1495
sops := NewClientOptions ()
1489
1496
sops .AddBroker (FVTTCP )
1490
- sops .SetAutoReconnect (false ) // We dont want the connection to be re-established
1497
+ sops .SetAutoReconnect (false ) // We don't want the connection to be re-established
1491
1498
sops .SetWriteTimeout (500 * time .Millisecond ) // We will be sending a lot of publish messages and want go routines to clear...
1492
1499
// sops.SetOrderMatters(false)
1493
1500
sops .SetClientID ("dwpip-sub" )
1494
- // We need to know when the subscriber has lost its connection (this indicates that the deadlock has not occured )
1501
+ // We need to know when the subscriber has lost its connection (this indicates that the deadlock has not occurred )
1495
1502
sDisconnected := make (chan struct {})
1496
1503
sops .SetConnectionLostHandler (func (Client , error ) { close (sDisconnected ) })
1497
1504
@@ -1523,20 +1530,23 @@ func Test_DisconnectWhileProcessingIncomingPublish(t *testing.T) {
1523
1530
i := 0
1524
1531
for {
1525
1532
p .Publish (topic , 1 , false , fmt .Sprintf ("test message: %d" , i ))
1526
- // After the connection goes down s.Publish will start blocking (this is not ideal but fixing its a problem for another time)
1527
- go func () { s .Publish (topic + "IGNORE" , 1 , false , fmt .Sprintf ("test message: %d" , i )) }()
1533
+ // After the connection goes down s.Publish will start blocking (this is not ideal but fixing it's a problem for another time)
1534
+ go func (i int ) { s .Publish (topic + "IGNORE" , 1 , false , fmt .Sprintf ("test message: %d" , i )) }(i )
1528
1535
i ++
1529
-
1530
1536
if ctx .Err () != nil {
1531
1537
return
1532
1538
}
1533
1539
}
1534
1540
}()
1535
1541
1536
1542
// Wait until we have received a message (ensuring that the stream of messages has started)
1543
+ delay := time .NewTimer (time .Second ) // Be careful with timers as this will be run in a tight loop!
1537
1544
select {
1538
1545
case <- msgReceived : // All good
1539
- case <- time .After (time .Second ):
1546
+ if ! delay .Stop () { // Cleanly close timer as this may be run in a tight loop!
1547
+ <- delay .C
1548
+ }
1549
+ case <- delay .C :
1540
1550
t .Errorf ("no messages received" )
1541
1551
}
1542
1552
@@ -1545,34 +1555,42 @@ func Test_DisconnectWhileProcessingIncomingPublish(t *testing.T) {
1545
1555
dm := packets .NewControlPacket (packets .Disconnect ).(* packets.DisconnectPacket )
1546
1556
err := dm .Write (s .conn )
1547
1557
if err != nil {
1548
- t .Fatalf ("error dending disconnect packet: %s" , err )
1558
+ t .Fatalf ("error sending disconnect packet: %s" , err )
1549
1559
}
1550
1560
1551
1561
// Let's give the library up to a second to shut down (indicated by the status changing)
1562
+ delay = time .NewTimer (time .Second ) // Be careful with timers as this will be run in a tight loop!
1552
1563
select {
1553
1564
case <- sDisconnected : // All good
1554
- case <- time .After (time .Second ):
1555
- cancel () // no point leaving publisher running
1556
- time .Sleep (time .Second ) // Allow publish calls to timeout (otherwise there will be tons of go routines running!)
1565
+ if ! delay .Stop () {
1566
+ <- delay .C
1567
+ }
1568
+ case <- delay .C :
1569
+ cancel () // no point leaving publisher running
1570
+ time .Sleep (10 * time .Second ) // Allow publish calls to timeout (otherwise there will be tons of go routines running!)
1557
1571
buf := make ([]byte , 1 << 20 )
1558
1572
stacklen := runtime .Stack (buf , true )
1559
1573
t .Fatalf ("connection was not lost as expected - probable deadlock. Stacktrace follows: %s" , buf [:stacklen ])
1560
1574
}
1561
1575
1562
1576
cancel () // no point leaving publisher running
1563
1577
1578
+ delay = time .NewTimer (time .Second ) // Be careful with timers as this will be run in a tight loop!
1564
1579
select {
1565
1580
case <- pubDone :
1566
- case <- time .After (time .Second ):
1567
- t .Errorf ("pubdone not closed within a second" )
1581
+ if ! delay .Stop () {
1582
+ <- delay .C
1583
+ }
1584
+ case <- delay .C :
1585
+ t .Errorf ("pubdone not closed within two seconds (probably due to load on system but may be an issue)" )
1568
1586
}
1569
1587
p .Disconnect (250 ) // Close publisher
1570
1588
}
1571
1589
1572
1590
// Test_ResumeSubsMaxInflight - Check the MaxResumePubInFlight option.
1573
1591
// This is difficult to test without control of the broker (because we will be communicating via the broker not
1574
- // directly. However due to the way resume works when there is no limit to inflight messages message ordering is not
1575
- // guaranteed. However with SetMaxResumePubInFlight(1) it is guaranteed so we use that to test.
1592
+ // directly). However, due to the way resume works when there is no limit to inflight messages, message ordering is not
1593
+ // guaranteed. However, with SetMaxResumePubInFlight(1) it is guaranteed so we use that to test.
1576
1594
// On my PC (using mosquitto under docker) running this without SetMaxResumePubInFlight(1) will fail with 1000 messages
1577
1595
// (generally passes if only 100 are sent). With the option set it always passes.
1578
1596
func Test_ResumeSubsMaxInflight (t * testing.T ) {
0 commit comments