Add connection resilience and reconnect commands for Kosmi and Jackbox
Kosmi WebSocket would silently die after hours/days with no reconnection.
Jackbox WebSocket failed to reconnect after API server restarts (stale JWT)
and leaked heartbeat goroutines on each reconnect cycle.

Kosmi changes:
- Add WebSocket ping/pong keepalive (30s ping, 90s read deadline)
- Send EventFailure on unexpected disconnect to trigger gateway reconnectBridge()
- Add intentionalDisconnect flag to prevent false failure events on clean shutdown
- Fix Disconnect() to be safe for reconnect cycles

Jackbox changes:
- Add read deadline (90s) to detect stale connections
- Fix heartbeat goroutine leak via per-connection listenDone channel
- Re-authenticate for fresh JWT before each reconnect attempt
- Add Manager.Reconnect() for on-demand teardown and rebuild

IRC commands:
- !kreconnect - reconnect Kosmi bridge
- !jreconnect - reconnect Jackbox WebSocket
- !reconnect - reconnect all services (Kosmi + Jackbox)

Made-with: Cursor
This commit is contained in:
@@ -242,6 +242,37 @@ func (m *Manager) monitorActiveSessions() {
|
||||
}
|
||||
}
|
||||
|
||||
// Reconnect tears down the existing WebSocket client and establishes a new
|
||||
// connection (re-authenticating for a fresh JWT). Safe to call even when
|
||||
// already disconnected.
|
||||
func (m *Manager) Reconnect() error {
|
||||
if !m.enabled || !m.useWebSocket {
|
||||
return fmt.Errorf("Jackbox WebSocket is not enabled")
|
||||
}
|
||||
|
||||
m.log.Info("Forcing Jackbox WebSocket reconnection...")
|
||||
|
||||
// Tear down existing client
|
||||
if m.wsClient != nil {
|
||||
if err := m.wsClient.Close(); err != nil {
|
||||
m.log.Errorf("Error closing existing WebSocket client: %v", err)
|
||||
}
|
||||
m.wsClient = nil
|
||||
}
|
||||
|
||||
// Re-authenticate for a fresh JWT
|
||||
if err := m.client.Authenticate(); err != nil {
|
||||
return fmt.Errorf("re-authentication failed: %w", err)
|
||||
}
|
||||
|
||||
// Rebuild the WebSocket client using the original callback
|
||||
if m.messageCallback == nil {
|
||||
return fmt.Errorf("no message callback registered")
|
||||
}
|
||||
|
||||
return m.startWebSocketClient(m.messageCallback)
|
||||
}
|
||||
|
||||
// GetClient returns the Jackbox API client (may be nil if disabled)
|
||||
func (m *Manager) GetClient() *Client {
|
||||
return m.client
|
||||
|
||||
@@ -10,6 +10,11 @@ import (
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// Keepalive timing for the Jackbox WebSocket connection.
const (
	// jackboxPingInterval is the period of the heartbeat ticker that sends
	// pings on the connection.
	jackboxPingInterval time.Duration = 30 * time.Second

	// jackboxReadTimeout is how far into the future the read deadline is set
	// when a connection is established and after each successful read.
	jackboxReadTimeout time.Duration = 90 * time.Second
)
|
||||
|
||||
// WebSocketClient handles WebSocket connection to Jackbox API
|
||||
type WebSocketClient struct {
|
||||
apiURL string
|
||||
@@ -22,6 +27,7 @@ type WebSocketClient struct {
|
||||
reconnectDelay time.Duration
|
||||
maxReconnect time.Duration
|
||||
stopChan chan struct{}
|
||||
listenDone chan struct{} // closed when the current listen() goroutine exits
|
||||
connected bool
|
||||
authenticated bool
|
||||
subscribedSession int
|
||||
@@ -109,8 +115,11 @@ func (c *WebSocketClient) Connect() error {
|
||||
|
||||
c.conn = conn
|
||||
c.connected = true
|
||||
c.listenDone = make(chan struct{})
|
||||
c.log.Info("WebSocket connected")
|
||||
|
||||
conn.SetReadDeadline(time.Now().Add(jackboxReadTimeout))
|
||||
|
||||
// Start message listener
|
||||
go c.listen()
|
||||
|
||||
@@ -181,9 +190,9 @@ func (c *WebSocketClient) Unsubscribe(sessionID int) error {
|
||||
// listen handles incoming WebSocket messages
|
||||
func (c *WebSocketClient) listen() {
|
||||
defer c.handleDisconnect()
|
||||
defer close(c.listenDone)
|
||||
|
||||
// Start heartbeat
|
||||
go c.startHeartbeat()
|
||||
go c.startHeartbeat(c.listenDone)
|
||||
|
||||
for {
|
||||
select {
|
||||
@@ -196,6 +205,9 @@ func (c *WebSocketClient) listen() {
|
||||
return
|
||||
}
|
||||
|
||||
// Reset read deadline on every successful read
|
||||
c.conn.SetReadDeadline(time.Now().Add(jackboxReadTimeout))
|
||||
|
||||
c.handleMessage(message)
|
||||
}
|
||||
}
|
||||
@@ -438,15 +450,18 @@ func (c *WebSocketClient) AnnounceSessionEnd() {
|
||||
}
|
||||
}
|
||||
|
||||
// startHeartbeat sends ping messages periodically
|
||||
func (c *WebSocketClient) startHeartbeat() {
|
||||
ticker := time.NewTicker(30 * time.Second)
|
||||
// startHeartbeat sends ping messages periodically. It exits when listenDone
|
||||
// is closed (current connection ended) or stopChan is closed (full shutdown).
|
||||
func (c *WebSocketClient) startHeartbeat(listenDone <-chan struct{}) {
|
||||
ticker := time.NewTicker(jackboxPingInterval)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-c.stopChan:
|
||||
return
|
||||
case <-listenDone:
|
||||
return
|
||||
case <-ticker.C:
|
||||
c.mu.Lock()
|
||||
if c.connected && c.conn != nil {
|
||||
@@ -474,7 +489,9 @@ func (c *WebSocketClient) sendMessage(msg WSMessage) error {
|
||||
return c.conn.WriteMessage(websocket.TextMessage, data)
|
||||
}
|
||||
|
||||
// handleDisconnect handles connection loss and attempts reconnection
|
||||
// handleDisconnect handles connection loss and attempts reconnection with
|
||||
// exponential backoff. Before each attempt it re-authenticates via the HTTP
|
||||
// API to obtain a fresh JWT (the old token may be invalid after a server restart).
|
||||
func (c *WebSocketClient) handleDisconnect() {
|
||||
c.mu.Lock()
|
||||
c.connected = false
|
||||
@@ -487,7 +504,6 @@ func (c *WebSocketClient) handleDisconnect() {
|
||||
|
||||
c.log.Warn("WebSocket disconnected, attempting to reconnect...")
|
||||
|
||||
// Exponential backoff reconnection
|
||||
delay := c.reconnectDelay
|
||||
for {
|
||||
select {
|
||||
@@ -496,21 +512,27 @@ func (c *WebSocketClient) handleDisconnect() {
|
||||
case <-time.After(delay):
|
||||
c.log.Infof("Reconnecting... (delay: %v)", delay)
|
||||
|
||||
// Re-authenticate to get a fresh JWT token before reconnecting.
|
||||
if c.apiClient != nil {
|
||||
if err := c.apiClient.Authenticate(); err != nil {
|
||||
c.log.Errorf("Re-authentication failed: %v (will retry)", err)
|
||||
delay = c.bumpDelay(delay)
|
||||
continue
|
||||
}
|
||||
c.mu.Lock()
|
||||
c.token = c.apiClient.GetToken()
|
||||
c.mu.Unlock()
|
||||
c.log.Info("Re-authenticated with fresh JWT token")
|
||||
}
|
||||
|
||||
if err := c.Connect(); err != nil {
|
||||
c.log.Errorf("Reconnection failed: %v", err)
|
||||
|
||||
// Increase delay with exponential backoff
|
||||
delay *= 2
|
||||
if delay > c.maxReconnect {
|
||||
delay = c.maxReconnect
|
||||
}
|
||||
delay = c.bumpDelay(delay)
|
||||
continue
|
||||
}
|
||||
|
||||
// Reconnected successfully
|
||||
c.log.Info("Reconnected successfully")
|
||||
|
||||
// Re-subscribe if we were subscribed before
|
||||
if c.subscribedSession > 0 {
|
||||
if err := c.Subscribe(c.subscribedSession); err != nil {
|
||||
c.log.Errorf("Failed to re-subscribe: %v", err)
|
||||
@@ -522,6 +544,14 @@ func (c *WebSocketClient) handleDisconnect() {
|
||||
}
|
||||
}
|
||||
|
||||
func (c *WebSocketClient) bumpDelay(d time.Duration) time.Duration {
|
||||
d *= 2
|
||||
if d > c.maxReconnect {
|
||||
d = c.maxReconnect
|
||||
}
|
||||
return d
|
||||
}
|
||||
|
||||
// Close closes the WebSocket connection
|
||||
func (c *WebSocketClient) Close() error {
|
||||
c.mu.Lock()
|
||||
|
||||
Reference in New Issue
Block a user