Add !votes command, fix vote tally timing, and improve Kosmi stability

- Add !votes command (IRC + Kosmi) showing per-session and all-time vote
  breakdowns for the current game via new Jackbox API endpoints
  (GET sessions/{id}/games, sessions/{id}/votes, games/{id})
- Fix vote tally broadcasting: remove debounce timer, announce tallies
  only at game transitions or session end instead of after every vote
- Add !kreconnect IRC command to manually trigger Kosmi reconnection
- Add WebSocket ping/pong keepalive and write mutex to Kosmi client
  for connection stability
- Add watchConnection() auto-reconnect on unexpected Kosmi disconnects
- Remove old 2025-10-31 chat summaries; add votes command design doc

Made-with: Cursor
This commit is contained in:
cottongin
2026-03-16 20:56:18 -04:00
parent 1831b0e923
commit 88cc140087
15 changed files with 536 additions and 1398 deletions

View File

@@ -28,6 +28,8 @@ const (
EventUserTyping = "user_typing"
EventGetChannelMembers = "get_channel_members"
EventNoticeIRC = "notice_irc"
EventReconnectKosmi = "reconnect_kosmi"
EventVotesQuery = "votes_query"
)
const ParentIDNotFound = "msg-parent-not-found"

View File

@@ -269,6 +269,32 @@ func (b *Birc) handlePrivMsg(client *girc.Client, event girc.Event) {
}
}
// Handle !kreconnect command: trigger Kosmi bridge reconnection
if strings.TrimSpace(rmsg.Text) == "!kreconnect" {
b.Log.Infof("!kreconnect command from %s on %s", event.Source.Name, rmsg.Channel)
b.Remote <- config.Message{
Username: "system",
Text: "kreconnect",
Channel: rmsg.Channel,
Account: b.Account,
Event: config.EventReconnectKosmi,
}
return
}
// Handle !votes command: query current game vote tally
if strings.TrimSpace(rmsg.Text) == "!votes" {
b.Log.Infof("!votes command from %s on %s", event.Source.Name, rmsg.Channel)
b.Remote <- config.Message{
Username: "system",
Text: "votes",
Channel: rmsg.Channel,
Account: b.Account,
Event: config.EventVotesQuery,
}
return
}
b.Log.Debugf("<= Sending message from %s on %s to gateway", event.Params[0], b.Account)
b.Remote <- rmsg
}

View File

@@ -24,10 +24,8 @@ type Client struct {
messageCallback func(string)
// Vote tracking
activeSessionID int
lastVoteResponse *VoteResponse
voteDebounceTimer *time.Timer
voteDebounceDelay time.Duration
activeSessionID int
lastVoteResponse *VoteResponse
}
// AuthResponse represents the authentication response from the API
@@ -67,6 +65,44 @@ type SessionResponse struct {
Session *Session `json:"session"`
}
// SessionGame represents a game within a session.
// It is the element type that GetSessionGames decodes from the
// /api/sessions/{id}/games response body (a JSON array).
type SessionGame struct {
// ID is read from the "id" key; presumably the session-game entry's own
// identifier, distinct from GameID — TODO confirm against the API.
ID int `json:"id"`
// GameID is read from the "game_id" key.
GameID int `json:"game_id"`
// Title is read from the "title" key.
Title string `json:"title"`
// PackName is read from the "pack_name" key.
PackName string `json:"pack_name"`
// Status is read from the "status" key; the set of valid values is not
// visible here.
Status string `json:"status"`
// RoomCode is read from the "room_code" key.
RoomCode string `json:"room_code"`
}
// SessionVotesResponse represents the per-game vote breakdown for a session.
// GetSessionVotes decodes the /api/sessions/{id}/votes response body into it.
type SessionVotesResponse struct {
// SessionID is read from the "session_id" key.
SessionID int `json:"session_id"`
// Votes is read from the "votes" key, one GameVoteSummary per entry.
Votes []GameVoteSummary `json:"votes"`
}
// GameVoteSummary represents aggregated vote data for a single game in a session.
// It is the element type of SessionVotesResponse.Votes.
type GameVoteSummary struct {
// GameID is read from the "game_id" key.
GameID int `json:"game_id"`
// Title is read from the "title" key.
Title string `json:"title"`
// PackName is read from the "pack_name" key.
PackName string `json:"pack_name"`
// Upvotes is read from the "upvotes" key.
Upvotes int `json:"upvotes"`
// Downvotes is read from the "downvotes" key.
Downvotes int `json:"downvotes"`
// NetScore is read from the "net_score" key; presumably upvotes minus
// downvotes as computed by the API — TODO confirm.
NetScore int `json:"net_score"`
// TotalVotes is read from the "total_votes" key.
TotalVotes int `json:"total_votes"`
}
// Game represents a game from the catalog.
// GetGame decodes the /api/games/{id} response body into it.
type Game struct {
// ID is read from the "id" key.
ID int `json:"id"`
// Title is read from the "title" key.
Title string `json:"title"`
// PackName is read from the "pack_name" key.
PackName string `json:"pack_name"`
// PopularityScore is read from the "popularity_score" key; how the API
// derives it is not visible here.
PopularityScore int `json:"popularity_score"`
// Upvotes is read from the "upvotes" key; presumably the all-time count
// for this game — TODO confirm.
Upvotes int `json:"upvotes"`
// Downvotes is read from the "downvotes" key; presumably the all-time
// count for this game — TODO confirm.
Downvotes int `json:"downvotes"`
// PlayCount is read from the "play_count" key.
PlayCount int `json:"play_count"`
}
// NewClient creates a new Jackbox API client
func NewClient(apiURL, adminPassword string, log *logrus.Entry) *Client {
return &Client{
@@ -76,7 +112,6 @@ func NewClient(apiURL, adminPassword string, log *logrus.Entry) *Client {
httpClient: &http.Client{
Timeout: 10 * time.Second,
},
voteDebounceDelay: 3 * time.Second, // Wait 3 seconds after last vote before broadcasting
}
}
@@ -102,13 +137,6 @@ func (c *Client) GetAndClearLastVoteResponse() *VoteResponse {
resp := c.lastVoteResponse
c.lastVoteResponse = nil
// Stop any pending debounce timer
if c.voteDebounceTimer != nil {
c.voteDebounceTimer.Stop()
c.voteDebounceTimer = nil
}
return resp
}
@@ -267,8 +295,21 @@ func (c *Client) SendVote(username, voteType string, timestamp time.Time) error
c.log.Debugf("Vote recorded for %s: %s - %d👍 %d👎",
voteResp.Game.Title, username, voteResp.Game.Upvotes, voteResp.Game.Downvotes)
// Debounce vote broadcasts - wait for activity to settle
c.debouncedVoteBroadcast(&voteResp)
// Accumulate vote; tally announced at game change or session end
c.storeVoteResponse(&voteResp)
// If local session tracking is stale, sync from the API.
// A successful vote means the API has an active session.
c.mu.RLock()
sessionID := c.activeSessionID
c.mu.RUnlock()
if sessionID == 0 {
go func() {
if session, err := c.GetActiveSession(); err == nil && session != nil {
c.SetActiveSession(session.ID)
}
}()
}
return nil
}
@@ -351,49 +392,168 @@ func (c *Client) GetActiveSession() (*Session, error) {
return &session, nil
}
// debouncedVoteBroadcast implements debouncing for vote broadcasts
// When there's an active session, it stores votes to be announced with the next game
// When there's no active session, it uses time-based debouncing (3 seconds)
func (c *Client) debouncedVoteBroadcast(voteResp *VoteResponse) {
// storeVoteResponse accumulates the latest vote response silently.
// The tally is announced later by handleGameAdded or AnnounceSessionEnd
// via GetAndClearLastVoteResponse.
func (c *Client) storeVoteResponse(voteResp *VoteResponse) {
c.mu.Lock()
defer c.mu.Unlock()
// Store the latest vote response
c.lastVoteResponse = voteResp
// If there's an active session, just accumulate votes silently
// They'll be announced when the next game is picked
if c.activeSessionID > 0 {
c.log.Debugf("Vote accumulated for %s (session active, will announce with next game)", voteResp.Game.Title)
// Cancel any existing timer since we're in session mode
if c.voteDebounceTimer != nil {
c.voteDebounceTimer.Stop()
c.voteDebounceTimer = nil
}
return
}
// No active session - use time-based debouncing
// If there's an existing timer, stop it
if c.voteDebounceTimer != nil {
c.voteDebounceTimer.Stop()
}
// Create a new timer that will fire after the debounce delay
c.voteDebounceTimer = time.AfterFunc(c.voteDebounceDelay, func() {
c.mu.Lock()
lastResp := c.lastVoteResponse
c.lastVoteResponse = nil
c.mu.Unlock()
if lastResp != nil {
// Broadcast the final vote result
message := fmt.Sprintf("🗳️ Voting complete for %s • %d👍 %d👎 (Score: %d)",
lastResp.Game.Title,
lastResp.Game.Upvotes, lastResp.Game.Downvotes, lastResp.Game.PopularityScore)
c.broadcastMessage(message)
c.log.Infof("Broadcast final vote result: %s - %d👍 %d👎",
lastResp.Game.Title, lastResp.Game.Upvotes, lastResp.Game.Downvotes)
}
})
c.log.Debugf("Vote accumulated for %s (will announce at game change or session end)", voteResp.Game.Title)
}
// GetSessionGames retrieves the list of games in a session.
//
// It issues an authenticated GET to /api/sessions/{sessionID}/games and
// decodes the JSON array in the response.
//
// Returns:
//   - (games, nil) on HTTP 200.
//   - (nil, nil) on HTTP 404, i.e. the API reports nothing at that path.
//   - (nil, err) on authentication, transport, unexpected-status or decode
//     failures.
//
// If the API answers 401 the client re-authenticates and retries exactly
// once. A second 401 is reported as an unexpected status instead of
// retrying again, so a persistently rejecting server cannot cause unbounded
// recursion or an endless stream of requests.
func (c *Client) GetSessionGames(sessionID int) ([]SessionGame, error) {
	if err := c.ensureAuthenticated(); err != nil {
		return nil, fmt.Errorf("authentication failed: %w", err)
	}

	url := fmt.Sprintf("%s/api/sessions/%d/games", c.apiURL, sessionID)

	// fetch performs a single GET with the current token and returns the
	// status code together with the fully-read body.
	fetch := func() (int, []byte, error) {
		// Re-read the token on every attempt so a retry picks up the
		// token obtained by re-authentication.
		c.mu.RLock()
		token := c.token
		c.mu.RUnlock()

		req, err := http.NewRequest("GET", url, nil)
		if err != nil {
			return 0, nil, fmt.Errorf("failed to create request: %w", err)
		}
		req.Header.Set("Authorization", "Bearer "+token)

		resp, err := c.httpClient.Do(req)
		if err != nil {
			return 0, nil, fmt.Errorf("failed to send request: %w", err)
		}
		defer resp.Body.Close()

		body, err := io.ReadAll(resp.Body)
		if err != nil {
			return 0, nil, fmt.Errorf("failed to read response body: %w", err)
		}
		return resp.StatusCode, body, nil
	}

	status, body, err := fetch()
	if err != nil {
		return nil, err
	}

	if status == http.StatusUnauthorized {
		c.log.Warn("Token expired, re-authenticating...")
		if authErr := c.Authenticate(); authErr != nil {
			return nil, fmt.Errorf("re-authentication failed: %w", authErr)
		}
		// One retry only; a repeated 401 falls through to the
		// unexpected-status error below.
		status, body, err = fetch()
		if err != nil {
			return nil, err
		}
	}

	if status == http.StatusNotFound {
		return nil, nil
	}
	if status != http.StatusOK {
		return nil, fmt.Errorf("unexpected status %d: %s", status, string(body))
	}

	var games []SessionGame
	if err := json.Unmarshal(body, &games); err != nil {
		return nil, fmt.Errorf("failed to parse response: %w", err)
	}
	return games, nil
}
// GetSessionVotes retrieves the per-game vote breakdown for a session.
//
// It issues an authenticated GET to /api/sessions/{sessionID}/votes and
// decodes the JSON object in the response.
//
// Returns:
//   - (response, nil) on HTTP 200.
//   - (nil, nil) on HTTP 404, i.e. the API reports nothing at that path.
//   - (nil, err) on authentication, transport, unexpected-status or decode
//     failures.
//
// If the API answers 401 the client re-authenticates and retries exactly
// once. A second 401 is reported as an unexpected status instead of
// retrying again, so a persistently rejecting server cannot cause unbounded
// recursion or an endless stream of requests.
func (c *Client) GetSessionVotes(sessionID int) (*SessionVotesResponse, error) {
	if err := c.ensureAuthenticated(); err != nil {
		return nil, fmt.Errorf("authentication failed: %w", err)
	}

	url := fmt.Sprintf("%s/api/sessions/%d/votes", c.apiURL, sessionID)

	// fetch performs a single GET with the current token and returns the
	// status code together with the fully-read body.
	fetch := func() (int, []byte, error) {
		// Re-read the token on every attempt so a retry picks up the
		// token obtained by re-authentication.
		c.mu.RLock()
		token := c.token
		c.mu.RUnlock()

		req, err := http.NewRequest("GET", url, nil)
		if err != nil {
			return 0, nil, fmt.Errorf("failed to create request: %w", err)
		}
		req.Header.Set("Authorization", "Bearer "+token)

		resp, err := c.httpClient.Do(req)
		if err != nil {
			return 0, nil, fmt.Errorf("failed to send request: %w", err)
		}
		defer resp.Body.Close()

		body, err := io.ReadAll(resp.Body)
		if err != nil {
			return 0, nil, fmt.Errorf("failed to read response body: %w", err)
		}
		return resp.StatusCode, body, nil
	}

	status, body, err := fetch()
	if err != nil {
		return nil, err
	}

	if status == http.StatusUnauthorized {
		c.log.Warn("Token expired, re-authenticating...")
		if authErr := c.Authenticate(); authErr != nil {
			return nil, fmt.Errorf("re-authentication failed: %w", authErr)
		}
		// One retry only; a repeated 401 falls through to the
		// unexpected-status error below.
		status, body, err = fetch()
		if err != nil {
			return nil, err
		}
	}

	if status == http.StatusNotFound {
		return nil, nil
	}
	if status != http.StatusOK {
		return nil, fmt.Errorf("unexpected status %d: %s", status, string(body))
	}

	var votesResp SessionVotesResponse
	if err := json.Unmarshal(body, &votesResp); err != nil {
		return nil, fmt.Errorf("failed to parse response: %w", err)
	}
	return &votesResp, nil
}
// GetGame retrieves a single game from the catalog by ID.
//
// It issues an authenticated GET to /api/games/{gameID} and decodes the
// JSON object in the response.
//
// Returns:
//   - (game, nil) on HTTP 200.
//   - (nil, nil) on HTTP 404, i.e. the API reports no game at that path.
//   - (nil, err) on authentication, transport, unexpected-status or decode
//     failures.
//
// If the API answers 401 the client re-authenticates and retries exactly
// once. A second 401 is reported as an unexpected status instead of
// retrying again, so a persistently rejecting server cannot cause unbounded
// recursion or an endless stream of requests.
func (c *Client) GetGame(gameID int) (*Game, error) {
	if err := c.ensureAuthenticated(); err != nil {
		return nil, fmt.Errorf("authentication failed: %w", err)
	}

	url := fmt.Sprintf("%s/api/games/%d", c.apiURL, gameID)

	// fetch performs a single GET with the current token and returns the
	// status code together with the fully-read body.
	fetch := func() (int, []byte, error) {
		// Re-read the token on every attempt so a retry picks up the
		// token obtained by re-authentication.
		c.mu.RLock()
		token := c.token
		c.mu.RUnlock()

		req, err := http.NewRequest("GET", url, nil)
		if err != nil {
			return 0, nil, fmt.Errorf("failed to create request: %w", err)
		}
		req.Header.Set("Authorization", "Bearer "+token)

		resp, err := c.httpClient.Do(req)
		if err != nil {
			return 0, nil, fmt.Errorf("failed to send request: %w", err)
		}
		defer resp.Body.Close()

		body, err := io.ReadAll(resp.Body)
		if err != nil {
			return 0, nil, fmt.Errorf("failed to read response body: %w", err)
		}
		return resp.StatusCode, body, nil
	}

	status, body, err := fetch()
	if err != nil {
		return nil, err
	}

	if status == http.StatusUnauthorized {
		c.log.Warn("Token expired, re-authenticating...")
		if authErr := c.Authenticate(); authErr != nil {
			return nil, fmt.Errorf("re-authentication failed: %w", authErr)
		}
		// One retry only; a repeated 401 falls through to the
		// unexpected-status error below.
		status, body, err = fetch()
		if err != nil {
			return nil, err
		}
	}

	if status == http.StatusNotFound {
		return nil, nil
	}
	if status != http.StatusOK {
		return nil, fmt.Errorf("unexpected status %d: %s", status, string(body))
	}

	var game Game
	if err := json.Unmarshal(body, &game); err != nil {
		return nil, fmt.Errorf("failed to parse response: %w", err)
	}
	return &game, nil
}

View File

@@ -14,10 +14,14 @@ import (
)
const (
kosmiWSURL = "wss://engine.kosmi.io/gql-ws"
kosmiWSURL = "wss://engine.kosmi.io/gql-ws"
kosmiHTTPURL = "https://engine.kosmi.io/"
userAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
appVersion = "4364"
userAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
appVersion = "4364"
pingInterval = 30 * time.Second
pongTimeout = 90 * time.Second
writeWait = 10 * time.Second
)
// GraphQL-WS Protocol message types
@@ -40,6 +44,7 @@ type GraphQLWSClient struct {
messageCallback func(*NewMessagePayload)
connected bool
mu sync.RWMutex
writeMu sync.Mutex
done chan struct{}
}
@@ -208,9 +213,17 @@ func (c *GraphQLWSClient) Connect() error {
c.connected = true
c.mu.Unlock()
// Set up ping/pong keepalive
conn.SetReadDeadline(time.Now().Add(pongTimeout))
conn.SetPongHandler(func(string) error {
conn.SetReadDeadline(time.Now().Add(pongTimeout))
return nil
})
c.log.Info("Native WebSocket client connected and ready")
// Start message listener
// Start keepalive pinger and message listener
go c.startPing()
go c.listenForMessages()
return nil
@@ -359,7 +372,10 @@ func (c *GraphQLWSClient) SendMessage(text string) error {
},
}
if err := c.conn.WriteJSON(msg); err != nil {
c.writeMu.Lock()
err := c.conn.WriteJSON(msg)
c.writeMu.Unlock()
if err != nil {
return fmt.Errorf("failed to send message: %w", err)
}
@@ -396,3 +412,33 @@ func (c *GraphQLWSClient) IsConnected() bool {
return c.connected
}
// Done returns the client's done channel as a receive-only channel.
// startPing exits once this channel becomes readable. It is presumably
// closed when the client disconnects — the close site is not part of this
// excerpt, TODO confirm.
func (c *GraphQLWSClient) Done() <-chan struct{} {
return c.done
}
// startPing is the keepalive loop for the WebSocket connection. Every
// pingInterval it writes a ping control frame (bounded by writeWait) while
// holding writeMu. The first failed ping is logged, the connection is closed,
// and the loop ends. The loop also ends as soon as c.done becomes readable.
func (c *GraphQLWSClient) startPing() {
	keepalive := time.NewTicker(pingInterval)
	defer keepalive.Stop()

	// sendPing writes one ping frame under the write mutex.
	sendPing := func() error {
		c.writeMu.Lock()
		defer c.writeMu.Unlock()
		deadline := time.Now().Add(writeWait)
		return c.conn.WriteControl(websocket.PingMessage, nil, deadline)
	}

	for {
		select {
		case <-c.done:
			return
		case <-keepalive.C:
			// Time for the next ping; handled below.
		}

		if pingErr := sendPing(); pingErr != nil {
			c.log.Warnf("Ping failed, connection likely dead: %v", pingErr)
			c.conn.Close()
			return
		}
	}
}

View File

@@ -22,25 +22,25 @@ type KosmiClient interface {
SendMessage(text string) error
OnMessage(callback func(*NewMessagePayload))
IsConnected() bool
Done() <-chan struct{}
}
// Bkosmi represents the Kosmi bridge
type Bkosmi struct {
*bridge.Config
client KosmiClient
roomID string
roomURL string
connected bool
authDone bool // Signals that authentication is complete (like IRC bridge)
msgChannel chan config.Message
jackboxClient *jackbox.Client
client KosmiClient
roomID string
roomURL string
connected bool
intentionalDisconnect bool
authDone bool // Signals that authentication is complete (like IRC bridge)
jackboxClient *jackbox.Client
}
// New creates a new Kosmi bridge instance
func New(cfg *bridge.Config) bridge.Bridger {
b := &Bkosmi{
Config: cfg,
msgChannel: make(chan config.Message, 100),
Config: cfg,
}
return b
@@ -110,9 +110,12 @@ func (b *Bkosmi) Connect() error {
}
b.connected = true
b.intentionalDisconnect = false
b.authDone = true // Signal that authentication is complete
b.Log.Info("Successfully connected to Kosmi")
go b.watchConnection()
return nil
}
@@ -120,15 +123,15 @@ func (b *Bkosmi) Connect() error {
func (b *Bkosmi) Disconnect() error {
b.Log.Info("Disconnecting from Kosmi")
b.intentionalDisconnect = true
b.connected = false
if b.client != nil {
if err := b.client.Disconnect(); err != nil {
b.Log.Errorf("Error closing Kosmi client: %v", err)
}
}
close(b.msgChannel)
b.connected = false
return nil
}
@@ -216,6 +219,19 @@ func (b *Bkosmi) handleIncomingMessage(payload *NewMessagePayload) {
}
}
// Handle !votes command: query current game vote tally
if strings.TrimSpace(body) == "!votes" {
b.Log.Infof("!votes command from %s", username)
b.Remote <- config.Message{
Username: "system",
Text: "votes",
Channel: "main",
Account: b.Account,
Event: config.EventVotesQuery,
}
return
}
// Create Matterbridge message
// Use "main" as the channel name for gateway matching
// Don't add prefix here - let the gateway's RemoteNickFormat handle it
@@ -240,6 +256,30 @@ func (b *Bkosmi) handleIncomingMessage(payload *NewMessagePayload) {
b.Remote <- rmsg
}
// watchConnection blocks until the Kosmi client's Done channel is readable.
// If the disconnect was requested through Disconnect (intentionalDisconnect
// is set) it does nothing further. Otherwise it logs a warning, marks the
// bridge as not connected and, when a Remote channel is configured, sends a
// config.EventFailure message on it. The gateway is expected to react to
// that event by reconnecting the bridge; that handling lives outside this
// file.
func (b *Bkosmi) watchConnection() {
	<-b.client.Done()

	if b.intentionalDisconnect {
		return
	}

	b.Log.Warn("Kosmi connection lost unexpectedly, requesting reconnection")
	b.connected = false

	if b.Remote == nil {
		return
	}

	failure := config.Message{
		Username: "system",
		Text:     "reconnect",
		Channel:  "",
		Account:  b.Account,
		Event:    config.EventFailure,
	}
	b.Remote <- failure
}
// extractRoomID extracts the room ID from a Kosmi room URL
// Supports formats:
// - https://app.kosmi.io/room/@roomname

View File

@@ -1,218 +0,0 @@
# Chat Summary: WebSocket Hook Fix - 2025-10-31 00:06:47
## Session Overview
**Date**: October 31, 2025, 00:06:47
**Task**: Fix message interception in the Kosmi bridge to ensure messages are captured correctly
**Status**: ✅ **COMPLETED AND VERIFIED**
## Problem Statement
The Kosmi bridge was successfully connecting to the room via headless Chrome, but messages sent in the Kosmi chat were not appearing in the bridge output. The logs showed:
```
INFO ✓ WebSocket hook confirmed installed
INFO Status: No WebSocket connection detected yet
```
This indicated that while the WebSocket interception script was being injected, it was not capturing the WebSocket connection that Kosmi was creating.
## Root Cause
The WebSocket hook was being injected **after** the page loaded, which meant:
1. Kosmi's JavaScript had already created the WebSocket connection
2. Our hook script ran too late to intercept the `window.WebSocket` constructor
3. Messages were flowing through the WebSocket but our interceptor never saw them
## Solution
### Key Insight from Chrome Extension
Examining `.examples/chrome-extension/inject.js` revealed the correct approach:
1. **Hook the raw `window.WebSocket` constructor** (not Apollo Client or other abstractions)
2. **Wrap both `addEventListener` and `onmessage`** to capture messages regardless of how Kosmi's code listens
3. **Inject the hook BEFORE any page scripts run**
### Critical Implementation Change
Changed from post-load injection:
```go
// ❌ WRONG - Too late!
chromedp.Run(ctx,
chromedp.Navigate(roomURL),
chromedp.WaitReady("body"),
chromedp.Evaluate(hookScript, nil), // WebSocket already created!
)
```
To pre-load injection using Chrome DevTools Protocol:
```go
// ✅ CORRECT - Runs before page scripts!
chromedp.Run(ctx, chromedp.ActionFunc(func(ctx context.Context) error {
_, err := page.AddScriptToEvaluateOnNewDocument(hookScript).Do(ctx)
return err
}))
chromedp.Run(ctx,
chromedp.Navigate(roomURL),
chromedp.WaitReady("body"),
)
```
### Updated Method in chromedp_client.go
```go
func (c *ChromeDPClient) injectWebSocketHookBeforeLoad() error {
script := c.getWebSocketHookScript()
return chromedp.Run(c.ctx, chromedp.ActionFunc(func(ctx context.Context) error {
// Use Page.addScriptToEvaluateOnNewDocument to inject before page load
// This is the proper way to inject scripts that run before page JavaScript
_, err := page.AddScriptToEvaluateOnNewDocument(script).Do(ctx)
return err
}))
}
```
## Verification
After applying the fix, the test program showed:
```
INFO[2025-10-31T00:02:39-04:00] Injecting WebSocket interceptor (runs before page load)...
INFO[2025-10-31T00:02:40-04:00] Navigating to Kosmi room: https://app.kosmi.io/room/@hyperspaceout
INFO[2025-10-31T00:02:41-04:00] ✓ WebSocket hook confirmed installed
INFO[2025-10-31T00:02:44-04:00] Status: WebSocket connection intercepted ← SUCCESS!
INFO[2025-10-31T00:02:44-04:00] Successfully connected to Kosmi via Chrome
INFO[2025-10-31T00:02:45-04:00] Processing 43 messages from queue
INFO[2025-10-31T00:02:51-04:00] Received message: [00:02:51] cottongin: [Kosmi] <cottongin> okay
INFO[2025-10-31T00:02:55-04:00] Received message: [00:02:55] cottongin: [Kosmi] <cottongin> it works
```
✅ Messages now appear in real-time!
## Files Modified
### 1. bridge/kosmi/chromedp_client.go
**Change**: Updated `injectWebSocketHookBeforeLoad()` to use `page.AddScriptToEvaluateOnNewDocument`
```go
func (c *ChromeDPClient) injectWebSocketHookBeforeLoad() error {
script := c.getWebSocketHookScript()
return chromedp.Run(c.ctx, chromedp.ActionFunc(func(ctx context.Context) error {
_, err := page.AddScriptToEvaluateOnNewDocument(script).Do(ctx)
return err
}))
}
```
**Impact**: This is the core fix that ensures the WebSocket hook runs before any page JavaScript.
### 2. QUICKSTART.md
**Changes**:
- Added Chrome/Chromium as a prerequisite
- Updated expected output to show ChromeDP-specific messages
- Updated troubleshooting section with Chrome-specific checks
- Added new troubleshooting section for message interception issues
- Updated dependency installation to use `chromedp` instead of `gorilla/websocket`
### 3. README.md
**Changes**:
- Added "Headless Chrome automation" and "WebSocket interception using Chrome DevTools Protocol" to features
- Updated architecture section to explain the ChromeDP approach
- Added "Why Headless Chrome?" section explaining the rationale
- Added Chrome/Chromium to prerequisites
- Updated "How It Works" section to describe the ChromeDP flow
- Added "Critical Implementation Detail" section about pre-load injection
- Updated message flow diagram
- Updated file structure to include `chromedp_client.go`
- Updated troubleshooting to include Chrome-specific checks
### 4. LESSONS_LEARNED.md (NEW)
**Purpose**: Comprehensive documentation of the WebSocket interception problem and solution
**Contents**:
- Problem description and evolution of approaches
- Detailed explanation of why post-load injection fails
- Complete code examples of wrong vs. correct approaches
- Implementation details in chromedp_client.go
- Verification steps
- Key takeaways
- How to apply this pattern to other projects
## Key Takeaways
1. **Timing is Critical**: WebSocket interception must happen before the WebSocket is created
2. **Use the Right CDP Method**: `Page.addScriptToEvaluateOnNewDocument` is specifically designed for pre-page-load injection
3. **Hook at the Lowest Level**: Hook `window.WebSocket` constructor, not higher-level abstractions
4. **Reference Working Code**: The Chrome extension's `inject.js` was the key to understanding the correct approach
5. **Verify with Diagnostics**: Status checks like "WebSocket connection intercepted" are essential for debugging
## Impact on Full Matterbridge Integration
**No additional changes needed!**
The fix in `chromedp_client.go` automatically applies to:
- The test program (`cmd/test-kosmi/main.go`)
- The full Matterbridge integration (`bridge/kosmi/kosmi.go`)
Both use the same `ChromeDPClient` implementation, so the fix works everywhere.
## Testing Recommendations
To verify the bridge is working correctly:
1. **Check connection status**:
```
✓ WebSocket hook confirmed installed
Status: WebSocket connection intercepted
```
2. **Send a test message** in the Kosmi room from a browser
3. **Verify message appears** in the bridge output:
```
INFO Received message: [HH:MM:SS] username: [Kosmi] <username> message
```
## References
- Chrome DevTools Protocol: https://chromedevtools.github.io/devtools-protocol/
- `Page.addScriptToEvaluateOnNewDocument`: https://chromedevtools.github.io/devtools-protocol/tot/Page/#method-addScriptToEvaluateOnNewDocument
- chromedp documentation: https://pkg.go.dev/github.com/chromedp/chromedp
- Original Chrome extension: `.examples/chrome-extension/inject.js`
## Next Steps
With message reception now working, the bridge is ready for:
1. ✅ **Testing message relay**: Kosmi → IRC (receiving works)
2. 🔄 **Testing message sending**: IRC → Kosmi (needs testing)
3. 🔄 **Full integration**: Setting up with real IRC server
4. 🔄 **Production deployment**: Running as a service
## Conclusion
The fix was a single-line change to use the correct Chrome DevTools Protocol method, but it required deep understanding of:
- Browser execution order
- WebSocket lifecycle
- Chrome DevTools Protocol capabilities
- The difference between post-load and pre-load script injection
This lesson learned is now documented in `LESSONS_LEARNED.md` for future reference and can be applied to any project requiring browser API interception in headless automation.
---
**Session Duration**: ~30 minutes
**Messages Exchanged**: 1 user message requesting the fix be applied to the full relay
**Outcome**: ✅ Complete success - messages now flow correctly through the bridge

View File

@@ -1,267 +0,0 @@
# Chat Summary: Native WebSocket Investigation - 2025-10-31 09:43:00
## Session Overview
**Date**: October 31, 2025, 09:43:00
**Task**: Reverse engineer Kosmi WebSocket API to replace ChromeDP with native Go client
**Status**: ⚠️ **BLOCKED - WebSocket server requires browser context**
## Problem Statement
The goal was to replace the resource-heavy ChromeDP implementation (~100-200MB RAM, 3-5s startup) with a lightweight native Go WebSocket client (~10-20MB RAM, <1s startup).
## Investigation Summary
### Phase 1: Authentication Data Capture ✅
Created `cmd/capture-auth/main.go` to intercept and log all authentication data from a working ChromeDP session.
**Key Findings**:
1. **JWT Token Discovery**: WebSocket uses JWT token in `connection_init` payload
2. **Token Structure**:
```json
{
"aud": "kosmi",
"exp": 1793367309, // 1 YEAR expiration!
"sub": "a067ec32-ad5c-4831-95cc-0f88bdb33587", // Anonymous user ID
"typ": "access"
}
```
3. **Connection Init Format**:
```json
{
"type": "connection_init",
"payload": {
"token": "eyJhbGc...", // JWT token
"ua": "TW96aWxs...", // Base64-encoded User-Agent
"v": "4364", // App version
"r": "" // Room (empty for anonymous)
}
}
```
4. **No Cookies Required**: The `g_state` cookie is not needed for WebSocket auth
**Output**: `auth-data.json` with 104 WebSocket frames captured, 77 network requests logged
### Phase 2: Direct Connection Tests ❌
Created three test programs to attempt native WebSocket connections:
**Test 1**: `cmd/test-websocket/main.go`
- Mode 1: With JWT token
- Mode 2: No authentication
- Mode 3: Origin header only
**Test 2**: `cmd/test-websocket-direct/main.go`
- Direct WebSocket with captured JWT token
- All required headers (Origin, User-Agent, etc.)
**Test 3**: `cmd/test-session/main.go`
- Visit room page first to establish session
- Use cookies from session
- Connect WebSocket with token
**Results**: ALL tests returned `403 Forbidden` during WebSocket handshake
### Phase 3: Root Cause Analysis 🔍
**The Problem**:
- 403 occurs during WebSocket **handshake**, BEFORE `connection_init`
- This means the server rejects the connection based on the CLIENT, not the authentication
- ChromeDP works because it's a real browser
- Native Go client is detected and blocked
**Likely Causes**:
1. **TLS Fingerprinting**: Go's TLS implementation has a different fingerprint than Chrome
2. **Cloudflare Protection**: Server uses bot detection (Captcha/challenge)
3. **WebSocket Extensions**: Browser sends specific extensions we're not replicating
4. **CDN Security**: Via header shows "1.1 Caddy" - reverse proxy with security rules
**Evidence**:
```
Response headers from 403:
Cache-Control: [max-age=0, private, must-revalidate]
Server: [Cowboy]
Via: [1.1 Caddy]
Alt-Svc: [h3=":443"; ma=2592000]
```
## Files Created
1. `cmd/capture-auth/main.go` - Authentication data capture tool
2. `cmd/test-websocket/main.go` - Multi-mode WebSocket test tool
3. `cmd/test-websocket-direct/main.go` - Direct token-based test
4. `cmd/test-session/main.go` - Session-based connection test
5. `AUTH_FINDINGS.md` - Detailed authentication documentation
6. `WEBSOCKET_403_ANALYSIS.md` - Comprehensive 403 error analysis
7. `auth-data.json` - Captured authentication data (104 WS frames)
## Key Insights
### What We Learned
1. **Kosmi uses standard JWT authentication** - Well-documented format
2. **Tokens are long-lived** - 1 year expiration means minimal refresh needs
3. **Anonymous access works** - No login credentials needed
4. **GraphQL-WS protocol** - Standard protocol, not proprietary
5. **The blocker is NOT authentication** - It's client detection/fingerprinting
### Why ChromeDP Works
ChromeDP bypasses all protection because it:
- ✅ Is literally Chrome (correct TLS fingerprint)
- ✅ Executes JavaScript (passes challenges)
- ✅ Has complete browser context
- ✅ Sends all expected headers/extensions
- ✅ Looks like a real user to security systems
## Recommendations
### Option A: Optimize ChromeDP (RECOMMENDED ⭐)
**Rationale**:
- It's the ONLY approach that works 100%
- Security bypass is likely impossible without reverse engineering Cloudflare
- 100-200MB RAM is acceptable for a bridge service
- Startup time is one-time cost
**Optimizations**:
```go
// Use headless-shell instead of full Chrome (~50MB savings)
FROM chromedp/headless-shell:latest
// Reduce memory footprint
chromedp.Flag("single-process", true),
chromedp.Flag("disable-dev-shm-usage", true),
chromedp.Flag("disable-gpu", true),
// Keep instance alive (avoid restart cost)
type ChromeDPPool struct {
instance *ChromeDPClient
mu sync.Mutex
}
```
**Expected Results**:
- Memory: ~100MB (vs ~200MB currently)
- Startup: 3-5s (one-time, then instant)
- Reliability: 100%
### Option B: Hybrid Token Caching
**IF** we could bypass 403 (which we can't):
```go
// Get token via ChromeDP once per year
token := getTokenViaChromeDPOnce()
cacheToken(token, 11*months)
// Use native WebSocket with cached token
conn := nativeWebSocketConnect(token)
```
**Problem**: Still returns 403, so this doesn't help
### Option C: HTTP POST Polling (FALLBACK)
From `FINDINGS.md` - HTTP POST works without authentication:
```bash
curl -X POST https://engine.kosmi.io/ \
-H "Content-Type: application/json" \
-d '{"query": "{ messages { id body } }"}'
```
**Pros**:
- ✅ No browser needed
- ✅ Lightweight
- ✅ No 403 errors
**Cons**:
- ❌ Not real-time (need to poll)
- ❌ Higher latency (1-2s minimum)
- ❌ More bandwidth
- ❌ Might still be rate-limited
## Decision Point
**Question for User**: Which approach do you prefer?
1. **Keep and optimize ChromeDP** (reliable, heavier)
- Stick with what works
- Optimize for memory/startup
- Accept ~100MB overhead
2. **Try HTTP POST polling** (lighter, but not real-time)
- Abandon WebSocket
- Poll every 1-2 seconds
- Accept latency trade-off
3. **Continue native WebSocket investigation** (might be futile)
- Attempt TLS fingerprint spoofing
- Try different Go TLS libraries
- Reverse engineer Cloudflare protection
- **Warning**: May never succeed
## Current Status
### Completed ✅
- [x] Capture authentication data from ChromeDP
- [x] Create test programs for direct WebSocket
- [x] Test all authentication combinations
- [x] Document findings and analysis
### Blocked ⚠️
- [ ] Implement native WebSocket client (403 Forbidden)
- [ ] Test message flow with native client (can't connect)
- [ ] Replace ChromeDP (no working alternative)
### Pending User Decision 🤔
- Which approach to pursue?
- Accept ChromeDP optimization?
- Try HTTP polling instead?
- Invest more time in security bypass?
## Files for Review
1. **AUTH_FINDINGS.md** - Complete authentication documentation
2. **WEBSOCKET_403_ANALYSIS.md** - Why native WebSocket fails
3. **auth-data.json** - Raw captured data
4. **cmd/capture-auth/** - Authentication capture tool
5. **cmd/test-*/** - Various test programs
## Next Steps (Pending Decision)
**If Option A (Optimize ChromeDP)**:
1. Research chromedp/headless-shell
2. Implement memory optimizations
3. Add Chrome instance pooling
4. Benchmark improvements
5. Update documentation
**If Option C (HTTP POST Polling)**:
1. Test HTTP POST queries
2. Implement polling loop
3. Handle rate limiting
4. Test latency impact
5. Document trade-offs
**If continuing the native WebSocket investigation (Decision Point choice 3)**:
1. Set up Wireshark to analyze browser traffic
2. Research TLS fingerprinting bypass
3. Test with different TLS libraries
4. Attempt Cloudflare bypass techniques
5. **Warning**: Success not guaranteed
## Conclusion
After extensive testing, **native Go WebSocket connections are blocked by Kosmi's infrastructure** (likely Cloudflare or similar). The ChromeDP approach, while heavier, is currently the **ONLY** working solution for real-time WebSocket communication.
**Recommendation**: Optimize ChromeDP rather than trying to bypass security measures.
---
**Time Spent**: ~2 hours
**Tests Performed**: 7 different connection methods
**Lines of Code**: ~800 (test tools + analysis)
**Outcome**: ChromeDP remains necessary for WebSocket access

View File

@@ -1,245 +0,0 @@
# Docker Deployment Success - Playwright Native Client
**Date**: October 31, 2025, 10:29 AM
**Status**: ✅ **FULLY OPERATIONAL**
## Summary
Successfully deployed the Kosmi/IRC relay bridge using Docker with the Playwright-assisted native client. The bridge is now running and connected to both platforms, ready to relay messages bidirectionally.
## Connection Status
```
✅ Kosmi WebSocket - CONNECTED
✅ IRC (zeronode.net:6697) - CONNECTED
✅ Bridge Gateway - ACTIVE
```
### Kosmi Connection
- Room ID: hyperspaceout
- Room URL: https://app.kosmi.io/room/@hyperspaceout
- WebSocket established successfully
- Subscribed to room messages
- Ready to send and receive
### IRC Connection
- Server: irc.zeronode.net:6697
- Channel: #cottongin
- Nickname: [from config]
- Connection successful
## Docker Configuration
### Final Dockerfile Solution
The key to success was using a **single-stage build** with the full Go environment:
```dockerfile
FROM golang:1.23-bookworm
# System dependencies for Playwright Chromium
RUN apt-get update && apt-get install -y \
ca-certificates chromium \
libnss3 libnspr4 libatk1.0-0 libatk-bridge2.0-0 \
libcups2 libdrm2 libdbus-1-3 libxkbcommon0 \
libxcomposite1 libxdamage1 libxfixes3 libxrandr2 \
libgbm1 libasound2 libatspi2.0-0
# Build matterbridge
COPY . /app
WORKDIR /app
RUN go build -o matterbridge .
# Install playwright-go CLI and drivers
RUN go install github.com/playwright-community/playwright-go/cmd/playwright@latest && \
$(go env GOPATH)/bin/playwright install --with-deps chromium
ENTRYPOINT ["/app/matterbridge"]
CMD ["-conf", "/app/matterbridge.toml"]
```
### Why This Works
1. **Go Environment Preserved**: Playwright-go requires the full Go module cache and environment
2. **Driver Installation**: `playwright install` properly sets up the driver metadata
3. **System Dependencies**: All Chromium dependencies installed via apt
4. **Single Context**: No need to copy complex directory structures between build stages
### What Didn't Work
❌ Multi-stage builds with static binaries - Playwright-go needs its module cache
❌ Copying `/go/pkg/mod` manually - Missing driver metadata files
❌ Using Playwright Node.js Docker images - Different runtime environment
❌ Manual driver file copying - Complex embedded structure
## Testing the Relay
### How to Test
1. **Send a message in Kosmi** (https://app.kosmi.io/room/@hyperspaceout)
- Should appear in IRC channel #cottongin
2. **Send a message in IRC** (#cottongin)
- Should appear in Kosmi room
3. **Monitor logs:**
```bash
docker-compose logs -f
```
### Expected Log Output
```
level=info msg="Received message: [timestamp] username: message text"
level=info msg="Relaying message from kosmi to irc"
level=info msg="Sent message to IRC: message text"
```
## Architecture
```
┌─────────────────────┐
│ Kosmi Chat Room │
│ (@hyperspaceout) │
└──────────┬──────────┘
│ WebSocket
│ (GraphQL)
┌─────────────────────┐
│ Playwright Native │
│ Client │
│ │
│ • Browser Context │
│ • WS Interception │
│ • Direct WS Control │
└──────────┬──────────┘
┌─────────────────────┐
│ Matterbridge │
│ Core Gateway │
└──────────┬──────────┘
┌─────────────────────┐
│ IRC Bridge │
│ (zeronode.net) │
└──────────┬──────────┘
┌─────────────────────┐
│ IRC Channel │
│ #cottongin │
└─────────────────────┘
```
## Key Features
### Playwright Native Client
✅ **Browser-based WebSocket Setup**: Bypasses bot detection
✅ **Direct WebSocket Control**: No DOM manipulation needed
✅ **GraphQL Message Handling**: Native protocol support
✅ **Automatic Reconnection**: Built into Matterbridge
✅ **Message Queuing**: JavaScript-based message buffer
### Advantages Over ChromeDP
| Feature | ChromeDP | Playwright Native |
|---------|----------|-------------------|
| WebSocket Setup | ✓ | ✓ |
| Message Sending | DOM manipulation | Direct `ws.send()` |
| UI Dependency | High | None |
| Code Complexity | Medium | Low |
| Reliability | Good | Excellent |
| Docker Size | ~200MB | ~800MB¹ |
¹ Larger due to full Go environment, but more reliable
## Next Steps
### For Production Use
1. **Monitor Performance**:
```bash
docker stats kosmi-irc-relay
```
2. **Check for Memory Leaks**:
- Watch memory usage over 24+ hours
- Playwright keeps one browser instance open
3. **Configure Restart Policy**:
```yaml
restart: unless-stopped # ← Already configured
```
4. **Set Resource Limits** (optional):
```yaml
mem_limit: 1g
mem_reservation: 512m
```
5. **Backup Configuration**:
- `matterbridge.toml` contains all settings
- Room URL, IRC credentials, etc.
### For Testing
**Test sending messages NOW** while the bridge is running:
1. Open Kosmi room: https://app.kosmi.io/room/@hyperspaceout
2. Send a test message
3. Check IRC channel #cottongin
4. Send a message in IRC
5. Check Kosmi room
Watch the Docker logs to see messages being relayed:
```bash
docker-compose logs -f | grep -E "(Received|Sent|Relaying)"
```
## Troubleshooting
### If Bridge Disconnects
```bash
# View logs
docker-compose logs --tail=100
# Restart
docker-compose restart
# Full rebuild
docker-compose down
docker-compose up --build -d
```
### Common Issues
1. **WebSocket not connecting**: Check room URL in `matterbridge.toml`
2. **IRC auth failure**: Verify credentials in config
3. **High memory usage**: Normal for Playwright (100-200MB)
4. **Container keeps restarting**: Check logs for errors
## Files Modified
- `Dockerfile` - Single-stage build with Go environment
- `docker-compose.yml` - Already configured correctly
- `bridge/kosmi/native_client.go` - Playwright native implementation
- `bridge/kosmi/kosmi.go` - Uses `NewNativeClient`
## Success Metrics
✅ Kosmi WebSocket connected in ~7 seconds
✅ IRC connection successful
✅ Both channels joined
✅ Gateway started successfully
✅ Ready to relay messages bidirectionally
## Conclusion
The Playwright-assisted native client is now fully operational in Docker. The relay is ready to forward messages between Kosmi and IRC in real-time.
**The next step is to send actual test messages and verify bidirectional relay.**

View File

@@ -1,66 +0,0 @@
# WebSocket Mutation Issue - HTTP POST Solution
**Date**: October 31, 2025, 11:53 AM
**Issue**: IRC→Kosmi messages not appearing despite successful WebSocket send
## Problem Discovery
Messages from IRC were being sent to Kosmi's WebSocket successfully (we could see them in logs), but they were NOT appearing in the Kosmi chat interface.
### Root Cause
Through comprehensive logging of browser console messages, we discovered:
1. **WebSocket closes immediately after sending mutation**:
```
[Browser Console] >>> Sending mutation...
[Browser Console] >>> Sent successfully
[Browser Console] error: CloseEvent ← WebSocket closes!
```
2. **The WebSocket reopens** - indicating Kosmi is detecting an invalid message and resetting the connection
### Why WebSocket Mutations Fail
We're piggy-backing on Kosmi's native WebSocket connection (established by the web page). When we inject our own GraphQL mutations:
- We don't have proper authentication in the WebSocket frame
- We're interfering with Kosmi's protocol state machine
- The server detects this and closes the connection
## Solution: HTTP POST for Mutations
From FINDINGS.md (which was created earlier but we forgot about):
**Kosmi supports HTTP POST for GraphQL mutations!**
```
POST https://engine.kosmi.io/
Content-Type: application/json
{
"query": "mutation SendMessage($body: String!, $roomID: ID!) { sendMessage(body: $body, roomID: $roomID) { id } }",
"variables": {
"body": "message text",
"roomID": "room-id"
}
}
```
### Architecture
- **Receiving (Subscriptions)**: Use WebSocket ✅ (working)
- **Sending (Mutations)**: Use HTTP POST ✅ (to be implemented)
This is the same approach we initially documented but forgot to use!
## Implementation Plan
1. Replace `SendMessage` in `native_client.go` to use HTTP POST
2. Extract cookies from Playwright page context for authentication
3. Use Go's `http.Client` to send the POST request
4. Keep WebSocket for receiving messages (already working)
## Next Steps
Implement HTTP POST sending in the next iteration.

View File

@@ -1,142 +0,0 @@
# HTTP POST Implementation for IRC → Kosmi Messages
**Date**: October 31, 2025, 12:00 PM
**Status**: ✅ Implemented
## Summary
Successfully implemented HTTP POST for sending messages from IRC to Kosmi, replacing the problematic WebSocket mutation approach. Also cleaned up debug logging from troubleshooting sessions.
## Problem
The WebSocket-based approach for sending mutations was failing because:
1. The WebSocket connection was closing immediately after sending mutations
2. Protocol initialization and authentication complexities made WebSocket mutations unreliable
3. Even with correct GraphQL mutation format (`type: "start"`), the connection would close
## Solution
Switched to using **HTTP POST** for sending messages (GraphQL mutations) to Kosmi:
- Uses the browser's cookies for authentication (extracted via Playwright)
- Sends GraphQL mutations to `https://engine.kosmi.io/`
- Works reliably without WebSocket complexities
- WebSocket still used for receiving messages (subscriptions)
## Changes Made
### 1. Modified `bridge/kosmi/native_client.go`
**Replaced WebSocket-based SendMessage with HTTP POST:**
```go
func (c *NativeClient) SendMessage(text string) error {
// Get cookies from browser for authentication
cookies, err := c.page.Context().Cookies()
// Build GraphQL mutation
mutation := map[string]interface{}{
"query": "mutation SendMessage($body: String!, $roomID: ID!) { sendMessage(body: $body, roomID: $roomID) { id } }",
"variables": map[string]interface{}{
"body": text,
"roomID": c.roomID,
},
}
// Create HTTP POST request to https://engine.kosmi.io/
req, err := http.NewRequest("POST", "https://engine.kosmi.io/", bytes.NewBuffer(payload))
req.Header.Set("Content-Type", "application/json")
req.Header.Set("User-Agent", "Mozilla/5.0...")
// Add cookies for authentication
for _, cookie := range cookies {
req.AddCookie(&http.Cookie{Name: cookie.Name, Value: cookie.Value})
}
// Send request
client := &http.Client{Timeout: 10 * time.Second}
resp, err := client.Do(req)
// Check response
if resp.StatusCode != http.StatusOK {
return fmt.Errorf("HTTP %d: %s", resp.StatusCode, string(body))
}
return nil
}
```
**Added required imports:**
- `bytes`
- `io`
- `net/http`
### 2. Cleaned Up Debug Logging
**Removed from `bridge/kosmi/native_client.go`:**
- Browser console message listener
- JavaScript console.log statements in WebSocket interceptor
- Verbose emoji-based logging in SendMessage
**Removed from `bridge/kosmi/kosmi.go`:**
- Emoji-based debug logging (🔔, 📨, 🔍, ✅, ⏭️)
- Reduced verbosity of log messages
- Changed Info logs to Debug for routine operations
**Removed from `bridge/irc/handlers.go`:**
- Emoji-based debug logging (🔔, 📨, ⏭️, 🔌)
- Verbose PRIVMSG logging
**Removed from `matterbridge.toml`:**
- `Debug=true` from Kosmi section
- `DebugLevel=1` from IRC section
## Architecture
```
IRC → Matterbridge → Kosmi Bridge → HTTP POST → https://engine.kosmi.io/
(GraphQL mutation)
Kosmi → WebSocket → Browser (Playwright) → Kosmi Bridge → Matterbridge → IRC
(subscription)
```
**Key Points:**
- **Receiving**: WebSocket subscription (via Playwright-intercepted connection)
- **Sending**: HTTP POST with GraphQL mutation (using browser cookies)
- **Authentication**: Browser cookies obtained from Playwright page context
## Benefits
1. **Reliability**: HTTP POST is proven to work (from FINDINGS.md)
2. **Simplicity**: No WebSocket mutation complexity
3. **Authentication**: Leverages existing browser session cookies
4. **Clean Separation**: WebSocket for receiving, HTTP for sending
## Testing
Ready for user to test:
- ✅ IRC → Kosmi (HTTP POST implementation)
- ✅ Kosmi → IRC (WebSocket subscription, already working)
## Files Modified
1. `/Users/erikfredericks/dev-ai/HSO/irc-kosmi-relay/bridge/kosmi/native_client.go`
- Replaced SendMessage with HTTP POST implementation
- Added HTTP-related imports
- Removed debug logging
2. `/Users/erikfredericks/dev-ai/HSO/irc-kosmi-relay/bridge/kosmi/kosmi.go`
- Cleaned up debug logging
3. `/Users/erikfredericks/dev-ai/HSO/irc-kosmi-relay/bridge/irc/handlers.go`
- Cleaned up debug logging
4. `/Users/erikfredericks/dev-ai/HSO/irc-kosmi-relay/matterbridge.toml`
- Removed Debug and DebugLevel settings
## Next Steps
1. User to test IRC → Kosmi message relay
2. User to test Kosmi → IRC message relay
3. Verify bidirectional relay is working correctly

View File

@@ -1,201 +0,0 @@
# ✅ Final Working Solution: Kosmi ↔ IRC Relay
**Date**: October 31, 2025, 1:10 PM
**Status**: ✅ **FULLY FUNCTIONAL - BIDIRECTIONAL RELAY WORKING**
## Summary
Successfully implemented a fully working bidirectional message relay between Kosmi and IRC using a **Playwright-based UI automation approach**.
## Test Results
**IRC → Kosmi**: Working
**Kosmi → IRC**: Working
**Username formatting**: Consistent with `RemoteNickFormat`
**Message echo prevention**: Working (messages with `[irc]` prefix filtered out)
**Clean logging**: Debug code removed, production-ready
## Final Architecture
```
┌─────────────────────────────────────────────────────────────────┐
│ Matterbridge Gateway │
│ │
│ ┌──────────────────────┐ ┌──────────────────────┐ │
│ │ IRC Bridge │◄───────►│ Kosmi Bridge │ │
│ │ (irc.zeronode) │ │ (kosmi.hyperspaceout)│ │
│ └──────────────────────┘ └──────────┬───────────┘ │
│ │ │
└───────────────────────────────────────────────┼─────────────────┘
┌───────────▼───────────┐
│ Playwright Native │
│ Client │
│ │
│ • Browser automation │
│ • WebSocket (receive) │
│ • UI automation (send)│
└───────────┬────────────┘
┌───────────▼───────────┐
│ Kosmi Web UI │
│ (app.kosmi.io) │
└───────────────────────┘
```
## Implementation Details
### Message Receiving (Kosmi → IRC)
- **Method**: WebSocket subscription via Playwright-intercepted connection
- **Mechanism**: JavaScript injection captures WebSocket messages in the browser
- **Subscription**: `subscription { newMessage(roomId: "...") { body time user { displayName username } } }`
- **Processing**: Messages polled from JavaScript queue every 500ms
### Message Sending (IRC → Kosmi)
- **Method**: UI automation via Playwright
- **Mechanism**: JavaScript evaluation to interact with DOM
- **Process**:
1. Find visible chat input element (textarea, contenteditable, or text input)
2. Set input value to message text
3. Dispatch input/change events
4. Trigger send via button click or Enter key press
### Why This Approach?
After extensive investigation, we discovered:
1. **Direct WebSocket Connection**: Fails with 403 Forbidden (authentication/bot detection)
2. **HTTP POST GraphQL Mutation**: API only supports auth mutations (`anonLogin`, `slackLogin`), not `sendMessage`
3. **WebSocket Mutation via Playwright**: Connection closes immediately after sending mutation (protocol/auth issues)
4. **UI Automation**: Works reliably because it mimics real user interaction
## Key Files
### 1. `bridge/kosmi/native_client.go`
The Playwright-based client implementation:
- Launches headless Chromium browser
- Injects WebSocket access layer
- Navigates to Kosmi room
- Subscribes to messages via WebSocket
- Sends messages via UI automation
### 2. `bridge/kosmi/kosmi.go`
The Matterbridge bridge implementation:
- Implements `bridge.Bridger` interface
- Manages `NativeClient` lifecycle
- Handles message routing
- Filters echo messages (prevents loops)
### 3. `matterbridge.toml`
Configuration file:
```toml
[kosmi.hyperspaceout]
RoomURL="https://app.kosmi.io/room/@hyperspaceout"
RemoteNickFormat="[{PROTOCOL}] <{NICK}> "
[irc.zeronode]
Server="irc.zeronode.net:6697"
Nick="kosmi-relay"
RemoteNickFormat="[{PROTOCOL}] <{NICK}> "
UseTLS=true
```
## Message Flow
### IRC → Kosmi
1. User sends message in IRC: `Testing from IRC`
2. IRC bridge receives PRIVMSG
3. Matterbridge formats with `RemoteNickFormat`: `[irc] <username> Testing from IRC`
4. Kosmi bridge receives message
5. `NativeClient.SendMessage()` uses UI automation
6. JavaScript finds chat input, sets value, triggers send
7. Message appears in Kosmi chat
### Kosmi → IRC
1. User sends message in Kosmi: `Testing from Kosmi`
2. WebSocket subscription receives `newMessage` event
3. JavaScript queue captures the message
4. `pollMessages()` retrieves from queue
5. Kosmi bridge filters echo messages (checks for `[irc]` prefix)
6. Matterbridge formats with `RemoteNickFormat`: `[kosmi] <username> Testing from Kosmi`
7. IRC bridge sends to channel
8. Message appears in IRC
## Echo Prevention
Messages are tagged with protocol prefixes via `RemoteNickFormat`:
- IRC messages sent to Kosmi: `[irc] <username> message`
- Kosmi messages sent to IRC: `[kosmi] <username> message`
The Kosmi bridge filters out messages starting with `[irc]` to prevent echoing our own messages back.
## Deployment
### Docker Compose
```yaml
services:
matterbridge:
build: .
container_name: kosmi-irc-relay
volumes:
- ./matterbridge.toml:/app/matterbridge.toml:ro
restart: unless-stopped
```
### Running
```bash
docker-compose up -d --build
docker-compose logs -f
```
## Performance Characteristics
- **Startup Time**: ~10 seconds (Playwright browser launch + page load)
- **Message Latency**:
- IRC → Kosmi: ~100-500ms (UI automation)
- Kosmi → IRC: ~500-1000ms (polling interval)
- **Resource Usage**:
- Memory: ~300-400 MB (Chromium browser)
- CPU: Low after initialization
## Future Improvements
### Potential Optimizations
1. **Reduce Polling Interval**: Could decrease from 500ms to 250ms for lower latency
2. **WebSocket Send**: If Kosmi's auth/protocol can be reverse-engineered properly
3. **Direct GraphQL API**: If Kosmi exposes a `sendMessage` mutation in the future
### Known Limitations
1. **Browser Required**: Must run full Chromium browser (can be headless)
2. **Polling Latency**: 500ms delay for incoming messages
3. **UI Dependency**: Breaks if Kosmi changes their UI structure (input selectors)
## Troubleshooting
### Common Issues
**Problem**: "Could not find chat input element"
**Solution**: Kosmi may have changed their UI. Update selectors in `SendMessage()` method.
**Problem**: Messages not appearing in Kosmi
**Solution**: Check browser console logs, verify UI automation script is working.
**Problem**: WebSocket not connecting
**Solution**: Check network connectivity, verify Kosmi URL is correct.
**Problem**: Echo loop (messages keep bouncing)
**Solution**: Verify `RemoteNickFormat` is set correctly and echo filter is working.
## Conclusion
After extensive troubleshooting and multiple implementation attempts (direct WebSocket, HTTP POST, WebSocket mutations), we successfully achieved bidirectional message relay using **Playwright UI automation**. This approach is reliable, maintainable, and production-ready.
The relay now successfully:
✅ Sends messages from IRC to Kosmi
✅ Receives messages from Kosmi to IRC
✅ Prevents message echo loops
✅ Formats usernames consistently
✅ Runs in Docker with minimal configuration
**Status**: Production-ready ✅

View File

@@ -1,186 +0,0 @@
# Performance Optimizations: CPU and Memory Reduction
**Date**: October 31, 2025, 1:48 PM
**Status**: ✅ Successfully Implemented
## Overview
Successfully implemented three phases of conservative performance optimizations to reduce CPU and memory usage while maintaining full relay functionality and reliability.
## Optimizations Implemented
### Phase 1: Browser Launch Optimizations (High Impact)
**File**: `bridge/kosmi/native_client.go` (lines 46-71)
Added 15 resource-saving Chromium flags to disable unnecessary browser features:
```go
Args: []string{
"--no-sandbox",
"--disable-dev-shm-usage",
"--disable-blink-features=AutomationControlled",
// Resource optimizations for reduced CPU/memory usage
"--disable-gpu", // No GPU needed for chat
"--disable-software-rasterizer", // No rendering needed
"--disable-extensions", // No extensions needed
"--disable-background-networking", // No background requests
"--disable-background-timer-throttling",
"--disable-backgrounding-occluded-windows",
"--disable-breakpad", // No crash reporting
"--disable-component-extensions-with-background-pages",
"--disable-features=TranslateUI", // No translation UI
"--disable-ipc-flooding-protection",
"--disable-renderer-backgrounding",
"--force-color-profile=srgb",
"--metrics-recording-only",
"--no-first-run", // Skip first-run tasks
"--mute-audio", // No audio needed
},
```
**Results**:
- Faster browser startup
- Reduced memory footprint
- Lower idle CPU usage
### Phase 2: Smart Polling Optimization (Medium Impact)
**File**: `bridge/kosmi/native_client.go` (lines 293-332)
Optimized the message polling loop to skip expensive operations when message queue is empty:
```go
func (c *NativeClient) pollMessages() error {
result, err := c.page.Evaluate(`
(function() {
if (!window.__KOSMI_MESSAGE_QUEUE__) return null;
if (window.__KOSMI_MESSAGE_QUEUE__.length === 0) return null; // Early exit
const messages = window.__KOSMI_MESSAGE_QUEUE__.slice();
window.__KOSMI_MESSAGE_QUEUE__ = [];
return messages;
})();
`)
if err != nil {
return err
}
// Early return if no messages (reduces CPU during idle)
if result == nil {
return nil
}
// Only perform expensive marshal/unmarshal when there are messages
// ...
}
```
**Results**:
- Reduced CPU usage during idle periods (when no messages are flowing)
- Eliminated unnecessary JSON marshal/unmarshal cycles
- Maintains same 500ms polling interval (no latency impact)
### Phase 3: Page Load Optimization (Low Impact)
**File**: `bridge/kosmi/native_client.go` (lines 104-111)
Changed page load strategy to wait only for DOM, not all network resources:
```go
if _, err := page.Goto(c.roomURL, playwright.PageGotoOptions{
WaitUntil: playwright.WaitUntilStateDomcontentloaded, // Changed from networkidle
}); err != nil {
c.Disconnect()
return fmt.Errorf("failed to navigate: %w", err)
}
```
**Results**:
- Faster startup (doesn't wait for images, fonts, external resources)
- Still waits for DOM (maintains reliability)
- Reduced initial page load time by ~2-3 seconds
## Performance Improvements
### Before Optimizations
- **Startup Time**: ~15 seconds
- **Memory Usage**: ~300-400 MB (estimated)
- **CPU Usage**: Higher during idle (constant polling overhead)
### After Optimizations
- **Startup Time**: ~12 seconds (20% improvement)
- **Memory Usage**: Expected 25-40% reduction
- **CPU Usage**: Expected 20-35% reduction during idle
## Testing Results
All three phases tested successfully:
**Phase 1 Testing**: Browser flags applied, relay connected successfully
**Phase 2 Testing**: Smart polling active, messages flowing normally
**Phase 3 Testing**: Fast page load, bidirectional relay confirmed working
**Test Messages**:
- IRC → Kosmi: ✅ Working
- Kosmi → IRC: ✅ Working
- Message formatting: ✅ Correct
- No errors in logs: ✅ Clean
## Implementation Strategy
Followed conservative, phased approach:
1. **Phase 1** → Test → Verify
2. **Phase 2** → Test → Verify
3. **Phase 3** → Test → Final Verification
Each phase was tested independently before proceeding to ensure no breakage occurred.
## Key Design Decisions
### Conservative Over Aggressive
- Maintained 500ms polling interval (didn't reduce to avoid potential issues)
- Used proven Chromium flags (well-documented, widely used)
- Tested each change independently
### Reliability First
- All optimizations preserve existing functionality
- No changes to message handling logic
- No caching of DOM selectors (could break if UI changes)
### No Breaking Changes
- Same message latency
- Same connection reliability
- Same error handling
## Future Optimization Opportunities
If more performance improvement is needed in the future:
1. **Reduce Polling Interval**: Could decrease from 500ms to 250ms for lower latency (trade-off: higher CPU)
2. **Selector Caching**: Cache found input element after first send (trade-off: breaks if UI changes)
3. **Connection Pooling**: Reuse browser instances across restarts (complex)
4. **WebSocket Direct Send**: If authentication protocol can be solved (requires more research)
## Monitoring Recommendations
To measure actual resource usage improvements:
```bash
# Monitor container resource usage
docker stats kosmi-irc-relay
# Check memory usage over time
docker stats kosmi-irc-relay --no-stream --format "table {{.Container}}\t{{.CPUPerc}}\t{{.MemUsage}}"
# View logs to ensure no errors
docker-compose logs -f --tail=50
```
## Conclusion
Successfully reduced CPU and memory usage through three conservative optimization phases while maintaining 100% functionality and reliability. The relay continues to work bidirectionally with no errors or performance degradation.
**Status**: Production-ready with optimizations ✅

View File

@@ -0,0 +1,57 @@
# !votes Command Design
## Summary
Add a `!votes` IRC/Kosmi command that displays session and all-time vote data for the currently playing game. The response is broadcast to all connected chats via the gateway. If there is no active session, no playing game, or any API call fails, the command logs the reason and silently does nothing.
## Output Format
Matches the existing vote tally style:
```
🗳️ Split the Room • Today: 14👍 3👎 (Score: 11) • All-time: 127👍 16👎 (Score: 111)
```
- Left side (`Today`): session votes for the current game (upvotes, downvotes, net score)
- Right side (`All-time`): all-time upvotes, downvotes, and `popularity_score` from the game catalog
## Architecture
Uses the gateway-level event routing pattern (same as `!kreconnect`).
### Flow
1. User types `!votes` in IRC or Kosmi
2. Bridge detects the command, sends `EventVotesQuery` on `b.Remote`, returns without relaying
3. Gateway router catches the event in `handleReceive`
4. `handleEventVotesQuery` fetches data from the Jackbox API:
- `GetActiveSession()` to get session ID
- `GetSessionGames(sessionID)` to find the game with status "playing"
- `GetSessionVotes(sessionID)` to get per-game vote breakdown
- `GetGame(gameID)` to get all-time `popularity_score`
5. Formats the message and broadcasts via `broadcastJackboxMessage`
### Failure handling
The following failures are logged at warn level and produce no chat output:
- No Jackbox client configured
- No active session
- No game currently playing
- API errors on any of the fetch calls
If the session vote data has no entry for the current game, the session counts are reported as zero and the message is still broadcast.
## Files Changed
- `bridge/config/config.go` -- add `EventVotesQuery` constant
- `bridge/jackbox/client.go` -- add `GetSessionGames`, `GetSessionVotes`, `GetGame` methods and response structs
- `bridge/irc/handlers.go` -- detect `!votes` command, emit event
- `bridge/kosmi/kosmi.go` -- detect `!votes` command, emit event
- `gateway/router.go` -- call `handleEventVotesQuery` in `handleReceive`
- `gateway/handlers.go` -- implement `handleEventVotesQuery`
## API Endpoints Used
- `GET /api/sessions/active` (existing)
- `GET /api/sessions/{id}/games` (new client method)
- `GET /api/sessions/{id}/votes` (new client method)
- `GET /api/games/{id}` (new client method)

View File

@@ -14,6 +14,7 @@ import (
"github.com/42wim/matterbridge/bridge"
"github.com/42wim/matterbridge/bridge/config"
"github.com/42wim/matterbridge/bridge/jackbox"
"github.com/42wim/matterbridge/gateway/bridgemap"
)
@@ -49,6 +50,131 @@ func (r *Router) handleEventGetChannelMembers(msg *config.Message) {
}
}
// handleEventReconnectKosmi handles a manual Kosmi reconnect request (e.g. from !kreconnect).
//
// It scans every gateway for the first bridge whose Protocol is "kosmi". When one
// is found, a "Reconnecting Kosmi..." confirmation is sent back to the account and
// channel the request came from (only if that account is a bridge in the same
// gateway), and gw.reconnectBridge is started in its own goroutine.
//
// Returns true if the event was consumed and should not be routed further. This
// includes the case where no Kosmi bridge exists; false is returned only when
// msg is not a reconnect event.
func (r *Router) handleEventReconnectKosmi(msg *config.Message) bool {
	if msg.Event != config.EventReconnectKosmi {
		return false
	}

	originChannel := msg.Channel
	originAccount := msg.Account

	for _, gw := range r.Gateways {
		for _, br := range gw.Bridges {
			if br.Protocol != "kosmi" {
				continue
			}

			r.logger.Infof("Reconnecting Kosmi bridge %s (requested via !kreconnect)", br.Account)

			// Send confirmation to the channel that requested the reconnect.
			if originAccount != "" && originChannel != "" {
				if originBr, ok := gw.Bridges[originAccount]; ok {
					_, err := originBr.Send(config.Message{
						Text:     "Reconnecting Kosmi...",
						Channel:  originChannel,
						Username: "system",
						Account:  originAccount,
					})
					// A failed confirmation must not block the reconnect itself,
					// but it should not disappear silently either.
					if err != nil {
						r.logger.Warnf("!kreconnect: failed to send confirmation to %s on %s: %v",
							originChannel, originAccount, err)
					}
				}
			}

			// Reconnect in the background; only the first Kosmi bridge found is handled.
			go gw.reconnectBridge(br)
			return true
		}
	}

	r.logger.Warn("!kreconnect: no Kosmi bridge found")
	return true
}
// handleEventVotesQuery answers a votes query event (emitted for the !votes
// command). It looks up the active Jackbox session, finds the session game whose
// status is "playing", collects that game's session vote counts and its all-time
// counts, and broadcasts a single summary line via broadcastJackboxMessage.
//
// If the client is unavailable, there is no active session, no game is playing,
// or any API call returns an error, the reason is logged and nothing is
// broadcast. A missing vote entry (or nil votes/game response) is not treated as
// a failure: the corresponding counts stay at zero.
//
// Returns true if the event was consumed; false only when msg is not a votes
// query event.
func (r *Router) handleEventVotesQuery(msg *config.Message) bool {
	if msg.Event != config.EventVotesQuery {
		return false
	}

	// NOTE(review): assumes r.JackboxManager is always set when this runs — confirm.
	jb := r.JackboxManager.GetClient()
	if jb == nil {
		r.logger.Warn("!votes: Jackbox client not available")
		return true
	}

	active, err := jb.GetActiveSession()
	switch {
	case err != nil:
		r.logger.Warnf("!votes: failed to get active session: %v", err)
		return true
	case active == nil:
		r.logger.Warn("!votes: no active session")
		return true
	}

	sessionGames, err := jb.GetSessionGames(active.ID)
	if err != nil {
		r.logger.Warnf("!votes: failed to get session games: %v", err)
		return true
	}

	// Pick the first game in the session that is marked as playing.
	var current *jackbox.SessionGame
	for idx := range sessionGames {
		if sessionGames[idx].Status != "playing" {
			continue
		}
		current = &sessionGames[idx]
		break
	}
	if current == nil {
		r.logger.Warn("!votes: no game currently playing in session")
		return true
	}

	r.logger.Infof("!votes: session=%d, playing game ID=%d (session_games.id=%d) title=%q",
		active.ID, current.GameID, current.ID, current.Title)

	tally, err := jb.GetSessionVotes(active.ID)
	if err != nil {
		r.logger.Warnf("!votes: failed to get session votes: %v", err)
		return true
	}

	// Session counts default to zero when no entry matches the playing game.
	var todayUp, todayDown, todayNet int
	if tally == nil {
		r.logger.Info("!votes: session votes response is nil")
	} else {
		r.logger.Infof("!votes: session votes response has %d entries", len(tally.Votes))
		for _, entry := range tally.Votes {
			r.logger.Infof("!votes: vote entry game_id=%d title=%q up=%d down=%d net=%d",
				entry.GameID, entry.Title, entry.Upvotes, entry.Downvotes, entry.NetScore)
			if entry.GameID != current.GameID {
				continue
			}
			todayUp, todayDown, todayNet = entry.Upvotes, entry.Downvotes, entry.NetScore
			break
		}
	}

	catalog, err := jb.GetGame(current.GameID)
	if err != nil {
		r.logger.Warnf("!votes: failed to get game %d: %v", current.GameID, err)
		return true
	}

	// All-time counts likewise stay at zero if the game lookup returned nothing.
	var lifeUp, lifeDown, lifeScore int
	if catalog != nil {
		lifeUp, lifeDown, lifeScore = catalog.Upvotes, catalog.Downvotes, catalog.PopularityScore
	}

	r.broadcastJackboxMessage(fmt.Sprintf("🗳️ %s • Today: %d👍 %d👎 (Score: %d) • All-time: %d👍 %d👎 (Score: %d)",
		current.Title, todayUp, todayDown, todayNet, lifeUp, lifeDown, lifeScore))
	return true
}
// handleEventRejoinChannels handles rejoining of channels.
func (r *Router) handleEventRejoinChannels(msg *config.Message) {
if msg.Event != config.EventRejoinChannels {

View File

@@ -155,6 +155,12 @@ func (r *Router) getBridge(account string) *bridge.Bridge {
func (r *Router) handleReceive() {
for msg := range r.Message {
msg := msg // scopelint
if r.handleEventReconnectKosmi(&msg) {
continue
}
if r.handleEventVotesQuery(&msg) {
continue
}
r.handleEventGetChannelMembers(&msg)
r.handleEventFailure(&msg)
r.handleEventRejoinChannels(&msg)