source: code/trunk/server.go @ 741

Last change on this file since 741 was 735, checked in by contact, 4 years ago

Add exponential backoff when re-connecting to upstream

The first reconnection attempt waits for 1min, the second for 2min,
and so on up to 10min. There's a 1min jitter so that multiple failed
connections don't try to reconnect at the exact same time.

Closes: https://todo.sr.ht/~emersion/soju/161
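
The schedule described above maps onto the retryConnectMinDelay, retryConnectMaxDelay and retryConnectJitter variables defined near the top of this file. The reconnect loop itself lives outside server.go, so the helper below is only a hypothetical sketch of how such a delay could be computed from those values (backoffDelay and its parameters are illustrative, not soju's code):

package main

import (
	"fmt"
	"math/rand"
	"time"
)

// backoffDelay is a hypothetical helper (not soju's actual reconnect code):
// the n-th retry waits min*2^n, capped at max, plus up to "jitter" of random
// delay so that several failed connections don't all retry at the same time.
func backoffDelay(n int, min, max, jitter time.Duration) time.Duration {
	d := min << uint(n) // 1min, 2min, 4min, ...
	if d <= 0 || d > max {
		d = max // cap the delay (and guard against shift overflow)
	}
	return d + time.Duration(rand.Int63n(int64(jitter)))
}

func main() {
	for n := 0; n < 6; n++ {
		fmt.Printf("retry %d: wait %v\n", n, backoffDelay(n, time.Minute, 10*time.Minute, time.Minute))
	}
}

With the values used in this file (1min minimum, 10min maximum, 1min jitter), the fourth retry and beyond wait between 10 and 11 minutes.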

File size: 8.8 KB
package soju

import (
	"context"
	"errors"
	"fmt"
	"io"
	"log"
	"mime"
	"net"
	"net/http"
	"runtime/debug"
	"sync"
	"sync/atomic"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
	"gopkg.in/irc.v3"
	"nhooyr.io/websocket"

	"git.sr.ht/~emersion/soju/config"
)

// TODO: make configurable
var retryConnectMinDelay = time.Minute
var retryConnectMaxDelay = 10 * time.Minute
var retryConnectJitter = time.Minute
var connectTimeout = 15 * time.Second
var writeTimeout = 10 * time.Second
var upstreamMessageDelay = 2 * time.Second
var upstreamMessageBurst = 10
var backlogTimeout = 10 * time.Second
var handleDownstreamMessageTimeout = 10 * time.Second
var downstreamRegisterTimeout = 30 * time.Second
var chatHistoryLimit = 1000
var backlogLimit = 4000

type Logger interface {
	Print(v ...interface{})
	Printf(format string, v ...interface{})
}

type prefixLogger struct {
	logger Logger
	prefix string
}

var _ Logger = (*prefixLogger)(nil)

func (l *prefixLogger) Print(v ...interface{}) {
	v = append([]interface{}{l.prefix}, v...)
	l.logger.Print(v...)
}

func (l *prefixLogger) Printf(format string, v ...interface{}) {
	v = append([]interface{}{l.prefix}, v...)
	l.logger.Printf("%v"+format, v...)
}

type int64Gauge struct {
	v int64 // atomic
}

func (g *int64Gauge) Add(delta int64) {
	atomic.AddInt64(&g.v, delta)
}

func (g *int64Gauge) Value() int64 {
	return atomic.LoadInt64(&g.v)
}

func (g *int64Gauge) Float64() float64 {
	return float64(g.Value())
}

type Config struct {
	Hostname        string
	Title           string
	LogPath         string
	Debug           bool
	HTTPOrigins     []string
	AcceptProxyIPs  config.IPSet
	MaxUserNetworks int
	MultiUpstream   bool
	MOTD            string
	UpstreamUserIPs []*net.IPNet
}

type Server struct {
	Logger          Logger
	Identd          *Identd               // can be nil
	MetricsRegistry prometheus.Registerer // can be nil

	config atomic.Value // *Config
	db     Database
	stopWG sync.WaitGroup

	lock      sync.Mutex
	listeners map[net.Listener]struct{}
	users     map[string]*user

	metrics struct {
		downstreams int64Gauge
		upstreams   int64Gauge

		upstreamOutMessagesTotal   prometheus.Counter
		upstreamInMessagesTotal    prometheus.Counter
		downstreamOutMessagesTotal prometheus.Counter
		downstreamInMessagesTotal  prometheus.Counter

		upstreamConnectErrorsTotal prometheus.Counter
	}
}

func NewServer(db Database) *Server {
	srv := &Server{
		Logger:    log.New(log.Writer(), "", log.LstdFlags),
		db:        db,
		listeners: make(map[net.Listener]struct{}),
		users:     make(map[string]*user),
	}
	srv.config.Store(&Config{
		Hostname:        "localhost",
		MaxUserNetworks: -1,
		MultiUpstream:   true,
	})
	return srv
}

func (s *Server) prefix() *irc.Prefix {
	return &irc.Prefix{Name: s.Config().Hostname}
}

func (s *Server) Config() *Config {
	return s.config.Load().(*Config)
}

func (s *Server) SetConfig(cfg *Config) {
	s.config.Store(cfg)
}

func (s *Server) Start() error {
	s.registerMetrics()

	users, err := s.db.ListUsers(context.TODO())
	if err != nil {
		return err
	}

	s.lock.Lock()
	for i := range users {
		s.addUserLocked(&users[i])
	}
	s.lock.Unlock()

	return nil
}

func (s *Server) registerMetrics() {
	factory := promauto.With(s.MetricsRegistry)

	factory.NewGaugeFunc(prometheus.GaugeOpts{
		Name: "soju_users_active",
		Help: "Current number of active users",
	}, func() float64 {
		s.lock.Lock()
		n := len(s.users)
		s.lock.Unlock()
		return float64(n)
	})

	factory.NewGaugeFunc(prometheus.GaugeOpts{
		Name: "soju_downstreams_active",
		Help: "Current number of downstream connections",
	}, s.metrics.downstreams.Float64)

	factory.NewGaugeFunc(prometheus.GaugeOpts{
		Name: "soju_upstreams_active",
		Help: "Current number of upstream connections",
	}, s.metrics.upstreams.Float64)

	s.metrics.upstreamOutMessagesTotal = factory.NewCounter(prometheus.CounterOpts{
		Name: "soju_upstream_out_messages_total",
		Help: "Total number of outgoing messages sent to upstream servers",
	})

	s.metrics.upstreamInMessagesTotal = factory.NewCounter(prometheus.CounterOpts{
		Name: "soju_upstream_in_messages_total",
		Help: "Total number of incoming messages received from upstream servers",
	})

	s.metrics.downstreamOutMessagesTotal = factory.NewCounter(prometheus.CounterOpts{
		Name: "soju_downstream_out_messages_total",
		Help: "Total number of outgoing messages sent to downstream clients",
	})

	s.metrics.downstreamInMessagesTotal = factory.NewCounter(prometheus.CounterOpts{
		Name: "soju_downstream_in_messages_total",
		Help: "Total number of incoming messages received from downstream clients",
	})

	s.metrics.upstreamConnectErrorsTotal = factory.NewCounter(prometheus.CounterOpts{
		Name: "soju_upstream_connect_errors_total",
		Help: "Total number of upstream connection errors",
	})
}

func (s *Server) Shutdown() {
	s.lock.Lock()
	for ln := range s.listeners {
		if err := ln.Close(); err != nil {
			s.Logger.Printf("failed to stop listener: %v", err)
		}
	}
	for _, u := range s.users {
		u.events <- eventStop{}
	}
	s.lock.Unlock()

	s.stopWG.Wait()

	if err := s.db.Close(); err != nil {
		s.Logger.Printf("failed to close DB: %v", err)
	}
}

func (s *Server) createUser(ctx context.Context, user *User) (*user, error) {
	s.lock.Lock()
	defer s.lock.Unlock()

	if _, ok := s.users[user.Username]; ok {
		return nil, fmt.Errorf("user %q already exists", user.Username)
	}

	err := s.db.StoreUser(ctx, user)
	if err != nil {
		return nil, fmt.Errorf("could not create user in db: %v", err)
	}

	return s.addUserLocked(user), nil
}

func (s *Server) forEachUser(f func(*user)) {
	s.lock.Lock()
	for _, u := range s.users {
		f(u)
	}
	s.lock.Unlock()
}

func (s *Server) getUser(name string) *user {
	s.lock.Lock()
	u := s.users[name]
	s.lock.Unlock()
	return u
}

func (s *Server) addUserLocked(user *User) *user {
	s.Logger.Printf("starting bouncer for user %q", user.Username)
	u := newUser(s, user)
	s.users[u.Username] = u

	s.stopWG.Add(1)

	go func() {
		defer func() {
			if err := recover(); err != nil {
				s.Logger.Printf("panic serving user %q: %v\n%v", user.Username, err, debug.Stack())
			}
		}()

		u.run()

		s.lock.Lock()
		delete(s.users, u.Username)
		s.lock.Unlock()

		s.stopWG.Done()
	}()

	return u
}

var lastDownstreamID uint64 = 0

func (s *Server) handle(ic ircConn) {
	defer func() {
		if err := recover(); err != nil {
			s.Logger.Printf("panic serving downstream %q: %v\n%v", ic.RemoteAddr(), err, debug.Stack())
		}
	}()

	s.metrics.downstreams.Add(1)
	id := atomic.AddUint64(&lastDownstreamID, 1)
	dc := newDownstreamConn(s, ic, id)
	if err := dc.runUntilRegistered(); err != nil {
		if !errors.Is(err, io.EOF) {
			dc.logger.Print(err)
		}
	} else {
		dc.user.events <- eventDownstreamConnected{dc}
		if err := dc.readMessages(dc.user.events); err != nil {
			dc.logger.Print(err)
		}
		dc.user.events <- eventDownstreamDisconnected{dc}
	}
	dc.Close()
	s.metrics.downstreams.Add(-1)
}

func (s *Server) Serve(ln net.Listener) error {
	s.lock.Lock()
	s.listeners[ln] = struct{}{}
	s.lock.Unlock()

	s.stopWG.Add(1)

	defer func() {
		s.lock.Lock()
		delete(s.listeners, ln)
		s.lock.Unlock()

		s.stopWG.Done()
	}()

	for {
		conn, err := ln.Accept()
		if isErrClosed(err) {
			return nil
		} else if err != nil {
			return fmt.Errorf("failed to accept connection: %v", err)
		}

		go s.handle(newNetIRCConn(conn))
	}
}

func (s *Server) ServeHTTP(w http.ResponseWriter, req *http.Request) {
	conn, err := websocket.Accept(w, req, &websocket.AcceptOptions{
		Subprotocols:   []string{"text.ircv3.net"}, // non-compliant, fight me
		OriginPatterns: s.Config().HTTPOrigins,
	})
	if err != nil {
		s.Logger.Printf("failed to serve HTTP connection: %v", err)
		return
	}

	isProxy := false
	if host, _, err := net.SplitHostPort(req.RemoteAddr); err == nil {
		if ip := net.ParseIP(host); ip != nil {
			isProxy = s.Config().AcceptProxyIPs.Contains(ip)
		}
	}

	// Only trust the Forwarded header field if this is a trusted proxy IP
	// to prevent users from spoofing the remote address
	remoteAddr := req.RemoteAddr
	if isProxy {
		forwarded := parseForwarded(req.Header)
		if forwarded["for"] != "" {
			remoteAddr = forwarded["for"]
		}
	}

	s.handle(newWebsocketIRCConn(conn, remoteAddr))
}

func parseForwarded(h http.Header) map[string]string {
	forwarded := h.Get("Forwarded")
	if forwarded == "" {
		return map[string]string{
			"for":   h.Get("X-Forwarded-For"),
			"proto": h.Get("X-Forwarded-Proto"),
			"host":  h.Get("X-Forwarded-Host"),
		}
	}
	// Hack to easily parse header parameters
	_, params, _ := mime.ParseMediaType("hack; " + forwarded)
	return params
}

type ServerStats struct {
	Users       int
	Downstreams int64
	Upstreams   int64
}

func (s *Server) Stats() *ServerStats {
	var stats ServerStats
	s.lock.Lock()
	stats.Users = len(s.users)
	s.lock.Unlock()
	stats.Downstreams = s.metrics.downstreams.Value()
	stats.Upstreams = s.metrics.upstreams.Value()
	return &stats
}
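
For orientation, here is a rough, hypothetical wiring example (not part of server.go) showing how the exported API above fits together. It assumes a soju.Database value obtained elsewhere in the project, and the package name, listen addresses and hostname are placeholders:

package example // illustrative package, not part of soju

import (
	"log"
	"net"
	"net/http"

	"git.sr.ht/~emersion/soju"
)

// run wires up a Server using only the exported API shown above. The db
// argument and the addresses below are placeholders for illustration.
func run(db soju.Database) error {
	srv := soju.NewServer(db)
	srv.SetConfig(&soju.Config{
		Hostname:        "irc.example.org", // placeholder
		MaxUserNetworks: -1,
		MultiUpstream:   true,
	})

	// Start loads users from the database and launches their bouncer goroutines.
	if err := srv.Start(); err != nil {
		return err
	}

	// Plain TCP listener for IRC clients; Serve blocks until the listener is closed.
	ln, err := net.Listen("tcp", ":6667")
	if err != nil {
		return err
	}
	go func() {
		if err := srv.Serve(ln); err != nil {
			log.Print(err)
		}
	}()

	// Server implements http.Handler: ServeHTTP upgrades requests to WebSocket IRC.
	http.Handle("/socket", srv)
	go func() { log.Print(http.ListenAndServe(":8080", nil)) }()

	// ... later, Shutdown closes listeners, stops users and closes the DB:
	// srv.Shutdown()
	return nil
}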