diff --git a/Dockerfile b/Dockerfile index fecb061..813e8ce 100644 --- a/Dockerfile +++ b/Dockerfile @@ -36,5 +36,6 @@ CMD [ \ "--server.grpc-address", "0.0.0.0:3000", \ "--server.http-address", "0.0.0.0:3001", \ "--server.auth-address", "0.0.0.0:3002", \ + "--server.healthz-address", "0.0.0.0:3003", \ "--stack.config-file", "/var/lib/finch/finch.json" \ ] diff --git a/cmd/run/run.go b/cmd/run/run.go index 46ee726..c8ec447 100644 --- a/cmd/run/run.go +++ b/cmd/run/run.go @@ -23,6 +23,7 @@ func init() { Cmd.Flags().StringP("server.grpc-address", "", "127.0.0.1:3000", "Address to listen on for gRPC traffic") Cmd.Flags().StringP("server.http-address", "", "127.0.0.1:3001", "Address to listen on for HTTP traffic") Cmd.Flags().StringP("server.auth-address", "", "127.0.0.1:3002", "Address to listen on for auth traffic") + Cmd.Flags().StringP("server.healthz-address", "", "127.0.0.1:3003", "Address to listen on for healthz traffic") Cmd.Flags().StringP("server.log-level", "", "info", "Log level (debug, info, warn, error)") Cmd.Flags().StringP("server.log-format", "", "structured", "Log format (structured, json)") Cmd.Flags().StringP("stack.config-file", "", "/var/lib/finch/finch.json", "Config file of the stack") @@ -35,17 +36,23 @@ func runCmd(cmd *cobra.Command, args []string) { grpcAddr, _ := cmd.Flags().GetString("server.grpc-address") httpAddr, _ := cmd.Flags().GetString("server.http-address") authAddr, _ := cmd.Flags().GetString("server.auth-address") + healthzAddr, _ := cmd.Flags().GetString("server.healthz-address") config, _ := cmd.Flags().GetString("stack.config-file") logLevel, _ := cmd.Flags().GetString("server.log-level") logFormat, _ := cmd.Flags().GetString("server.log-format") setLogger(logLevel, logFormat) - manager, err := manager.New(config) + mgr, err := manager.New(config) cobra.CheckErr(err) ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM) defer stop() - manager.Run(ctx, grpcAddr, httpAddr, authAddr) + mgr.Run(ctx, manager.Addresses{ + GRPC: grpcAddr, + HTTP: httpAddr, + Auth: authAddr, + Healthz: healthzAddr, + }) } diff --git a/internal/database/database.go b/internal/database/database.go index d4018e9..4893ba4 100644 --- a/internal/database/database.go +++ b/internal/database/database.go @@ -5,6 +5,7 @@ Licensed under the MIT License, see LICENSE file in the project root for details package database import ( + "context" "fmt" "log/slog" "net/url" @@ -50,8 +51,6 @@ func New(config *config.Config) (*Database, error) { path = fmt.Sprintf("%s/%s", config.Library(), uri.Host) } - fmt.Printf("Using SQLite database at path: %s\n", path) - if strings.HasSuffix(path, ":memory:") { path = ":memory:" } @@ -77,6 +76,29 @@ func (d *Database) Connection() *gorm.DB { return d.connection } +func (d *Database) Ping(ctx context.Context) error { + sqlDB, err := d.connection.DB() + if err != nil { + return err + } + + if err := sqlDB.PingContext(ctx); err != nil { + return err + } + + tx := d.connection.WithContext(ctx).Begin() + if tx.Error != nil { + return tx.Error + } + + if err := tx.Exec("UPDATE agents SET resource_id=resource_id WHERE 1=0").Error; err != nil { + _ = tx.Rollback() + return err + } + + return tx.Rollback().Error +} + func (d *Database) Migrate() error { slog.Debug("Migrating database schema") diff --git a/internal/database/database_test.go b/internal/database/database_test.go index 13bee57..e2f412c 100644 --- a/internal/database/database_test.go +++ b/internal/database/database_test.go @@ -5,6 +5,8 @@ Licensed under the MIT License, see LICENSE file in the project root for details package database import ( + "context" + "os" "testing" "github.com/stretchr/testify/assert" @@ -145,3 +147,39 @@ func Test_MigrateSucceeds_RemovesCredentialsColumns(t *testing.T) { assert.False(t, has, "column '"+column+"' should be removed by second migration") } } + +func Test_PingSucceeds(t *testing.T) { + cfg := config.NewFromData(&config.Data{ + Database: "sqlite:///:memory:", + }, "") + + db, err := New(cfg) + assert.NoError(t, err, "new database instance") + + err = db.Migrate() + assert.NoError(t, err, "migrate database") + + err = db.Ping(context.Background()) + assert.NoError(t, err, "ping database") +} + +func Test_PingReturnsError_WritePermissionDenied(t *testing.T) { + tmpFile, err := os.CreateTemp("", "testdb-*.db") + assert.NoError(t, err, "create temporary file") + defer func() { + _ = os.Remove(tmpFile.Name()) + }() + + cfg := config.NewFromData(&config.Data{ + Database: "sqlite:///" + tmpFile.Name(), + }, "") + + db, err := New(cfg) + assert.NoError(t, err, "new database instance") + + err = os.Chmod(tmpFile.Name(), 0400) + assert.NoError(t, err, "change file permissions") + + err = db.Ping(context.Background()) + assert.Error(t, err, "ping database with read-only permissions") +} diff --git a/internal/healthz/server.go b/internal/healthz/server.go new file mode 100644 index 0000000..f69c998 --- /dev/null +++ b/internal/healthz/server.go @@ -0,0 +1,80 @@ +/* +Copyright (c) Tobias Schäfer. All rights reserved. +Licensed under the MIT License, see LICENSE file in the project root for details. +*/ +package healthz + +import ( + "context" + "log/slog" + "net" + "net/http" + "time" + + "github.com/tschaefer/finch/internal/database" + "github.com/tschaefer/finch/internal/version" +) + +type Server struct { + server *http.Server + db *database.Database +} + +func NewServer(addr string, db *database.Database) *Server { + slog.Debug("Initializing Healthz Server", "addr", addr) + + s := &Server{db: db} + + mux := http.NewServeMux() + mux.HandleFunc("/healthz", s.handleHealthz) + + s.server = &http.Server{ + Addr: addr, + Handler: mux, + ReadTimeout: 5 * time.Second, + WriteTimeout: 5 * time.Second, + IdleTimeout: 30 * time.Second, + } + + return s +} + +func (s *Server) Start() error { + listen, err := net.Listen("tcp", s.server.Addr) + if err != nil { + return err + } + + go func() { + if err := s.server.Serve(listen); err != nil && err != http.ErrServerClosed { + slog.Error("healthz server error", "error", err) + } + }() + + return nil +} + +func (s *Server) Stop(ctx context.Context) error { + return s.server.Shutdown(ctx) +} + +func (s *Server) handleHealthz(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodGet { + http.Error(w, "Method Not Allowed", http.StatusMethodNotAllowed) + return + } + + w.Header().Set("X-Finch-Commit", version.Commit()) + w.Header().Set("X-Finch-Release", version.Release()) + + ctx, cancel := context.WithTimeout(r.Context(), time.Second) + defer cancel() + + if err := s.db.Ping(ctx); err != nil { + slog.Error("database ping failed", "error", err) + w.WriteHeader(http.StatusInternalServerError) + return + } + + w.WriteHeader(http.StatusOK) +} diff --git a/internal/healthz/server_test.go b/internal/healthz/server_test.go new file mode 100644 index 0000000..4678078 --- /dev/null +++ b/internal/healthz/server_test.go @@ -0,0 +1,81 @@ +/* +Copyright (c) Tobias Schäfer. All rights reserved. +Licensed under the MIT License, see LICENSE file in the project root for details. +*/ +package healthz + +import ( + "net/http" + "net/http/httptest" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/tschaefer/finch/internal/config" + "github.com/tschaefer/finch/internal/database" + "github.com/tschaefer/finch/internal/version" +) + +var testCfg = config.NewFromData(&config.Data{ + Id: "test-id", + Hostname: "localhost", + CreatedAt: "2025-01-01T00:00:00Z", + Database: "sqlite:///:memory:", + Secret: "gpFb8WTh5iELimbX3YfuvRYRh2Z2PHa8Lmoog0a25QQ=", +}, "") + +func newTestDB(t *testing.T) *database.Database { + t.Helper() + db, err := database.New(testCfg) + if err != nil { + t.Fatal(err) + } + if err := db.Migrate(); err != nil { + t.Fatal(err) + } + return db +} + +func Test_HealthzHandler_Healthy(t *testing.T) { + s := NewServer("127.0.0.1:0", newTestDB(t)) + + req := httptest.NewRequest(http.MethodGet, "/healthz", nil) + rec := httptest.NewRecorder() + + s.handleHealthz(rec, req) + + assert.Equal(t, http.StatusOK, rec.Code) + assert.Equal(t, version.Commit(), rec.Header().Get("X-Finch-Commit")) + assert.Equal(t, version.Release(), rec.Header().Get("X-Finch-Release")) +} + +func Test_HealthzHandler_Unhealthy(t *testing.T) { + db := newTestDB(t) + + sqlDB, err := db.Connection().DB() + if err != nil { + t.Fatal(err) + } + _ = sqlDB.Close() + + s := NewServer("127.0.0.1:0", db) + + req := httptest.NewRequest(http.MethodGet, "/healthz", nil) + rec := httptest.NewRecorder() + + s.handleHealthz(rec, req) + + assert.Equal(t, http.StatusInternalServerError, rec.Code) + assert.Equal(t, version.Commit(), rec.Header().Get("X-Finch-Commit")) + assert.Equal(t, version.Release(), rec.Header().Get("X-Finch-Release")) +} + +func Test_HealthzHandler_MethodNotAllowed(t *testing.T) { + s := NewServer("127.0.0.1:0", newTestDB(t)) + + req := httptest.NewRequest(http.MethodPost, "/healthz", nil) + rec := httptest.NewRecorder() + + s.handleHealthz(rec, req) + + assert.Equal(t, http.StatusMethodNotAllowed, rec.Code) +} diff --git a/internal/manager/manager.go b/internal/manager/manager.go index bbe6ed9..bbbf6bb 100644 --- a/internal/manager/manager.go +++ b/internal/manager/manager.go @@ -6,6 +6,7 @@ package manager import ( "context" + "fmt" "log/slog" "net" "os" @@ -17,6 +18,7 @@ import ( "github.com/tschaefer/finch/internal/controller" "github.com/tschaefer/finch/internal/database" grpcserver "github.com/tschaefer/finch/internal/grpc" + healthzserver "github.com/tschaefer/finch/internal/healthz" httpserver "github.com/tschaefer/finch/internal/http" "github.com/tschaefer/finch/internal/model" "github.com/tschaefer/finch/internal/profiler" @@ -33,6 +35,13 @@ type Manager struct { profiler *profiler.Profiler } +type Addresses struct { + GRPC string + HTTP string + Auth string + Healthz string +} + func New(cfgFile string) (*Manager, error) { slog.Debug("Initializing Manager", "cfgFile", cfgFile) @@ -67,35 +76,42 @@ func New(cfgFile string) (*Manager, error) { }, nil } -func (m *Manager) Run(ctx context.Context, grpcAddr string, httpAddr string, authAddr string) { - slog.Debug("Running Manager", "grpcAddr", grpcAddr, "httpAddr", httpAddr, "authAddr", authAddr) +func (m *Manager) Run(ctx context.Context, addrs Addresses) { + slog.Debug("Running Manager", "addrs", fmt.Sprintf("%+v", addrs)) ctx, cancel := context.WithCancel(ctx) defer cancel() slog.Info("Starting Finch management server", "release", version.Release(), "commit", version.Commit()) - slog.Info("Listening on " + grpcAddr + " (gRPC)") - slog.Info("Listening on " + httpAddr + " (HTTP)") - slog.Info("Listening on " + authAddr + " (Auth)") + slog.Info("Listening on " + addrs.GRPC + " (gRPC)") + slog.Info("Listening on " + addrs.HTTP + " (HTTP)") + slog.Info("Listening on " + addrs.Auth + " (Auth)") + slog.Info("Listening on " + addrs.Healthz + " (Healthz)") - grpcServer, err := m.runGRPCServer(grpcAddr) + grpcServer, err := m.runGRPCServer(addrs.GRPC) if err != nil { slog.Error("Failed to start gRPC server", "error", err) os.Exit(1) } - httpServer, err := m.runHTTPServer(httpAddr) + httpServer, err := m.runHTTPServer(addrs.HTTP) if err != nil { slog.Error("Failed to start HTTP server", "error", err) os.Exit(1) } - authServer, err := m.runAuthServer(authAddr) + authServer, err := m.runAuthServer(addrs.Auth) if err != nil { slog.Error("Failed to start Auth server", "error", err) os.Exit(1) } + healthzServer, err := m.runHealthzServer(addrs.Healthz) + if err != nil { + slog.Error("Failed to start healthz server", "error", err) + os.Exit(1) + } + <-ctx.Done() slog.Info("Shutting down servers...") @@ -106,6 +122,10 @@ func (m *Manager) Run(ctx context.Context, grpcAddr string, httpAddr string, aut slog.Error("Auth server shutdown error", "error", err) } + if err := healthzServer.Stop(shutdownCtx); err != nil { + slog.Error("healthz server shutdown error", "error", err) + } + if err := httpServer.Stop(shutdownCtx); err != nil { slog.Error("HTTP server shutdown error", "error", err) } @@ -168,3 +188,11 @@ func (m *Manager) runAuthServer(authAddr string) (*auth.Server, error) { } return authServer, nil } + +func (m *Manager) runHealthzServer(healthzAddr string) (*healthzserver.Server, error) { + healthzServer := healthzserver.NewServer(healthzAddr, m.database) + if err := healthzServer.Start(); err != nil { + return nil, err + } + return healthzServer, nil +} diff --git a/internal/manager/manager_test.go b/internal/manager/manager_test.go index d078245..19000f1 100644 --- a/internal/manager/manager_test.go +++ b/internal/manager/manager_test.go @@ -106,7 +106,17 @@ func Test_RunSucceeds(t *testing.T) { authAddr := authListener.Addr().String() _ = authListener.Close() - go m.Run(ctx, grpcAddr, httpAddr, authAddr) + healthzListener, err := net.Listen("tcp", "127.0.0.1:0") + assert.NoError(t, err, "allocate healthz port") + healthzAddr := healthzListener.Addr().String() + _ = healthzListener.Close() + + go m.Run(ctx, Addresses{ + GRPC: grpcAddr, + HTTP: httpAddr, + Auth: authAddr, + Healthz: healthzAddr, + }) var conn net.Conn for range 50 {