From 7d265c886ae88149bc45b1d44f9c2ea162b6e293 Mon Sep 17 00:00:00 2001
From: continuist
Date: Sun, 7 Sep 2025 21:36:31 -0400
Subject: [PATCH] Update how Forgejo runner and Podman are installed on prod

---
 CI_CD_PIPELINE_SETUP_GUIDE.md | 423 +++++++++++++---------------------
 1 file changed, 160 insertions(+), 263 deletions(-)

diff --git a/CI_CD_PIPELINE_SETUP_GUIDE.md b/CI_CD_PIPELINE_SETUP_GUIDE.md
index 9fe2404..9608251 100644
--- a/CI_CD_PIPELINE_SETUP_GUIDE.md
+++ b/CI_CD_PIPELINE_SETUP_GUIDE.md
@@ -74,10 +74,19 @@ This guide covers setting up a complete Continuous Integration/Continuous Deploy
 - **Systemd user manager** for robust rootless Podman services
 
 ### Production Linode Features
-- Podman-based application deployment
-- Nginx reverse proxy with security headers
-- Automated backups and monitoring
-- Firewall and fail2ban protection
+- **Rootless Podman** deployment with maximum security hardening, running only under PROD_SERVICE_USER
+- **Host nginx reverse proxy** with SSL termination (ports 80/443); containers serve internal ports (8080/8443) only
+- **Zero host port exposure** for backend/frontend/postgres; internal communication goes over UNIX sockets only
+- **Systemd user services** for automatic restart and persistence via the systemd user manager
+- **No Podman TCP sockets**; UNIX-socket communication only (ports 2375/2376 are never exposed)
+- **Container hardening**: readOnlyRootFilesystem, no privilege escalation, capabilities.drop=ALL
+- **Secrets management**: Kubernetes Secrets or mounted env-files for secure credential handling
+- **IPv4-only networking** with a UFW firewall that allows only ports 22/80/443
+- **Artifact-based deployment** from OCI archives; no registry access from production
+- **Local image references** only (localhost/backend:deployed) to avoid external registry dependencies
+- **Automatic health monitoring** with liveness and readiness probes for all services
+- **Resource limits** and constraints to prevent resource-exhaustion attacks
+- **Fail2ban protection** for SSH and application-level intrusion prevention
 
 ### Pipeline Features
 - **Ephemeral testing** - fresh PiP container per CI run with maximum security
@@ -1762,209 +1771,175 @@ pwd
 exit
 ```
 
-### Step 11: Install Podman
+### Step 11: Install Forgejo Actions Runner
 
-#### 11.1 Install Podman
+This is a clean, FHS-aligned setup: `prod-deploy` (the sudo admin) installs and configures everything system-wide, while the Forgejo runner and rootless Podman run as `prod-service`. Configs live under `/etc`, state under `/var/lib`, cache under `/var/cache`, logs go to journald, binaries to `/usr/local/bin`, and per-user runtime files to `/run/user/<uid>`.
 
-```bash
-# Install Podman and related tools
-sudo apt install -y podman
+#### 11.1 Create variables
 
-# Verify installation
-podman --version
-```
+```bash
+SVC_USER="prod-service"   # non-sudo service user that runs jobs
+SVC_UID="$(id -u "$SVC_USER" 2>/dev/null || echo)"   # empty until the user exists (created in 11.2)
+FORGEJO_URL="https://git.gcdo.org/"
+RUNNER_NAME="prod-runner"
+RUNNER_LABELS="prod"
+```
 
-#### 11.2 Configure Podman for Production Service Account Only
+#### 11.2 System prerequisites (packages, idmaps, linger)
 
-**Security Restriction**: Configure Podman to only be usable by PROD_SERVICE_USER to prevent unauthorized container operations.
+```bash
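+# Optional sanity check (an addition to the published steps): fail fast if
+# the variables from 11.1 are missing in this shell session.
+: "${SVC_USER:?run step 11.1 first}"
+: "${FORGEJO_URL:?run step 11.1 first}"
+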
+# Packages (Ubuntu 24.04)
+sudo apt-get update -y
+sudo apt-get install -y podman uidmap slirp4netns fuse-overlayfs dbus-user-session curl jq ca-certificates
 
-```bash
-# Create podman group for controlled access
-sudo groupadd podman
+# Ensure the service user exists
+id "$SVC_USER" >/dev/null 2>&1 || sudo adduser --disabled-password --gecos "" "$SVC_USER"
+SVC_UID="$(id -u "$SVC_USER")"
 
-# Add PROD_SERVICE_USER to podman group
-sudo usermod -aG podman PROD_SERVICE_USER
+# Subordinate ID ranges for rootless containers
+grep -q "^${SVC_USER}:" /etc/subuid || echo "${SVC_USER}:100000:65536" | sudo tee -a /etc/subuid >/dev/null
+grep -q "^${SVC_USER}:" /etc/subgid || echo "${SVC_USER}:100000:65536" | sudo tee -a /etc/subgid >/dev/null
 
-# Configure Podman socket permissions (restrict to podman group)
-sudo mkdir -p /etc/containers
-sudo tee /etc/containers/containers.conf > /dev/null << 'EOF'
-[engine]
-events_logger = "file"
+# Ensure the user manager exists/runs at boot
+sudo loginctl enable-linger "$SVC_USER"
+sudo systemctl start "user@${SVC_UID}.service"
+```
 
-[network]
-dns_bind_port = 53
+#### 11.3 Rootless Podman socket (user scope; runtime in /run/user/<uid>)
 
-[engine.runtimes]
-runc = [
-    "/usr/bin/runc",
-]
+```bash
+# Tell root-invoked systemctl which user bus/runtime to target
+export XDG_RUNTIME_DIR="/run/user/${SVC_UID}"
+export DBUS_SESSION_BUS_ADDRESS="unix:path=${XDG_RUNTIME_DIR}/bus"
 
-[engine.socket_group]
-group = "podman"
-EOF
+# Enable the user's Docker-API-compatible Podman UNIX socket (no TCP)
+sudo -u "$SVC_USER" XDG_RUNTIME_DIR="$XDG_RUNTIME_DIR" DBUS_SESSION_BUS_ADDRESS="$DBUS_SESSION_BUS_ADDRESS" \
+  systemctl --user enable --now podman.socket
 
-# Configure user namespace for rootless operation (if not already done)
-echo 'kernel.unprivileged_userns_clone=1' | sudo tee -a /etc/sysctl.conf
-sudo sysctl -p
+# Verify the UNIX socket path under /run (FHS: volatile runtime data)
+sudo -u "$SVC_USER" ss -lx | grep 'podman/podman.sock' >/dev/null || { echo "Podman user socket missing"; exit 1; }
+```
 
-# Verify PROD_SERVICE_USER can access Podman
-sudo -u PROD_SERVICE_USER podman --version
-
-# Test that other users cannot access Podman socket
-sudo -u PROD_DEPLOY_USER podman --version || echo "Good: PROD_DEPLOY_USER cannot access Podman"
-```
-
-**What this does**:
-- Creates dedicated `podman` group for controlled access
-- Restricts Podman socket access to only members of the `podman` group
-- Ensures only PROD_SERVICE_USER can execute Podman commands
-- Prevents PROD_DEPLOY_USER and other users from running containers
-- Maintains rootless operation for security
-
-#### 11.4 Create Application Directory
-
-```bash
-# Create application directory for deployment
-sudo mkdir -p /opt/APP_NAME
-sudo chown PROD_SERVICE_USER:PROD_SERVICE_USER /opt/APP_NAME
-sudo chmod 755 /opt/APP_NAME
-
-# Verify the directory was created correctly
-ls -la /opt/APP_NAME
-```
-
-**What this does**:
-- Creates the application directory that will be used for deployment
-- Sets proper ownership for the PROD_SERVICE_USER
-- Ensures the directory exists before the CI workflow runs
-
-### Step 12: Set Up Forgejo Runner for Production Deployment
-
-**Important**: The Production Linode needs a Forgejo runner to execute the deployment job from the CI/CD workflow. This runner will pull images from Forgejo Container Registry and deploy using the production pod configuration.
+#### 11.4 Place the runner binary in /usr/local/bin (local admin install)
+
+```bash
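+# Note (carried over from the old guide's recommendation): in production you
+# may prefer to pin a known-good release rather than query for the latest,
+# e.g. LATEST_VERSION="v6.3.1", for consistency across rebuilds.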
-
-#### 12.1 Download Runner
-
-**Important**: Run this step as the **PROD_DEPLOY_USER** (not root or PROD_SERVICE_USER). The PROD_DEPLOY_USER handles deployment tasks including downloading and installing the Forgejo runner.
-
-```bash
-cd ~
+sudo install -d -m 0755 /usr/local/bin
 
 # Get the latest version dynamically
 LATEST_VERSION=$(curl -s https://code.forgejo.org/api/v1/repos/forgejo/runner/releases | jq -r '.[0].tag_name')
 echo "Downloading Forgejo runner version: $LATEST_VERSION"
 
-# Download the latest runner
-wget https://code.forgejo.org/forgejo/runner/releases/download/${LATEST_VERSION}/forgejo-runner-${LATEST_VERSION#v}-linux-amd64
-chmod +x forgejo-runner-${LATEST_VERSION#v}-linux-amd64
-sudo mv forgejo-runner-${LATEST_VERSION#v}-linux-amd64 /usr/bin/forgejo-runner
-```
+curl -fsSL "https://code.forgejo.org/forgejo/runner/releases/download/${LATEST_VERSION}/forgejo-runner-${LATEST_VERSION#v}-linux-amd64" \
+  | sudo tee /usr/local/bin/forgejo-runner >/dev/null
+sudo chmod 0755 /usr/local/bin/forgejo-runner
+```
 
-**Alternative: Pin to Specific Version (Recommended for Production)**
-
-If you prefer to pin to a specific version for stability, replace the dynamic download with:
-
-```bash
-cd ~
-VERSION="v6.3.1" # Pin to specific version
-wget https://code.forgejo.org/forgejo/runner/releases/download/${VERSION}/forgejo-runner-${VERSION#v}-linux-amd64
-chmod +x forgejo-runner-${VERSION#v}-linux-amd64
-sudo mv forgejo-runner-${VERSION#v}-linux-amd64 /usr/bin/forgejo-runner
-```
-
-**What this does**:
-- **Dynamic approach**: Downloads the latest stable Forgejo Actions runner
-- **Version pinning**: Allows you to specify a known-good version for production
-- **System installation**: Installs the binary system-wide in `/usr/bin/` for proper Linux structure
-- **Makes the binary executable** and available system-wide
-
-**Production Recommendation**: Use version pinning in production environments to ensure consistency and avoid unexpected breaking changes.
+#### 11.5 Create FHS directories for runner state and cache
 
-#### 12.2 Get Registration Token
+```bash
+# State (tokens, work dirs) goes to /var/lib; cache goes to /var/cache
+sudo install -d -o "$SVC_USER" -g "$SVC_USER" -m 0750 /var/lib/forgejo-runner
+sudo install -d -o "$SVC_USER" -g "$SVC_USER" -m 0750 /var/lib/forgejo-runner/work
+sudo install -d -o "$SVC_USER" -g "$SVC_USER" -m 0750 /var/cache/forgejo-runner
+```
 
-1. Go to your Forgejo repository
-2. Navigate to **Settings → Actions → Runners**
-3. Click **"New runner"**
-4. Copy the registration token
+#### 11.6 Register the runner as prod-service with state in /var/lib
+
+Get a fresh registration token from your repository under **Settings → Actions → Runners → New runner**, then:
+
+```bash
+REG_TOKEN="PASTE_A_FRESH_REGISTRATION_TOKEN_HERE"   # short-lived token
+
+# The state dir already exists from 11.5; repeating the install is harmless (idempotent)
+sudo install -d -o "$SVC_USER" -g "$SVC_USER" -m 0750 /var/lib/forgejo-runner
+
+# Register and write .runner into /var/lib/forgejo-runner
+sudo -u "$SVC_USER" \
+  FORGEJO_URL="$FORGEJO_URL" \
+  REG_TOKEN="$REG_TOKEN" \
+  RUNNER_NAME="$RUNNER_NAME" \
+  RUNNER_LABELS="$RUNNER_LABELS" \
+  bash -lc '
+    set -Eeuo pipefail
+    cd /var/lib/forgejo-runner
+    pwd   # should print: /var/lib/forgejo-runner
+    /usr/local/bin/forgejo-runner register \
+      --instance "$FORGEJO_URL" \
+      --token "$REG_TOKEN" \
+      --name "$RUNNER_NAME" \
+      --labels "$RUNNER_LABELS" \
+      --no-interactive
+    chmod 600 .runner
+    stat -c "%U:%G %a %n" .runner
+'
+```
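+
+A quick sanity check (optional; assumes `.runner` is the runner's JSON state file, as in current forgejo-runner releases):
+
+```bash
+# The registered instance address should match FORGEJO_URL
+sudo -u "$SVC_USER" jq -r '.address' /var/lib/forgejo-runner/.runner
+```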
 
+#### 11.7 System-wide runner config in /etc
 
-#### 12.3 Register the Production Runner
+Write a minimal config (a sketch to extend as needed; `forgejo-runner generate-config` prints the full schema):
 
-**Step 1: Register the Runner**
-
-**Important**: When registering the runner, enter the base URL of your Forgejo instance (e.g., `https://git.<your-domain>/`), not the full path to the repository. The runner registration process will handle connecting to the specific repository based on the token you provide.
-
-**What this does**:
-- Creates the required `.runner` configuration file in the PROD_DEPLOY_USER's home directory
-- Registers the runner with your Forgejo instance
-- Sets up the runner with appropriate labels for production deployment
-
-**Step 2: Set Up System Configuration**
-
-```bash
-# Create system config directory for Forgejo runner
-sudo mkdir -p /etc/forgejo-runner
-
-# Copy the runner configuration to system location
-sudo mv /home/PROD_DEPLOY_USER/.runner /etc/forgejo-runner/.runner
-
-# Set proper ownership and permissions
-sudo chown PROD_SERVICE_USER:PROD_SERVICE_USER /etc/forgejo-runner/.runner
-sudo chmod 600 /etc/forgejo-runner/.runner
-```
-
-**What this does**:
-- Copies the configuration to the system location (`/etc/forgejo-runner/.runner`)
-- Sets proper ownership and permissions for PROD_SERVICE_USER to access the config
-- Registers the runner with your Forgejo instance
-- Sets up the runner with appropriate labels for production deployment
+```bash
+sudo install -d -m 0755 /etc/forgejo-runner
+sudo tee /etc/forgejo-runner/config.yaml >/dev/null <<EOF
+log:
+  level: info
+runner:
+  file: /var/lib/forgejo-runner/.runner
+  capacity: 1
+cache:
+  enabled: true
+  dir: /var/cache/forgejo-runner
+host:
+  workdir_parent: /var/lib/forgejo-runner/work
+EOF
+```
 
-#### 12.4 Create Systemd Service
+#### 11.8 Create the systemd service (system scope, runs as prod-service)
 
-```bash
-# Create systemd service file
-sudo tee /etc/systemd/system/forgejo-runner.service > /dev/null << 'EOF'
+A minimal unit that matches this layout (state in /var/lib, config in /etc, user-scope Podman socket):
+
+```bash
+sudo tee /etc/systemd/system/forgejo-runner.service >/dev/null <<EOF
+[Unit]
+Description=Forgejo Actions Runner (prod)
+After=network-online.target
+Wants=network-online.target
+
+[Service]
+User=${SVC_USER}
+Group=${SVC_USER}
+WorkingDirectory=/var/lib/forgejo-runner
+Environment=XDG_RUNTIME_DIR=/run/user/${SVC_UID}
+Environment=DOCKER_HOST=unix:///run/user/${SVC_UID}/podman/podman.sock
+ExecStart=/usr/local/bin/forgejo-runner daemon --config /etc/forgejo-runner/config.yaml
+Restart=on-failure
+
+[Install]
+WantedBy=multi-user.target
+EOF
+
+sudo systemctl daemon-reload
+sudo systemctl enable --now forgejo-runner.service
+```
+
+#### 11.9 Verify the hardening
+
+```bash
+# Defense in depth: confirm no root-scope Podman socket is exposed
+sudo systemctl is-enabled podman.socket 2>/dev/null || echo "OK: root podman.socket not enabled"
+sudo systemctl is-active podman.socket 2>/dev/null || echo "OK: root podman.socket not active"
+
+# Runner sees the user socket
+sudo -iu "$SVC_USER" podman info --format '{{.Host.ServiceIsRemote}} {{.Host.RemoteSocket.Path}}'
+# Expect: true unix:///run/user/<uid>/podman/podman.sock (this must be true because
+# the Forgejo runner connects through the Docker-compatible REST API)
+```
 
+#### 11.10 Test Runner Configuration
 
 ```bash
 # Check if the runner is running
@@ -1975,90 +1950,12 @@ sudo journalctl -u forgejo-runner.service -f --no-pager
 
 # Verify runner appears in Forgejo
 # Go to your Forgejo repository → Settings → Actions → Runners
-# You should see your runner listed as "prod-runner" with status "Online"
+# You should see your runner listed as "prod-runner" (the RUNNER_NAME from 11.1) with status "Online"
 ```
 
-**Expected Output**:
-- `systemctl status` should show "active (running)"
-- Forgejo web interface should show the runner as online with "prod" label
-
-**Important**: The CI/CD workflow (`.forgejo/workflows/ci.yml`) is already configured to use this production runner. The deploy job runs on `runs-on: [self-hosted, prod]`, which means it will execute on any runner with the "prod" label.
-
-**Architecture**:
-- **Runner Configuration**: Located in `/etc/forgejo-runner/.runner` (system configuration)
-- **Application Deployment**: Located in `/opt/APP_NAME/` (application software)
-- **Workflow Process**: Runner starts in `/etc/forgejo-runner`, then checks out directly to `/opt/APP_NAME`
-
-When the workflow runs, it will:
-
-1. Pull the latest Docker images from Forgejo Container Registry
-2. Use the `prod-pod.yml` file to deploy the application stack
-3. Create the necessary environment variables for production deployment
-4. Verify that all services are healthy after deployment
-
-The production runner will automatically handle the deployment process when you push to the main branch.
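+
+As an optional end-to-end check, a trivial workflow confirms that jobs land on this runner (a sketch; the file name is hypothetical, and `runs-on` must match the label registered in 11.6):
+
+```yaml
+# .forgejo/workflows/prod-runner-smoke.yml
+name: prod-runner-smoke
+on: [push]
+jobs:
+  smoke:
+    runs-on: prod
+    steps:
+      - run: echo "prod runner OK on $(hostname)"
+```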
- -#### 12.6 Understanding the Production Pod Setup - -The `prod-pod.yml` file is specifically designed for production deployment and uses Kubernetes pod specifications: - -**Key Features**: -- **Image-based deployment**: Uses pre-built images from Forgejo Container Registry instead of building from source -- **Production networking**: All services communicate through a dedicated `sharenet-network` -- **Health checks**: Each service includes health checks to ensure proper startup order -- **Nginx reverse proxy**: Includes Nginx for SSL termination, load balancing, and security headers -- **Persistent storage**: PostgreSQL data is stored in a named volume for persistence -- **Environment variables**: Uses environment variables for configuration (set by the CI/CD workflow) - -**Security Enhancements**: -- **Rootless operation**: All containers run as non-root user (UID/GID 1000) -- **Read-only filesystems**: Containers have read-only root filesystems with writable volumes only for data -- **Dropped capabilities**: All Linux capabilities are dropped for maximum security -- **No privilege escalation**: Containers cannot escalate privileges -- **Simplified configuration**: No external configmaps - all configuration is inline for easier management - -**Service Architecture**: -1. **PostgreSQL**: Database with health checks and persistent storage -2. **Backend**: Rust API service that waits for PostgreSQL to be healthy -3. **Frontend**: Next.js application that waits for backend to be healthy -4. **Nginx**: Reverse proxy that serves the frontend and proxies API requests to backend - -**Deployment Process**: -1. The production runner pulls the latest images from Forgejo Container Registry -2. Creates environment variables for the deployment -3. Runs `podman play kube prod-pod.yml` -4. Waits for all services to be healthy -5. Verifies the deployment was successful - - - -### Step 13: Test Production Setup - -#### 13.1 Test Podman Installation - -```bash -# Test Podman installation (run as PROD_SERVICE_USER) -sudo -u PROD_SERVICE_USER podman --version -``` - -#### 13.2 Test Forgejo Container Registry Access - -```bash -# Test pulling an image from Forgejo Container Registry (run as PROD_SERVICE_USER) -sudo -u PROD_SERVICE_USER podman pull YOUR_CI_CD_IP/APP_NAME/test:latest -``` - -**Important**: Replace `YOUR_CI_CD_IP` with your actual CI/CD Linode IP address. - -**Note**: Production uses unauthenticated pulls from the standard HTTPS port (443) for deployment operations. - -**Note**: Application deployment testing will be done in Step 19 after the complete CI/CD pipeline is set up. 
-
 ---
 
 ## Part 3: Final Configuration and Testing
 
-### Step 14: Configure Forgejo Repository Secrets
+### Step 12: Configure Forgejo Repository Secrets
 
 Go to your Forgejo repository and add these secrets in **Settings → Secrets and Variables → Actions**:
 
@@ -2084,7 +1981,7 @@ Go to your Forgejo repository and add these secrets in **Settings → Secrets an
   DIGEST=$(podman manifest inspect quay.io/podman/stable:latest | jq -r '.manifests[] | select(.platform.os=="linux" and .platform.architecture=="amd64") | .digest')
   export PODMAN_CLIENT_IMG_DIGEST="quay.io/podman/stable@${DIGEST}"
   echo "PODMAN_CLIENT_IMG_DIGEST=${PODMAN_CLIENT_IMG_DIGEST}"
-  # Result: quay.io/podman/stable@sha256:5dd9f78bd233970ea4a36bb65d5fc63b7edbb9c7f800ab7901fa912564f36415
+  # Result: PODMAN_CLIENT_IMG_DIGEST=quay.io/podman/stable@sha256:482bce3a829893f0dc3bf497c9a7609341fca11b34e35a92d308eb971ad61adb
 ```
 
 - **`RUST_IMG_DIGEST`**: Used for Rust backend testing and building
@@ -2093,7 +1990,7 @@ Go to your Forgejo repository and add these secrets in **Settings → Secrets an
   DIGEST=$(podman manifest inspect docker.io/library/rust:latest | jq -r '.manifests[] | select(.platform.os=="linux" and .platform.architecture=="amd64") | .digest')
   export RUST_IMG_DIGEST="docker.io/library/rust@${DIGEST}"
   echo "RUST_IMG_DIGEST=${RUST_IMG_DIGEST}"
-  # Result: docker.io/library/rust@sha256:...
+  # Result: RUST_IMG_DIGEST=docker.io/library/rust@sha256:f61d2a4020b0dec1f21c2320fdcb8b256dd96dfc015a090893b11841bb708983
 ```
 
 - **`NODE_IMG_DIGEST`**: Used for Node.js frontend testing and building
@@ -2102,7 +1999,7 @@ Go to your Forgejo repository and add these secrets in **Settings → Secrets an
   DIGEST=$(podman manifest inspect docker.io/library/node:latest | jq -r '.manifests[] | select(.platform.os=="linux" and .platform.architecture=="amd64") | .digest')
   export NODE_IMG_DIGEST="docker.io/library/node@${DIGEST}"
   echo "NODE_IMG_DIGEST=${NODE_IMG_DIGEST}"
-  # Result: docker.io/library/node@sha256:...
+  # Result: NODE_IMG_DIGEST=docker.io/library/node@sha256:9d4ff7cc3a5924a28389087d9735dfbf77ccb04bc3a0d5f86016d484dfa965c1
 ```
 
 - **`POSTGRES_IMG_DIGEST`**: Used for PostgreSQL database in integration tests
@@ -2111,7 +2008,7 @@ Go to your Forgejo repository and add these secrets in **Settings → Secrets an
   DIGEST=$(podman manifest inspect docker.io/library/postgres:latest | jq -r '.manifests[] | select(.platform.os=="linux" and .platform.architecture=="amd64") | .digest')
   export POSTGRES_IMG_DIGEST="docker.io/library/postgres@${DIGEST}"
   echo "POSTGRES_IMG_DIGEST=${POSTGRES_IMG_DIGEST}"
-  # Result: docker.io/library/postgres@sha256:...
+  # Result: POSTGRES_IMG_DIGEST=docker.io/library/postgres@sha256:16508ad37e81dd63a94cdc620b0cfa1b771c4176b4e0f1cbc3a670431643e3ed
 ```
 
 **2. SSH Keys (Secure Deployment Access):**
@@ -2151,15 +2048,15 @@ Go to your Forgejo repository and add these secrets in **Settings → Secrets an
 
 **Note**: This setup uses custom Dockerfiles for testing environments with base images. The CI pipeline automatically checks if base images exist in Forgejo Container Registry and pulls them from Docker Hub only when needed, eliminating rate limiting issues and providing better control over the testing environment.
 
-### Step 15: Test Complete Pipeline
+### Step 13: Test Complete Pipeline
 
-#### 15.1 Trigger a Test Build
+#### 13.1 Trigger a Test Build
 
 1. **Make a small change** to your repository (e.g., update a comment or add a test file)
 2. **Commit and push** the changes to trigger the CI/CD pipeline
 3. **Monitor the build** in your Forgejo repository → Actions tab
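+
+For step 2, an empty commit is enough to exercise the pipeline (assuming `main` is the branch your workflow triggers on):
+
+```bash
+git commit --allow-empty -m "ci: pipeline smoke test"
+git push origin main
+```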
 
-#### 15.2 Verify Pipeline Steps
+#### 13.2 Verify Pipeline Steps
 
 The pipeline should execute these steps in order:
 
@@ -2172,7 +2069,7 @@ The pipeline should execute these steps in order:
 7. **Push to Registry**: Push images to Forgejo Container Registry from DinD
 8. **Deploy to Production**: Deploy to production server
 
-#### 15.3 Check Forgejo Container Registry
+#### 13.3 Check Forgejo Container Registry
 
 ```bash
 # On CI/CD Linode
@@ -2193,7 +2090,7 @@ curl -k https://YOUR_CI_CD_IP:4443/v2/_catalog
 
 # Expected: This should return authentication error without credentials
 ```
 
-#### 15.4 Verify Production Deployment
+#### 13.4 Verify Production Deployment
 
 ```bash
 # On Production Linode
@@ -2211,16 +2108,16 @@ podman logs sharenet-production-pod-backend
 podman logs sharenet-production-pod-frontend
 ```
 
-#### 15.5 Test Application Functionality
+#### 13.5 Test Application Functionality
 
 1. **Frontend**: Visit your production URL (IP address)
 2. **Backend API**: Test API endpoints
 3. **Database**: Verify database connections
 4. **Logs**: Check for any errors in application logs
 
-### Step 16: Final Verification
+### Step 14: Final Verification
 
-#### 16.1 Security Check
+#### 14.1 Security Check
 
 ```bash
 # Check firewall status
 sudo ufw status
 
 # Check fail2ban status
@@ -2233,7 +2130,7 @@ sudo systemctl status fail2ban
 
 # Verify SSH security
 sudo grep "PasswordAuthentication" /etc/ssh/sshd_config
 ```
 
-#### 16.2 Performance Check
+#### 14.2 Performance Check
 
 ```bash
 # Check system resources
 htop
 
 # Check disk usage
@@ -2246,7 +2143,7 @@ df -h
 
-# Check Docker disk usage
-docker system df
+# Check Podman disk usage (this guide deploys with Podman, not Docker)
+sudo -u PROD_SERVICE_USER podman system df
 ```
 
-#### 16.3 Backup Verification
+#### 14.3 Backup Verification
 
 ```bash
 # Test backup script
@@ -2257,16 +2154,16 @@ cd /opt/APP_NAME
 ./scripts/backup.sh
 ```
 
-### Step 17: Documentation and Maintenance
+### Step 15: Documentation and Maintenance
 
-#### 17.1 Update Documentation
+#### 15.1 Update Documentation
 
 1. **Update README.md** with deployment information
 2. **Document environment variables** and their purposes
 3. **Create troubleshooting guide** for common issues
 4. **Document backup and restore procedures**
 
-#### 17.2 Set Up Monitoring Alerts
+#### 15.2 Set Up Monitoring Alerts
 
 ```bash
 # Set up monitoring cron job
@@ -2276,7 +2173,7 @@ cd /opt/APP_NAME
 tail -f /tmp/monitor.log
 ```
 
-#### 17.3 Regular Maintenance Tasks
+#### 15.3 Regular Maintenance Tasks
 
 **Daily:**
 - Check application logs for errors
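+
+For the daily log check, something like the following works (a sketch; it assumes the journald-backed runner from Step 11 and the container names shown in 13.4):
+
+```bash
+# Runner errors since midnight
+sudo journalctl -u forgejo-runner.service --since today --no-pager | grep -iE 'error|fail' || echo "runner: clean"
+
+# Application errors in the last 24 hours (on the Production Linode)
+podman logs --since 24h sharenet-production-pod-backend 2>&1 | grep -iE 'error|fail' || echo "backend: clean"
+```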