mirror of
https://github.com/coollabsio/coolify.git
synced 2026-06-19 07:35:25 +00:00
fix(ssh): rely on lazy multiplexed connections
Remove explicit SSH master pre-warming and lock handling so OpenSSH manages ControlMaster creation lazily from real ssh/scp commands. Add cleanup for duplicate mux processes and update coverage around mux command options and stale process cleanup.
This commit is contained in:
@@ -4,8 +4,6 @@ namespace App\Helpers;
|
||||
|
||||
use App\Models\PrivateKey;
|
||||
use App\Models\Server;
|
||||
use Illuminate\Contracts\Cache\LockTimeoutException;
|
||||
use Illuminate\Support\Facades\Cache;
|
||||
use Illuminate\Support\Facades\Hash;
|
||||
use Illuminate\Support\Facades\Log;
|
||||
use Illuminate\Support\Facades\Process;
|
||||
@@ -13,210 +11,60 @@ use Illuminate\Support\Facades\Storage;
|
||||
|
||||
class SshMultiplexingHelper
|
||||
{
|
||||
public static function serverSshConfiguration(Server $server)
|
||||
public static function serverSshConfiguration(Server $server): array
|
||||
{
|
||||
$privateKey = PrivateKey::findOrFail($server->private_key_id);
|
||||
$sshKeyLocation = $privateKey->getKeyLocation();
|
||||
$muxFilename = '/var/www/html/storage/app/ssh/mux/mux_'.$server->uuid;
|
||||
|
||||
return [
|
||||
'sshKeyLocation' => $sshKeyLocation,
|
||||
'muxFilename' => $muxFilename,
|
||||
'sshKeyLocation' => $privateKey->getKeyLocation(),
|
||||
'muxFilename' => self::muxSocket($server),
|
||||
];
|
||||
}
|
||||
|
||||
public static function ensureMultiplexedConnection(Server $server): bool
|
||||
{
|
||||
if (! self::isMultiplexingEnabled()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Fast path: a usable master already exists, no need to lock.
|
||||
if (self::connectionIsReusable($server)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Slow path: establishing or refreshing the master. Serialize per server
|
||||
// so concurrent workers do not each spawn their own master process,
|
||||
// leaving orphaned non-master ssh connections that ControlPersist never reaps.
|
||||
try {
|
||||
return Cache::lock(
|
||||
self::connectionLockKey($server),
|
||||
config('constants.ssh.mux_lock_ttl')
|
||||
)->block(config('constants.ssh.mux_lock_timeout'), function () use ($server) {
|
||||
// Double-checked: another worker may have established the master
|
||||
// while we were waiting for the lock.
|
||||
if (self::connectionIsReusable($server)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// A master exists but is stale or expired: close and re-establish.
|
||||
if (self::masterConnectionExists($server)) {
|
||||
return self::refreshMultiplexedConnection($server);
|
||||
}
|
||||
|
||||
return self::establishNewMultiplexedConnection($server);
|
||||
});
|
||||
} catch (LockTimeoutException) {
|
||||
Log::warning('SSH multiplexing lock timeout, falling back to non-multiplexed connection', [
|
||||
'server' => $server->name ?? $server->ip,
|
||||
]);
|
||||
|
||||
return false;
|
||||
} catch (\Throwable $e) {
|
||||
Log::warning('SSH multiplexing lock unavailable, falling back to non-multiplexed connection', [
|
||||
'server' => $server->name ?? $server->ip,
|
||||
'error' => $e->getMessage(),
|
||||
]);
|
||||
|
||||
return false;
|
||||
}
|
||||
return self::isMultiplexingEnabled();
|
||||
}
|
||||
|
||||
/**
|
||||
* Per-server, per-host lock key for serializing master establishment.
|
||||
*
|
||||
* The mux socket is a host-local unix socket, so the lock is scoped to the
|
||||
* current Coolify host: workers on the same host share a master and must
|
||||
* serialize, while workers on other hosts manage their own masters and must
|
||||
* not block on each other.
|
||||
*/
|
||||
private static function connectionLockKey(Server $server): string
|
||||
public static function removeMuxFile(Server $server): void
|
||||
{
|
||||
return 'ssh_mux_lock_'.(gethostname() ?: 'unknown').'_'.$server->uuid;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check whether a multiplexed master connection currently exists for the server.
|
||||
*/
|
||||
private static function masterConnectionExists(Server $server): bool
|
||||
{
|
||||
$sshConfig = self::serverSshConfiguration($server);
|
||||
$muxSocket = $sshConfig['muxFilename'];
|
||||
|
||||
$checkCommand = "ssh -O check -o ControlPath=$muxSocket ";
|
||||
if (data_get($server, 'settings.is_cloudflare_tunnel')) {
|
||||
$checkCommand .= '-o ProxyCommand="cloudflared access ssh --hostname %h" ';
|
||||
}
|
||||
$checkCommand .= self::escapedUserAtHost($server);
|
||||
|
||||
return Process::run($checkCommand)->exitCode() === 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Determine whether the existing master connection can be reused as-is
|
||||
* (it exists, has not exceeded its max age, and passes the health check).
|
||||
*/
|
||||
private static function connectionIsReusable(Server $server): bool
|
||||
{
|
||||
if (! self::masterConnectionExists($server)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Existing connection but no metadata, store current time as fallback.
|
||||
if (self::getConnectionAge($server) === null) {
|
||||
self::storeConnectionMetadata($server);
|
||||
}
|
||||
|
||||
if (self::isConnectionExpired($server)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (config('constants.ssh.mux_health_check_enabled') && ! self::isConnectionHealthy($server)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
public static function establishNewMultiplexedConnection(Server $server): bool
|
||||
{
|
||||
$sshConfig = self::serverSshConfiguration($server);
|
||||
$sshKeyLocation = $sshConfig['sshKeyLocation'];
|
||||
$muxSocket = $sshConfig['muxFilename'];
|
||||
$connectionTimeout = self::getConnectionTimeout($server);
|
||||
$serverInterval = config('constants.ssh.server_interval');
|
||||
$muxPersistTime = config('constants.ssh.mux_persist_time');
|
||||
|
||||
// No -M: it forces master mode and overrides ControlMaster=auto. When a
|
||||
// socket already exists -M leaves an orphaned non-master ssh -fN process
|
||||
// that ControlPersist never reaps. ControlMaster=auto reuses instead.
|
||||
$establishCommand = "ssh -fN -o ControlMaster=auto -o ControlPath=$muxSocket -o ControlPersist={$muxPersistTime} ";
|
||||
|
||||
if (data_get($server, 'settings.is_cloudflare_tunnel')) {
|
||||
$establishCommand .= ' -o ProxyCommand="cloudflared access ssh --hostname %h" ';
|
||||
}
|
||||
$establishCommand .= self::getCommonSshOptions($server, $sshKeyLocation, $connectionTimeout, $serverInterval);
|
||||
$establishCommand .= self::escapedUserAtHost($server);
|
||||
$establishProcess = Process::run($establishCommand);
|
||||
if ($establishProcess->exitCode() !== 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Store connection metadata for tracking
|
||||
self::storeConnectionMetadata($server);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
public static function removeMuxFile(Server $server)
|
||||
{
|
||||
$sshConfig = self::serverSshConfiguration($server);
|
||||
$muxSocket = $sshConfig['muxFilename'];
|
||||
|
||||
$closeCommand = "ssh -O exit -o ControlPath=$muxSocket ";
|
||||
$closeCommand = 'ssh -O exit -o ControlPath='.self::muxSocket($server).' ';
|
||||
if (data_get($server, 'settings.is_cloudflare_tunnel')) {
|
||||
$closeCommand .= '-o ProxyCommand="cloudflared access ssh --hostname %h" ';
|
||||
}
|
||||
$closeCommand .= self::escapedUserAtHost($server);
|
||||
Process::run($closeCommand);
|
||||
|
||||
// Clear connection metadata from cache
|
||||
self::clearConnectionMetadata($server);
|
||||
Process::run($closeCommand);
|
||||
}
|
||||
|
||||
public static function generateScpCommand(Server $server, string $source, string $dest)
|
||||
public static function generateScpCommand(Server $server, string $source, string $dest): string
|
||||
{
|
||||
$sshConfig = self::serverSshConfiguration($server);
|
||||
$sshKeyLocation = $sshConfig['sshKeyLocation'];
|
||||
$muxSocket = $sshConfig['muxFilename'];
|
||||
$scpCommand = 'timeout '.config('constants.ssh.command_timeout').' scp ';
|
||||
|
||||
$timeout = config('constants.ssh.command_timeout');
|
||||
$muxPersistTime = config('constants.ssh.mux_persist_time');
|
||||
|
||||
$scp_command = "timeout $timeout scp ";
|
||||
if ($server->isIpv6()) {
|
||||
$scp_command .= '-6 ';
|
||||
$scpCommand .= '-6 ';
|
||||
}
|
||||
|
||||
if (self::isMultiplexingEnabled()) {
|
||||
try {
|
||||
if (self::ensureMultiplexedConnection($server)) {
|
||||
$scp_command .= "-o ControlMaster=auto -o ControlPath=$muxSocket -o ControlPersist={$muxPersistTime} ";
|
||||
}
|
||||
} catch (\Exception $e) {
|
||||
Log::warning('SSH multiplexing failed for SCP, falling back to non-multiplexed connection', [
|
||||
'server' => $server->name ?? $server->ip,
|
||||
'error' => $e->getMessage(),
|
||||
]);
|
||||
// Continue without multiplexing
|
||||
}
|
||||
$scpCommand .= self::multiplexingOptions($server);
|
||||
}
|
||||
|
||||
if (data_get($server, 'settings.is_cloudflare_tunnel')) {
|
||||
$scp_command .= '-o ProxyCommand="cloudflared access ssh --hostname %h" ';
|
||||
$scpCommand .= '-o ProxyCommand="cloudflared access ssh --hostname %h" ';
|
||||
}
|
||||
|
||||
$scp_command .= self::getCommonSshOptions($server, $sshKeyLocation, self::getConnectionTimeout($server), config('constants.ssh.server_interval'), isScp: true);
|
||||
$scpCommand .= self::getCommonSshOptions($server, $sshKeyLocation, self::getConnectionTimeout($server), config('constants.ssh.server_interval'), isScp: true);
|
||||
|
||||
if ($server->isIpv6()) {
|
||||
$scp_command .= "{$source} ".escapeshellarg($server->user).'@['.escapeshellarg($server->ip)."]:{$dest}";
|
||||
} else {
|
||||
$scp_command .= "{$source} ".self::escapedUserAtHost($server).":{$dest}";
|
||||
return $scpCommand."{$source} ".escapeshellarg($server->user).'@['.escapeshellarg($server->ip)."]:{$dest}";
|
||||
}
|
||||
|
||||
return $scp_command;
|
||||
return $scpCommand."{$source} ".self::escapedUserAtHost($server).":{$dest}";
|
||||
}
|
||||
|
||||
public static function generateSshCommand(Server $server, string $command, bool $disableMultiplexing = false)
|
||||
public static function generateSshCommand(Server $server, string $command, bool $disableMultiplexing = false): string
|
||||
{
|
||||
if ($server->settings->force_disabled) {
|
||||
throw new \RuntimeException('Server is disabled.');
|
||||
@@ -227,44 +75,36 @@ class SshMultiplexingHelper
|
||||
|
||||
self::validateSshKey($server->privateKey);
|
||||
|
||||
$muxSocket = $sshConfig['muxFilename'];
|
||||
$sshCommand = 'timeout '.config('constants.ssh.command_timeout').' ssh ';
|
||||
|
||||
$timeout = config('constants.ssh.command_timeout');
|
||||
$muxPersistTime = config('constants.ssh.mux_persist_time');
|
||||
|
||||
$ssh_command = "timeout $timeout ssh ";
|
||||
|
||||
$multiplexingSuccessful = false;
|
||||
if (! $disableMultiplexing && self::isMultiplexingEnabled()) {
|
||||
try {
|
||||
$multiplexingSuccessful = self::ensureMultiplexedConnection($server);
|
||||
if ($multiplexingSuccessful) {
|
||||
$ssh_command .= "-o ControlMaster=auto -o ControlPath=$muxSocket -o ControlPersist={$muxPersistTime} ";
|
||||
}
|
||||
} catch (\Exception $e) {
|
||||
Log::warning('SSH multiplexing failed, falling back to non-multiplexed connection', [
|
||||
'server' => $server->name ?? $server->ip,
|
||||
'error' => $e->getMessage(),
|
||||
]);
|
||||
// Continue without multiplexing
|
||||
}
|
||||
$sshCommand .= self::multiplexingOptions($server);
|
||||
}
|
||||
|
||||
if (data_get($server, 'settings.is_cloudflare_tunnel')) {
|
||||
$ssh_command .= "-o ProxyCommand='cloudflared access ssh --hostname %h' ";
|
||||
$sshCommand .= "-o ProxyCommand='cloudflared access ssh --hostname %h' ";
|
||||
}
|
||||
|
||||
$ssh_command .= self::getCommonSshOptions($server, $sshKeyLocation, self::getConnectionTimeout($server), config('constants.ssh.server_interval'));
|
||||
$sshCommand .= self::getCommonSshOptions($server, $sshKeyLocation, self::getConnectionTimeout($server), config('constants.ssh.server_interval'));
|
||||
|
||||
$delimiter = Hash::make($command);
|
||||
$delimiter = base64_encode($delimiter);
|
||||
$delimiter = base64_encode(Hash::make($command));
|
||||
$command = str_replace($delimiter, '', $command);
|
||||
|
||||
$ssh_command .= self::escapedUserAtHost($server)." 'bash -se' << \\$delimiter".PHP_EOL
|
||||
return $sshCommand.self::escapedUserAtHost($server)." 'bash -se' << \\$delimiter".PHP_EOL
|
||||
.$command.PHP_EOL
|
||||
.$delimiter;
|
||||
}
|
||||
|
||||
return $ssh_command;
|
||||
private static function multiplexingOptions(Server $server): string
|
||||
{
|
||||
return '-o ControlMaster=auto '
|
||||
.'-o ControlPath='.self::muxSocket($server).' '
|
||||
.'-o ControlPersist='.config('constants.ssh.mux_persist_time').' ';
|
||||
}
|
||||
|
||||
private static function muxSocket(Server $server): string
|
||||
{
|
||||
return '/var/www/html/storage/app/ssh/mux/mux_'.$server->uuid;
|
||||
}
|
||||
|
||||
private static function escapedUserAtHost(Server $server): string
|
||||
@@ -301,7 +141,6 @@ class SshMultiplexingHelper
|
||||
$privateKey->storeInFileSystem();
|
||||
}
|
||||
|
||||
// Ensure correct permissions (SSH requires 0600)
|
||||
if (file_exists($keyLocation)) {
|
||||
$currentPerms = fileperms($keyLocation) & 0777;
|
||||
if ($currentPerms !== 0600 && ! chmod($keyLocation, 0600)) {
|
||||
@@ -332,90 +171,10 @@ class SshMultiplexingHelper
|
||||
.'-o RequestTTY=no '
|
||||
.'-o LogLevel=ERROR ';
|
||||
|
||||
// Bruh
|
||||
if ($isScp) {
|
||||
$options .= '-P '.escapeshellarg((string) $server->port).' ';
|
||||
} else {
|
||||
$options .= '-p '.escapeshellarg((string) $server->port).' ';
|
||||
return $options.'-P '.escapeshellarg((string) $server->port).' ';
|
||||
}
|
||||
|
||||
return $options;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the multiplexed connection is healthy by running a test command
|
||||
*/
|
||||
public static function isConnectionHealthy(Server $server): bool
|
||||
{
|
||||
$sshConfig = self::serverSshConfiguration($server);
|
||||
$muxSocket = $sshConfig['muxFilename'];
|
||||
$healthCheckTimeout = config('constants.ssh.mux_health_check_timeout');
|
||||
|
||||
$healthCommand = "timeout $healthCheckTimeout ssh -o ControlMaster=auto -o ControlPath=$muxSocket ";
|
||||
if (data_get($server, 'settings.is_cloudflare_tunnel')) {
|
||||
$healthCommand .= '-o ProxyCommand="cloudflared access ssh --hostname %h" ';
|
||||
}
|
||||
$healthCommand .= self::escapedUserAtHost($server)." 'echo \"health_check_ok\"'";
|
||||
|
||||
$process = Process::run($healthCommand);
|
||||
$isHealthy = $process->exitCode() === 0 && str_contains($process->output(), 'health_check_ok');
|
||||
|
||||
return $isHealthy;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the connection has exceeded its maximum age
|
||||
*/
|
||||
public static function isConnectionExpired(Server $server): bool
|
||||
{
|
||||
$connectionAge = self::getConnectionAge($server);
|
||||
$maxAge = config('constants.ssh.mux_max_age');
|
||||
|
||||
return $connectionAge !== null && $connectionAge > $maxAge;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the age of the current connection in seconds
|
||||
*/
|
||||
public static function getConnectionAge(Server $server): ?int
|
||||
{
|
||||
$cacheKey = "ssh_mux_connection_time_{$server->uuid}";
|
||||
$connectionTime = Cache::get($cacheKey);
|
||||
|
||||
if ($connectionTime === null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return time() - $connectionTime;
|
||||
}
|
||||
|
||||
/**
|
||||
* Refresh a multiplexed connection by closing and re-establishing it
|
||||
*/
|
||||
public static function refreshMultiplexedConnection(Server $server): bool
|
||||
{
|
||||
// Close existing connection
|
||||
self::removeMuxFile($server);
|
||||
|
||||
// Establish new connection
|
||||
return self::establishNewMultiplexedConnection($server);
|
||||
}
|
||||
|
||||
/**
|
||||
* Store connection metadata when a new connection is established
|
||||
*/
|
||||
private static function storeConnectionMetadata(Server $server): void
|
||||
{
|
||||
$cacheKey = "ssh_mux_connection_time_{$server->uuid}";
|
||||
Cache::put($cacheKey, time(), config('constants.ssh.mux_persist_time') + 300); // Cache slightly longer than persist time
|
||||
}
|
||||
|
||||
/**
|
||||
* Clear connection metadata from cache
|
||||
*/
|
||||
private static function clearConnectionMetadata(Server $server): void
|
||||
{
|
||||
$cacheKey = "ssh_mux_connection_time_{$server->uuid}";
|
||||
Cache::forget($cacheKey);
|
||||
return $options.'-p '.escapeshellarg((string) $server->port).' ';
|
||||
}
|
||||
}
|
||||
|
||||
@@ -21,10 +21,44 @@ class CleanupStaleMultiplexedConnections implements ShouldQueue
|
||||
{
|
||||
$this->cleanupStaleConnections();
|
||||
$this->cleanupNonExistentServerConnections();
|
||||
$this->cleanupDuplicateSshProcesses();
|
||||
$this->cleanupOrphanedSshProcesses();
|
||||
$this->cleanupOrphanedCloudflaredProcesses();
|
||||
}
|
||||
|
||||
/**
|
||||
* Once two background ssh masters share the same ControlPath, OpenSSH's
|
||||
* control socket state is no longer trustworthy: `ssh -O check` may report
|
||||
* one PID while the socket lifecycle is tied to another. Reset the whole
|
||||
* duplicate group rather than trying to choose an owner.
|
||||
*/
|
||||
private function cleanupDuplicateSshProcesses(): void
|
||||
{
|
||||
$muxDir = storage_path('app/ssh/mux');
|
||||
$groups = [];
|
||||
|
||||
foreach ($this->listProcesses() as $process) {
|
||||
if (! preg_match('#(^|/)ssh -fN#', $process['args'])) {
|
||||
continue;
|
||||
}
|
||||
|
||||
$controlPath = $this->extractControlPath($process['args']);
|
||||
if (! is_string($controlPath) || ! str_starts_with($controlPath, $muxDir.'/')) {
|
||||
continue;
|
||||
}
|
||||
|
||||
$groups[$controlPath][] = $process;
|
||||
}
|
||||
|
||||
foreach ($groups as $controlPath => $processes) {
|
||||
if (count($processes) < 2) {
|
||||
continue;
|
||||
}
|
||||
|
||||
$this->resetDuplicateGroup($controlPath, $processes);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Kill backgrounded ssh master processes that lost the ControlPath socket
|
||||
* race. Such processes are not masters, so ControlPersist never reaps them
|
||||
@@ -149,6 +183,43 @@ class CleanupStaleMultiplexedConnections implements ShouldQueue
|
||||
return $processes;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param list<array{pid: string, ppid: string, etimes: int, args: string}> $processes
|
||||
*/
|
||||
private function resetDuplicateGroup(string $controlPath, array $processes): void
|
||||
{
|
||||
if (! config('constants.ssh.mux_orphan_reap_enabled')) {
|
||||
Log::info('Duplicate ssh mux processes detected (dry-run, not killed)', [
|
||||
'control_path' => $controlPath,
|
||||
'pids' => array_column($processes, 'pid'),
|
||||
]);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
foreach ($processes as $process) {
|
||||
Process::run('kill '.escapeshellarg($process['pid']));
|
||||
}
|
||||
|
||||
if (file_exists($controlPath)) {
|
||||
@unlink($controlPath);
|
||||
}
|
||||
|
||||
Log::info('Reset duplicate ssh mux processes', [
|
||||
'control_path' => $controlPath,
|
||||
'pids' => array_column($processes, 'pid'),
|
||||
]);
|
||||
}
|
||||
|
||||
private function extractControlPath(string $args): ?string
|
||||
{
|
||||
if (! preg_match('/(?:^|\s)-o\s+ControlPath=(?:"([^"]+)"|\'([^\']+)\'|(\S+))/', $args, $matches)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return $matches[1] ?: ($matches[2] ?: $matches[3]);
|
||||
}
|
||||
|
||||
private function cleanupStaleConnections()
|
||||
{
|
||||
$muxFiles = Storage::disk('ssh-mux')->files();
|
||||
|
||||
@@ -6,15 +6,14 @@ use App\Models\PrivateKey;
|
||||
use App\Models\Server;
|
||||
use App\Models\User;
|
||||
use Illuminate\Foundation\Testing\RefreshDatabase;
|
||||
use Illuminate\Support\Facades\Cache;
|
||||
use Illuminate\Support\Facades\File;
|
||||
use Illuminate\Support\Facades\Process;
|
||||
use Illuminate\Support\Facades\Storage;
|
||||
|
||||
/**
|
||||
* Tests for the per-server lock that prevents concurrent workers from each
|
||||
* spawning their own SSH master, which leaves orphaned non-master ssh
|
||||
* connections that ControlPersist never reaps (memory leak).
|
||||
* SSH multiplexing now relies on OpenSSH's native lazy ControlMaster handling.
|
||||
* Coolify should add mux options to real ssh/scp commands, but must not pre-warm
|
||||
* background masters with separate `ssh -fN` processes.
|
||||
*/
|
||||
uses(RefreshDatabase::class);
|
||||
|
||||
@@ -23,80 +22,91 @@ function makeMuxServer(): Server
|
||||
$user = User::factory()->create();
|
||||
$team = $user->teams()->first();
|
||||
|
||||
$privateKeyContent = "-----BEGIN OPENSSH PRIVATE KEY-----\n".
|
||||
"b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAAAMwAAAAtzc2gtZW\n".
|
||||
"QyNTUxOQAAACBbhpqHhqv6aI67Mj9abM3DVbmcfYhZAhC7ca4d9UCevAAAAJi/QySHv0Mk\n".
|
||||
"hwAAAAtzc2gtZWQyNTUxOQAAACBbhpqHhqv6aI67Mj9abM3DVbmcfYhZAhC7ca4d9UCevA\n".
|
||||
"AAAECBQw4jg1WRT2IGHMncCiZhURCts2s24HoDS0thHnnRKVuGmoeGq/pojrsyP1pszcNV\n".
|
||||
"uZx9iFkCELtxrh31QJ68AAAAEXNhaWxANzZmZjY2ZDJlMmRkAQIDBA==\n".
|
||||
'-----END OPENSSH PRIVATE KEY-----';
|
||||
|
||||
$privateKey = PrivateKey::create([
|
||||
'name' => 'mux-test-key-'.uniqid(),
|
||||
'private_key' => "-----BEGIN OPENSSH PRIVATE KEY-----\n".
|
||||
"b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAAAMwAAAAtzc2gtZW\n".
|
||||
"QyNTUxOQAAACBbhpqHhqv6aI67Mj9abM3DVbmcfYhZAhC7ca4d9UCevAAAAJi/QySHv0Mk\n".
|
||||
"hwAAAAtzc2gtZWQyNTUxOQAAACBbhpqHhqv6aI67Mj9abM3DVbmcfYhZAhC7ca4d9UCevA\n".
|
||||
"AAAECBQw4jg1WRT2IGHMncCiZhURCts2s24HoDS0thHnnRKVuGmoeGq/pojrsyP1pszcNV\n".
|
||||
"uZx9iFkCELtxrh31QJ68AAAAEXNhaWxANzZmZjY2ZDJlMmRkAQIDBA==\n".
|
||||
'-----END OPENSSH PRIVATE KEY-----',
|
||||
'private_key' => $privateKeyContent,
|
||||
'team_id' => $team->id,
|
||||
]);
|
||||
|
||||
Storage::fake('ssh-keys');
|
||||
Storage::disk('ssh-keys')->put("ssh_key@{$privateKey->uuid}", $privateKeyContent);
|
||||
|
||||
return Server::factory()->create([
|
||||
'team_id' => $team->id,
|
||||
'private_key_id' => $privateKey->id,
|
||||
]);
|
||||
}
|
||||
|
||||
it('establishes a master with ssh -fN and never the orphan-prone ssh -fNM', function () {
|
||||
it('does not prewarm a background ssh master', function () {
|
||||
config(['constants.ssh.mux_enabled' => true]);
|
||||
$server = makeMuxServer();
|
||||
|
||||
Process::fake([
|
||||
'*-O check*' => Process::result(exitCode: 1), // no existing master
|
||||
'*-fN *' => Process::result(exitCode: 0), // establish succeeds
|
||||
]);
|
||||
Process::fake();
|
||||
|
||||
expect(SshMultiplexingHelper::ensureMultiplexedConnection($server))->toBeTrue();
|
||||
|
||||
Process::assertRan(fn ($process) => str_contains($process->command, 'ssh -fN ')
|
||||
&& ! str_contains($process->command, 'ssh -fNM'));
|
||||
Process::assertNothingRan();
|
||||
});
|
||||
|
||||
it('reuses an existing healthy master without spawning a new one', function () {
|
||||
config([
|
||||
'constants.ssh.mux_enabled' => true,
|
||||
'constants.ssh.mux_health_check_enabled' => true,
|
||||
]);
|
||||
it('adds native openssh multiplexing options to ssh commands', function () {
|
||||
config(['constants.ssh.mux_enabled' => true]);
|
||||
$server = makeMuxServer();
|
||||
Storage::disk('ssh-keys')->put("ssh_key@{$server->privateKey->uuid}", $server->privateKey->private_key);
|
||||
|
||||
Process::fake();
|
||||
|
||||
$command = SshMultiplexingHelper::generateSshCommand($server, 'echo ok');
|
||||
|
||||
expect($command)
|
||||
->toContain('-o ControlMaster=auto')
|
||||
->toContain("-o ControlPath=/var/www/html/storage/app/ssh/mux/mux_{$server->uuid}")
|
||||
->toContain('-o ControlPersist=3600')
|
||||
->not->toContain('ssh -fN')
|
||||
->not->toContain('-O check');
|
||||
|
||||
Process::assertNothingRan();
|
||||
});
|
||||
|
||||
it('omits native multiplexing options when ssh multiplexing is disabled for a command', function () {
|
||||
config(['constants.ssh.mux_enabled' => true]);
|
||||
$server = makeMuxServer();
|
||||
Storage::disk('ssh-keys')->put("ssh_key@{$server->privateKey->uuid}", $server->privateKey->private_key);
|
||||
|
||||
$command = SshMultiplexingHelper::generateSshCommand($server, 'echo ok', disableMultiplexing: true);
|
||||
|
||||
expect($command)
|
||||
->not->toContain('-o ControlMaster=auto')
|
||||
->not->toContain('-o ControlPath=')
|
||||
->not->toContain('-o ControlPersist=');
|
||||
});
|
||||
|
||||
it('adds native openssh multiplexing options to scp commands', function () {
|
||||
config(['constants.ssh.mux_enabled' => true]);
|
||||
$server = makeMuxServer();
|
||||
|
||||
Process::fake([
|
||||
'*-O check*' => Process::result(exitCode: 0),
|
||||
'*health_check_ok*' => Process::result(output: 'health_check_ok', exitCode: 0),
|
||||
]);
|
||||
Process::fake();
|
||||
|
||||
expect(SshMultiplexingHelper::ensureMultiplexedConnection($server))->toBeTrue();
|
||||
$command = SshMultiplexingHelper::generateScpCommand($server, '/tmp/source', '/tmp/dest');
|
||||
|
||||
Process::assertNotRan(fn ($process) => str_contains($process->command, 'ssh -fN'));
|
||||
expect($command)
|
||||
->toContain('-o ControlMaster=auto')
|
||||
->toContain("-o ControlPath=/var/www/html/storage/app/ssh/mux/mux_{$server->uuid}")
|
||||
->toContain('-o ControlPersist=3600')
|
||||
->not->toContain('ssh -fN')
|
||||
->not->toContain('-O check');
|
||||
|
||||
Process::assertNothingRan();
|
||||
});
|
||||
|
||||
it('does not spawn a master when the per-server lock is already held', function () {
|
||||
config([
|
||||
'constants.ssh.mux_enabled' => true,
|
||||
'constants.ssh.mux_lock_timeout' => 0,
|
||||
]);
|
||||
$server = makeMuxServer();
|
||||
|
||||
Process::fake([
|
||||
'*-O check*' => Process::result(exitCode: 1), // forces the slow path
|
||||
]);
|
||||
|
||||
// Simulate another worker on the same host holding the lock for this server.
|
||||
$lockKey = 'ssh_mux_lock_'.(gethostname() ?: 'unknown').'_'.$server->uuid;
|
||||
$held = Cache::lock($lockKey, 30);
|
||||
expect($held->get())->toBeTrue();
|
||||
|
||||
expect(SshMultiplexingHelper::ensureMultiplexedConnection($server))->toBeFalse();
|
||||
|
||||
Process::assertNotRan(fn ($process) => str_contains($process->command, 'ssh -fN '));
|
||||
|
||||
$held->release();
|
||||
});
|
||||
|
||||
it('returns false and runs no ssh when multiplexing is disabled', function () {
|
||||
it('returns false and runs no process when multiplexing is globally disabled', function () {
|
||||
config(['constants.ssh.mux_enabled' => false]);
|
||||
$server = makeMuxServer();
|
||||
|
||||
@@ -115,9 +125,8 @@ it('kills only old orphaned ssh masters whose control socket no longer exists',
|
||||
$liveSocket = $muxDir.'/mux_live_'.uniqid();
|
||||
$orphanSocket = $muxDir.'/mux_orphan_'.uniqid();
|
||||
$youngSocket = $muxDir.'/mux_young_'.uniqid();
|
||||
File::put($liveSocket, 'x'); // live master owns its socket file; the others do not
|
||||
File::put($liveSocket, 'x');
|
||||
|
||||
// Columns: pid ppid etimes args
|
||||
Process::fake([
|
||||
'ps*' => Process::result(output: "111 1 5000 ssh -fN -o ControlMaster=auto -o ControlPath={$liveSocket} root@1.2.3.4\n".
|
||||
"222 1 5000 ssh -fN -o ControlMaster=auto -o ControlPath={$orphanSocket} root@1.2.3.4\n".
|
||||
@@ -130,11 +139,8 @@ it('kills only old orphaned ssh masters whose control socket no longer exists',
|
||||
$method->setAccessible(true);
|
||||
$method->invoke($job);
|
||||
|
||||
// Old orphan: killed.
|
||||
Process::assertRan(fn ($process) => str_contains($process->command, 'kill') && str_contains($process->command, '222'));
|
||||
// Live master (socket exists): spared.
|
||||
Process::assertNotRan(fn ($process) => str_contains($process->command, 'kill') && str_contains($process->command, '111'));
|
||||
// Young process (may be mid-establish): spared despite missing socket.
|
||||
Process::assertNotRan(fn ($process) => str_contains($process->command, 'kill') && str_contains($process->command, '333'));
|
||||
|
||||
File::delete($liveSocket);
|
||||
@@ -142,8 +148,7 @@ it('kills only old orphaned ssh masters whose control socket no longer exists',
|
||||
|
||||
it('kills only old orphaned cloudflared proxies whose parent ssh is gone', function () {
|
||||
config(['constants.ssh.mux_orphan_reap_enabled' => true]);
|
||||
// pid 100 = live ssh master; 200 = its legit child; 300 = old orphan;
|
||||
// 400 = young orphan (parent ssh may still be starting up).
|
||||
|
||||
Process::fake([
|
||||
'ps*' => Process::result(output: "100 1 5000 ssh -fN -o ControlMaster=auto root@1.2.3.4\n".
|
||||
"200 100 5000 cloudflared access ssh --hostname host.example.com\n".
|
||||
@@ -158,11 +163,8 @@ it('kills only old orphaned cloudflared proxies whose parent ssh is gone', funct
|
||||
$method->setAccessible(true);
|
||||
$method->invoke($job);
|
||||
|
||||
// Old orphan (parent not ssh): killed.
|
||||
Process::assertRan(fn ($process) => str_contains($process->command, 'kill') && str_contains($process->command, '300'));
|
||||
// Legit proxy (parent ssh alive): spared.
|
||||
Process::assertNotRan(fn ($process) => str_contains($process->command, 'kill') && str_contains($process->command, '200'));
|
||||
// Young orphan: spared.
|
||||
Process::assertNotRan(fn ($process) => str_contains($process->command, 'kill') && str_contains($process->command, '400'));
|
||||
});
|
||||
|
||||
@@ -171,7 +173,7 @@ it('dry-run mode logs orphans but kills nothing when reaping is disabled', funct
|
||||
$muxDir = storage_path('app/ssh/mux');
|
||||
File::ensureDirectoryExists($muxDir);
|
||||
|
||||
$orphanSocket = $muxDir.'/mux_orphan_'.uniqid(); // no file: a real old orphan
|
||||
$orphanSocket = $muxDir.'/mux_orphan_'.uniqid();
|
||||
|
||||
Process::fake([
|
||||
'ps*' => Process::result(output: "222 1 5000 ssh -fN -o ControlMaster=auto -o ControlPath={$orphanSocket} root@1.2.3.4\n"),
|
||||
@@ -183,16 +185,38 @@ it('dry-run mode logs orphans but kills nothing when reaping is disabled', funct
|
||||
$method->setAccessible(true);
|
||||
$method->invoke($job);
|
||||
|
||||
// Orphan detected, but dry-run: nothing killed.
|
||||
Process::assertNotRan(fn ($process) => str_contains($process->command, 'kill'));
|
||||
});
|
||||
|
||||
it('resets duplicate ssh mux process groups atomically when reaping is enabled', function () {
|
||||
config(['constants.ssh.mux_orphan_reap_enabled' => true]);
|
||||
$muxDir = storage_path('app/ssh/mux');
|
||||
File::ensureDirectoryExists($muxDir);
|
||||
$controlPath = $muxDir.'/mux_duplicate_'.uniqid();
|
||||
File::put($controlPath, 'socket');
|
||||
|
||||
Process::fake([
|
||||
'ps*' => Process::result(output: "111 1 5000 ssh -fN -o ControlMaster=auto -o ControlPath={$controlPath} root@1.2.3.4\n".
|
||||
"222 1 5000 ssh -fN -o ControlMaster=auto -o ControlPath={$controlPath} root@1.2.3.4\n"),
|
||||
'kill*' => Process::result(exitCode: 0),
|
||||
]);
|
||||
|
||||
$job = new CleanupStaleMultiplexedConnections;
|
||||
$method = new ReflectionMethod($job, 'cleanupDuplicateSshProcesses');
|
||||
$method->setAccessible(true);
|
||||
$method->invoke($job);
|
||||
|
||||
Process::assertRan(fn ($process) => str_contains($process->command, 'kill') && str_contains($process->command, '111'));
|
||||
Process::assertRan(fn ($process) => str_contains($process->command, 'kill') && str_contains($process->command, '222'));
|
||||
expect(file_exists($controlPath))->toBeFalse();
|
||||
});
|
||||
|
||||
it('removes mux files for non-existent servers when reaping is enabled', function () {
|
||||
config(['constants.ssh.mux_orphan_reap_enabled' => true]);
|
||||
Storage::fake('ssh-mux');
|
||||
$file = 'mux_ghost'.uniqid();
|
||||
Storage::disk('ssh-mux')->put($file, 'x');
|
||||
Process::fake(); // the `ssh -O exit` close command
|
||||
Process::fake();
|
||||
|
||||
$job = new CleanupStaleMultiplexedConnections;
|
||||
$method = new ReflectionMethod($job, 'cleanupNonExistentServerConnections');
|
||||
|
||||
Reference in New Issue
Block a user