diff --git a/docs/env-variables.md b/docs/env-variables.md index e6fa7ca5..6b5bd28c 100644 --- a/docs/env-variables.md +++ b/docs/env-variables.md @@ -27,7 +27,8 @@ | `--git-url` | `ENVBUILDER_GIT_URL` | | The URL of a Git repository containing a Devcontainer or Docker image to clone. This is optional. | | `--git-clone-depth` | `ENVBUILDER_GIT_CLONE_DEPTH` | | The depth to use when cloning the Git repository. | | `--git-clone-single-branch` | `ENVBUILDER_GIT_CLONE_SINGLE_BRANCH` | | Clone only a single branch of the Git repository. | -| `--git-clone-thinpack` | `ENVBUILDER_GIT_CLONE_THINPACK` | `true` | Git clone with thin pack compatibility enabled, ensuring that even when thin pack compatibility is activated,it will not be turned on for the domain dev.zaure.com. | +| `--git-clone-thinpack` | `ENVBUILDER_GIT_CLONE_THINPACK` | `true` | Git clone with thin pack compatibility enabled, ensuring that even when thin pack compatibility is activated,it will not be turned on for the domain dev.azure.com. | +| `--git-clone-submodules` | `ENVBUILDER_GIT_CLONE_SUBMODULES` | | Clone Git submodules after cloning the repository. Accepts 'true' (max depth 10), 'false' (disabled), or a positive integer for max recursion depth. | | `--git-username` | `ENVBUILDER_GIT_USERNAME` | | The username to use for Git authentication. This is optional. | | `--git-password` | `ENVBUILDER_GIT_PASSWORD` | | The password to use for Git authentication. This is optional. | | `--git-ssh-private-key-path` | `ENVBUILDER_GIT_SSH_PRIVATE_KEY_PATH` | | Path to an SSH private key to be used for Git authentication. If this is set, then GIT_SSH_PRIVATE_KEY_BASE64 cannot be set. 
| diff --git a/git/git.go b/git/git.go index 320b40c4..65edbcfe 100644 --- a/git/git.go +++ b/git/git.go @@ -7,7 +7,10 @@ import ( "fmt" "io" "net" + "net/url" "os" + "path" + "regexp" "strings" "github.com/coder/envbuilder/internal/ebutil" @@ -15,6 +18,7 @@ import ( "github.com/go-git/go-billy/v5" "github.com/go-git/go-git/v5" + "github.com/go-git/go-git/v5/config" "github.com/go-git/go-git/v5/plumbing" "github.com/go-git/go-git/v5/plumbing/cache" "github.com/go-git/go-git/v5/plumbing/protocol/packp/capability" @@ -32,15 +36,16 @@ type CloneRepoOptions struct { Path string Storage billy.Filesystem - RepoURL string - RepoAuth transport.AuthMethod - Progress sideband.Progress - Insecure bool - SingleBranch bool - ThinPack bool - Depth int - CABundle []byte - ProxyOptions transport.ProxyOptions + RepoURL string + RepoAuth transport.AuthMethod + Progress sideband.Progress + Insecure bool + SingleBranch bool + ThinPack bool + Depth int + CABundle []byte + ProxyOptions transport.ProxyOptions + SubmoduleDepth int // 0 = disabled, >0 = max recursion depth } // CloneRepo will clone the repository at the given URL into the given path. 
// scpLikeURLRegex matches SCP-like remotes of the form user@host:path, for
// example git@github.com:org/repo, deploy@host:repo or user@10.0.0.5:project.
// The user part ends at the first '@' and the host ends at the first ':'.
//
// The pattern also matches scheme URLs that carry both userinfo and a port
// (https://user@host:8443/path), so callers must rule out "://" URLs before
// treating a match as SCP-like.
var scpLikeURLRegex = regexp.MustCompile(`^([^@]+)@([^:]+):(.+)$`)

// extractHost returns the lower-cased host of u without any port. Standard
// URLs are handled by net/url; anything net/url cannot attribute a host to is
// tried as an SCP-like URL. It returns "" when no host can be determined.
func extractHost(u string) string {
	if parsed, err := url.Parse(u); err == nil && parsed.Host != "" {
		// Hostname strips the port and IPv6 brackets.
		return strings.ToLower(parsed.Hostname())
	}

	if matches := scpLikeURLRegex.FindStringSubmatch(u); matches != nil {
		return strings.ToLower(matches[2])
	}

	return ""
}

// SameHost reports whether url1 and url2 name the same host, ignoring case,
// scheme and port. It returns false if either host cannot be determined.
// It is used to decide whether credentials may be forwarded to a submodule.
func SameHost(url1, url2 string) bool {
	host1 := extractHost(url1)
	host2 := extractHost(url2)
	return host1 != "" && host2 != "" && host1 == host2
}

// RedactURL replaces the credentials in u with "***" so that it can be logged.
//
// It handles:
//   - scheme URLs with userinfo (https://user:pass@host, https://token@host),
//     including URL-encoded credentials and IPv6 hosts;
//   - scheme URLs whose userinfo net/url refuses to parse (for example an
//     invalid percent-escape), which are redacted textually instead of being
//     returned unchanged;
//   - SCP-like URLs (git@host:path, oauth2:token@host:path).
//
// Percent-escapes in the path, query and fragment are preserved. Input that is
// none of the above is returned unchanged.
func RedactURL(u string) string {
	parsed, err := url.Parse(u)
	if err == nil && parsed.Scheme != "" && parsed.Host != "" {
		if parsed.User == nil {
			return parsed.String()
		}
		// Serialize without userinfo and splice the marker in afterwards:
		// going through url.User would encode "***" as %2A%2A%2A, and
		// concatenating parsed.Path by hand would lose percent-escapes.
		stripped := *parsed
		stripped.User = nil
		return strings.Replace(stripped.String(), "://", "://***@", 1)
	}

	// Scheme URL that net/url could not parse or found no host in. Redact
	// everything before the last '@' of the authority so that malformed
	// credentials are never logged verbatim.
	if idx := strings.Index(u, "://"); idx >= 0 {
		rest := u[idx+len("://"):]
		authority := rest
		if end := strings.IndexAny(rest, "/?#"); end >= 0 {
			authority = rest[:end]
		}
		if at := strings.LastIndex(authority, "@"); at >= 0 {
			return u[:idx+len("://")] + "***" + rest[at:]
		}
	}

	// SCP-like URL: matches[1] is the user part (git, deploy, oauth2:token,
	// ...), matches[2] the host and matches[3] the path.
	if matches := scpLikeURLRegex.FindStringSubmatch(u); matches != nil {
		return "***@" + matches[2] + ":" + matches[3]
	}

	// Not something that can carry credentials in a form we recognize.
	return u
}

// ResolveSubmoduleURL resolves a potentially relative submodule URL against a
// parent repository URL. URLs that contain "://" or that do not start with
// "./" or "../" are returned unchanged. The parent path is treated like a
// directory entry, so "../x" names a sibling repository and "./x" a child.
// The result keeps the parent's scheme, userinfo and host (including port).
//
// Limitation: SCP-like URLs (e.g., git@github.com:org/repo.git) are not supported as parent URLs
// when the submodule uses a relative path. This is a known limitation.
// See: https://github.com/coder/envbuilder/issues/492
func ResolveSubmoduleURL(parentURL, submoduleURL string) (string, error) {
	isRelative := strings.HasPrefix(submoduleURL, "../") || strings.HasPrefix(submoduleURL, "./")
	if strings.Contains(submoduleURL, "://") || !isRelative {
		return submoduleURL, nil
	}

	// Only scheme-less parents can be SCP-like. Without the "://" guard a
	// parent such as https://user@host:8443/org/repo.git also matches the
	// SCP pattern and would be rejected even though net/url parses it fine.
	if !strings.Contains(parentURL, "://") && scpLikeURLRegex.MatchString(parentURL) {
		return "", fmt.Errorf("relative submodule URL %q cannot be resolved: parent URL %q uses SCP-like syntax which is not supported for relative submodule resolution (see https://github.com/coder/envbuilder/issues/492)", submoduleURL, RedactURL(parentURL))
	}

	parentParsed, err := url.Parse(parentURL)
	if err != nil {
		return "", fmt.Errorf("parse parent URL: %w", err)
	}

	// Walk the relative URL one component at a time, starting from the
	// parent repository path.
	currentPath := strings.TrimSuffix(parentParsed.Path, "/")
	for _, part := range strings.Split(submoduleURL, "/") {
		switch part {
		case "..":
			currentPath = path.Dir(currentPath)
		case ".", "":
			// Stay where we are.
		default:
			currentPath = currentPath + "/" + part
		}
	}

	resolved := &url.URL{
		Scheme: parentParsed.Scheme,
		User:   parentParsed.User,
		Host:   parentParsed.Host,
		Path:   path.Clean(currentPath),
	}
	return resolved.String(), nil
}
+func initSubmodules(ctx context.Context, logf func(string, ...any), repo *git.Repository, opts CloneRepoOptions, currentDepth int) error { + if currentDepth > opts.SubmoduleDepth { + logf("⚠ Skipping nested submodules: max depth %d reached", opts.SubmoduleDepth) + return nil + } + logf("🔗 Initializing git submodules (depth %d/%d)...", currentDepth, opts.SubmoduleDepth) + + w, err := repo.Worktree() + if err != nil { + return fmt.Errorf("get worktree: %w", err) + } + + subs, err := w.Submodules() + if err != nil { + return fmt.Errorf("get submodules: %w", err) + } + + if len(subs) == 0 { + logf("No submodules found") + return nil + } + + logf("Found %d submodule(s)", len(subs)) + + // Get the parent repository URL for resolving relative submodule URLs + cfg, err := repo.Config() + if err != nil { + return fmt.Errorf("get repo config: %w", err) + } + + parentURL := opts.RepoURL + if origin, hasOrigin := cfg.Remotes["origin"]; hasOrigin && len(origin.URLs) > 0 { + parentURL = origin.URLs[0] + } + logf("Parent repository URL: %s", RedactURL(parentURL)) + + for _, sub := range subs { + subConfig := sub.Config() + logf("📦 Initializing submodule: %s", subConfig.Name) + logf(" Submodule path: %s", subConfig.Path) + logf(" Submodule URL (from .gitmodules): %s", RedactURL(subConfig.URL)) + + // Get the expected commit hash + subStatus, err := sub.Status() + if err != nil { + return fmt.Errorf("get submodule status for %q: %w", subConfig.Name, err) + } + logf(" Expected commit: %s", subStatus.Expected) + + // Resolve the submodule URL + resolvedURL, err := ResolveSubmoduleURL(parentURL, subConfig.URL) + if err != nil { + return fmt.Errorf("resolve submodule URL for %q: %w", subConfig.Name, err) + } + logf(" Resolved URL: %s", RedactURL(resolvedURL)) + + // Clone the submodule manually + err = cloneSubmodule(ctx, logf, w, subConfig, subStatus.Expected, resolvedURL, opts) + if err != nil { + return fmt.Errorf("clone submodule %q: %w", subConfig.Name, err) + } + + logf("✓ 
Submodule initialized: %s", subConfig.Name) + + // Recursively handle nested submodules + subRepo, err := sub.Repository() + if err != nil { + logf(" ⚠ Could not open submodule repository %s: %v", subConfig.Name, err) + continue + } + + // Check for nested submodules + subWorktree, err := subRepo.Worktree() + if err == nil { + nestedSubs, err := subWorktree.Submodules() + if err == nil && len(nestedSubs) > 0 { + logf(" Found %d nested submodule(s) in %s", len(nestedSubs), subConfig.Name) + // Create new opts with the submodule's URL as the parent + nestedOpts := opts + nestedOpts.RepoURL = resolvedURL + err = initSubmodules(ctx, logf, subRepo, nestedOpts, currentDepth+1) + if err != nil { + return fmt.Errorf("init nested submodules in %q: %w", subConfig.Name, err) + } + } + } + } + + logf("✓ All submodules initialized successfully") + return nil +} + +// cloneSubmodule manually clones a submodule repository +func cloneSubmodule(ctx context.Context, logf func(string, ...any), parentWorktree *git.Worktree, subConfig *config.Submodule, expectedHash plumbing.Hash, resolvedURL string, opts CloneRepoOptions) error { + // Get the submodule directory within the parent worktree + submodulePath := subConfig.Path + + // Create the submodule directory + subFS, err := parentWorktree.Filesystem.Chroot(submodulePath) + if err != nil { + return fmt.Errorf("chroot to submodule path: %w", err) + } + + // Security: Only forward parent repo auth if submodule is on the same host. + // This prevents credential exfiltration if a malicious .gitmodules points + // to an attacker-controlled server. 
+ var submoduleAuth transport.AuthMethod + if SameHost(opts.RepoURL, resolvedURL) { + submoduleAuth = opts.RepoAuth + } else if opts.RepoAuth != nil { + logf(" ⚠ Not forwarding auth to submodule (different host: %s)", extractHost(resolvedURL)) + } + + // Check if already cloned + _, err = subFS.Stat(".git") + if err == nil { + logf(" Submodule already cloned, checking out expected commit...") + // Open the existing repository + subRepo, err := git.Open( + filesystem.NewStorage(subFS, cache.NewObjectLRU(cache.DefaultMaxSize)), + subFS, + ) + if err != nil { + return fmt.Errorf("open existing submodule: %w", err) + } + + subWorktree, err := subRepo.Worktree() + if err != nil { + return fmt.Errorf("get submodule worktree: %w", err) + } + + // Checkout the expected commit + err = subWorktree.Checkout(&git.CheckoutOptions{ + Hash: expectedHash, + }) + if err != nil { + return fmt.Errorf("checkout expected commit: %w", err) + } + return nil + } + + // Clone the submodule + logf(" Cloning submodule from: %s", RedactURL(resolvedURL)) + + // Create .git directory for the submodule + err = subFS.MkdirAll(".git", 0o755) + if err != nil { + return fmt.Errorf("create .git directory: %w", err) + } + + subGitDir, err := subFS.Chroot(".git") + if err != nil { + return fmt.Errorf("chroot to .git: %w", err) + } + + gitStorage := filesystem.NewStorage(subGitDir, cache.NewObjectLRU(cache.DefaultMaxSize*10)) + + // Clone the submodule repository + // Use SingleBranch=false to fetch all branches so we can find the commit + subRepo, err := git.CloneContext(ctx, gitStorage, subFS, &git.CloneOptions{ + URL: resolvedURL, + Auth: submoduleAuth, + Progress: opts.Progress, + InsecureSkipTLS: opts.Insecure, + CABundle: opts.CABundle, + ProxyOptions: opts.ProxyOptions, + SingleBranch: false, // Fetch all branches + NoCheckout: true, // Don't checkout yet, we'll do it manually + }) + if err != nil && !errors.Is(err, git.ErrRepositoryAlreadyExists) { + return fmt.Errorf("clone submodule 
repository: %w", err) + } + + // Verify the commit exists + logf(" Verifying commit exists: %s", expectedHash) + _, err = subRepo.CommitObject(expectedHash) + if err != nil { + // Commit not found, try fetching with the specific hash + logf(" Commit not found, attempting to fetch it directly...") + err = subRepo.FetchContext(ctx, &git.FetchOptions{ + RemoteName: "origin", + RefSpecs: []config.RefSpec{ + config.RefSpec("+" + expectedHash.String() + ":" + expectedHash.String()), + }, + Auth: submoduleAuth, + Progress: opts.Progress, + InsecureSkipTLS: opts.Insecure, + CABundle: opts.CABundle, + ProxyOptions: opts.ProxyOptions, + }) + if err != nil && err != git.NoErrAlreadyUpToDate { + // If that fails, try fetching all refs + logf(" Direct fetch failed, fetching all refs...") + err = subRepo.FetchContext(ctx, &git.FetchOptions{ + RemoteName: "origin", + Auth: submoduleAuth, + Progress: opts.Progress, + InsecureSkipTLS: opts.Insecure, + CABundle: opts.CABundle, + ProxyOptions: opts.ProxyOptions, + }) + if err != nil && err != git.NoErrAlreadyUpToDate { + return fmt.Errorf("fetch commit %s: %w", expectedHash, err) + } + } + + // Verify again + _, err = subRepo.CommitObject(expectedHash) + if err != nil { + return fmt.Errorf("commit %s still not found after fetch: %w", expectedHash, err) + } + } + + // Checkout the specific commit expected by the parent repository + logf(" Checking out commit: %s", expectedHash) + subWorktree, err := subRepo.Worktree() + if err != nil { + return fmt.Errorf("get submodule worktree: %w", err) + } + + err = subWorktree.Checkout(&git.CheckoutOptions{ + Hash: expectedHash, + }) + if err != nil { + return fmt.Errorf("checkout expected commit %s: %w", expectedHash, err) + } + + return nil +} diff --git a/git/git_test.go b/git/git_test.go index c6422897..c3a0b69f 100644 --- a/git/git_test.go +++ b/git/git_test.go @@ -533,6 +533,331 @@ func mustRead(t *testing.T, fs billy.Filesystem, path string) string { return string(content) } +func 
TestRedactURL(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + input string + expect string + }{ + // Standard URLs without credentials + { + name: "https no creds", + input: "https://github.com/org/repo.git", + expect: "https://github.com/org/repo.git", + }, + { + name: "git protocol no creds", + input: "git://github.com/org/repo.git", + expect: "git://github.com/org/repo.git", + }, + + // HTTPS with various credential formats + { + name: "https with user and password", + input: "https://user:password@github.com/org/repo.git", + expect: "https://***@github.com/org/repo.git", + }, + { + name: "https with token only (no password)", + input: "https://ghp_xxxxxxxxxxxx@github.com/org/repo.git", + expect: "https://***@github.com/org/repo.git", + }, + { + name: "https with user only (no password)", + input: "https://user@github.com/org/repo.git", + expect: "https://***@github.com/org/repo.git", + }, + { + name: "https with x-access-token", + input: "https://x-access-token:ghp_secret123@github.com/org/repo.git", + expect: "https://***@github.com/org/repo.git", + }, + + // URL-encoded credentials + { + name: "https with URL-encoded password", + input: "https://user:p%40ss%3Aw0rd@github.com/org/repo.git", + expect: "https://***@github.com/org/repo.git", + }, + { + name: "https with URL-encoded username", + input: "https://user%40domain:pass@github.com/org/repo.git", + expect: "https://***@github.com/org/repo.git", + }, + + // HTTP + { + name: "http with creds", + input: "http://user:pass@example.com/repo.git", + expect: "http://***@example.com/repo.git", + }, + + // SSH URLs (with scheme) + { + name: "ssh with user", + input: "ssh://git@github.com/org/repo.git", + expect: "ssh://***@github.com/org/repo.git", + }, + { + name: "ssh with different user", + input: "ssh://deploy@github.com/org/repo.git", + expect: "ssh://***@github.com/org/repo.git", + }, + + // SCP-like URLs (no scheme) + { + name: "scp-like git user", + input: 
"git@github.com:org/repo.git", + expect: "***@github.com:org/repo.git", + }, + { + name: "scp-like deploy user", + input: "deploy@host:repo.git", + expect: "***@host:repo.git", + }, + { + name: "scp-like with IP address", + input: "user@10.0.0.5:project.git", + expect: "***@10.0.0.5:project.git", + }, + { + name: "scp-like with token as user", + input: "oauth2:ghp_secret@gitlab.com:org/repo.git", + expect: "***@gitlab.com:org/repo.git", + }, + + // IPv6 hosts + { + name: "https with IPv6 and creds", + input: "https://user:pass@[2001:db8::1]/path/repo.git", + expect: "https://***@[2001:db8::1]/path/repo.git", + }, + { + name: "https with IPv6 no creds", + input: "https://[2001:db8::1]/path/repo.git", + expect: "https://[2001:db8::1]/path/repo.git", + }, + + // Other schemes + { + name: "ftp with creds", + input: "ftp://user:pass@host/path", + expect: "ftp://***@host/path", + }, + { + name: "sftp with user only", + input: "sftp://user@host/path", + expect: "sftp://***@host/path", + }, + + // Edge cases + { + name: "plain path (not a URL)", + input: "/local/path/to/repo", + expect: "/local/path/to/repo", + }, + { + name: "relative path", + input: "../sibling/repo.git", + expect: "../sibling/repo.git", + }, + { + name: "file URL", + input: "file:///local/repo.git", + expect: "file:///local/repo.git", + }, + } + + for _, tc := range cases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + got := git.RedactURL(tc.input) + require.Equal(t, tc.expect, got) + }) + } +} + +func TestSameHost(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + url1 string + url2 string + expect bool + }{ + // Same host cases + { + name: "https same host", + url1: "https://github.com/org/repo.git", + url2: "https://github.com/other/submodule.git", + expect: true, + }, + { + name: "https and scp same host", + url1: "https://github.com/org/repo.git", + url2: "git@github.com:other/submodule.git", + expect: true, + }, + { + name: "scp same host", + url1: 
"git@github.com:org/repo.git", + url2: "git@github.com:other/submodule.git", + expect: true, + }, + { + name: "case insensitive", + url1: "https://GitHub.com/org/repo.git", + url2: "https://github.com/other/submodule.git", + expect: true, + }, + { + name: "with port same host", + url1: "https://github.com:443/org/repo.git", + url2: "https://github.com/other/submodule.git", + expect: true, + }, + { + name: "ssh scheme same host", + url1: "ssh://git@github.com/org/repo.git", + url2: "https://github.com/other/submodule.git", + expect: true, + }, + + // Different host cases + { + name: "different hosts", + url1: "https://github.com/org/repo.git", + url2: "https://gitlab.com/other/submodule.git", + expect: false, + }, + { + name: "scp different hosts", + url1: "git@github.com:org/repo.git", + url2: "git@evil.com:exfiltrate/creds.git", + expect: false, + }, + { + name: "subdomain is different", + url1: "https://github.com/org/repo.git", + url2: "https://api.github.com/other/submodule.git", + expect: false, + }, + + // Edge cases + { + name: "empty url1", + url1: "", + url2: "https://github.com/other/submodule.git", + expect: false, + }, + { + name: "relative url", + url1: "https://github.com/org/repo.git", + url2: "../other/submodule.git", + expect: false, + }, + { + name: "file path", + url1: "https://github.com/org/repo.git", + url2: "/local/path/to/repo", + expect: false, + }, + } + + for _, tc := range cases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + got := git.SameHost(tc.url1, tc.url2) + require.Equal(t, tc.expect, got) + }) + } +} + +func TestResolveSubmoduleURL(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + parentURL string + subURL string + expect string + expectErr string + }{ + { + name: "absolute", + parentURL: "https://example.com/org/main.git", + subURL: "https://github.com/other/repo.git", + expect: "https://github.com/other/repo.git", + }, + { + name: "relativeSibling", + parentURL: 
"https://example.com/org/main.git", + subURL: "../deps/lib.git", + expect: "https://example.com/org/deps/lib.git", + }, + { + name: "relativeChild", + parentURL: "https://example.com/org/main.git", + subURL: "./extras/tool.git", + expect: "https://example.com/org/main.git/extras/tool.git", + }, + { + name: "badParent", + parentURL: "://bad", + subURL: "./child", + expectErr: "parse parent URL", + }, + { + name: "scpParentWithRelativeSubmodule", + parentURL: "git@github.com:org/main.git", + subURL: "../other/submodule.git", + expectErr: "SCP-like syntax which is not supported", + }, + { + name: "scpParentWithAbsoluteSubmodule", + parentURL: "git@github.com:org/main.git", + subURL: "https://github.com/other/submodule.git", + expect: "https://github.com/other/submodule.git", + }, + } + + for _, tc := range cases { + c := tc + t.Run(c.name, func(t *testing.T) { + t.Parallel() + got, err := git.ResolveSubmoduleURL(c.parentURL, c.subURL) + if c.expectErr != "" { + require.ErrorContains(t, err, c.expectErr) + return + } + require.NoError(t, err) + require.Equal(t, c.expect, got) + }) + } +} + +func TestCloneOptionsFromOptions_Submodules(t *testing.T) { + t.Parallel() + + fs := memfs.New() + opts := options.Options{ + Filesystem: fs, + WorkspaceFolder: "/workspace", + GitURL: "https://example.com/example/repo.git", + GitCloneSubmodules: 10, + GitCloneThinPack: true, + } + + cloneOpts, err := git.CloneOptionsFromOptions(t.Logf, opts) + require.NoError(t, err) + require.Equal(t, 10, cloneOpts.SubmoduleDepth) +} + // generates a random ed25519 private key func randKeygen(t *testing.T) gossh.Signer { t.Helper() diff --git a/integration/integration_test.go b/integration/integration_test.go index 102bfba8..24af51d9 100644 --- a/integration/integration_test.go +++ b/integration/integration_test.go @@ -418,6 +418,36 @@ func TestSucceedsGitAuth(t *testing.T) { require.Contains(t, gitConfig, srv.URL) } +func TestGitSubmodules(t *testing.T) { + t.Parallel() + + // Create parent repo 
with a submodule + parentSrv, _ := gittest.CreateGitServerWithSubmodule(t, gittest.Options{ + Files: map[string]string{ + "Dockerfile": "FROM " + testImageAlpine, + }, + }, gittest.Options{ + Files: map[string]string{ + "subfile.txt": "submodule content", + }, + }) + + ctr, err := runEnvbuilder(t, runOpts{env: []string{ + envbuilderEnv("GIT_URL", parentSrv.URL), + envbuilderEnv("DOCKERFILE_PATH", "Dockerfile"), + envbuilderEnv("GIT_CLONE_SUBMODULES", "true"), + }}) + require.NoError(t, err) + + // Verify the .gitmodules file exists + gitmodules := execContainer(t, ctr, "cat /workspaces/empty/.gitmodules") + require.Contains(t, gitmodules, "[submodule") + + // Verify the submodule was actually cloned by checking for the file inside it + subfileContent := execContainer(t, ctr, "cat /workspaces/empty/submod/subfile.txt") + require.Contains(t, subfileContent, "submodule content") +} + func TestGitSSHAuth(t *testing.T) { t.Parallel() diff --git a/options/options.go b/options/options.go index 8cdf723a..7d8b554f 100644 --- a/options/options.go +++ b/options/options.go @@ -5,6 +5,7 @@ import ( "encoding/base64" "fmt" "os" + "strconv" "strings" "github.com/coder/envbuilder/log" @@ -12,6 +13,45 @@ import ( "github.com/go-git/go-billy/v5" ) +// SubmoduleDepth is a custom type for handling submodule depth that accepts +// "true" (defaults to 10), "false" (0), or a positive integer. 
// SubmoduleDepth is a flag value holding the maximum submodule recursion
// depth. It accepts "true"/"yes" (DefaultSubmoduleDepth), "false"/"no"/""
// (0, disabled) or a non-negative integer.
type SubmoduleDepth int

// DefaultSubmoduleDepth is the recursion depth used when the option is set to
// "true" or "yes".
const DefaultSubmoduleDepth = 10

// Set parses val and stores the resulting depth. Surrounding whitespace is
// ignored and the keywords are case-insensitive. On error the stored value is
// left unchanged.
func (s *SubmoduleDepth) Set(val string) error {
	trimmed := strings.TrimSpace(val)
	switch strings.ToLower(trimmed) {
	case "true", "yes":
		*s = DefaultSubmoduleDepth
		return nil
	case "false", "no", "":
		*s = 0
		return nil
	}

	// Parse the trimmed value: the keyword match above ignores surrounding
	// whitespace, so numbers must as well (" 3 " is a valid depth).
	n, err := strconv.Atoi(trimmed)
	if err != nil {
		return fmt.Errorf("invalid submodule depth %q: must be true, false, or a positive integer", val)
	}
	if n < 0 {
		return fmt.Errorf("submodule depth must be non-negative, got %d", n)
	}
	*s = SubmoduleDepth(n)
	return nil
}

// String returns the depth as a decimal number. A nil receiver reports "0",
// since flag libraries may call String on a zero-valued receiver.
func (s *SubmoduleDepth) String() string {
	if s == nil {
		return "0"
	}
	return strconv.Itoa(int(*s))
}

// Type returns the type name shown in help output.
func (s *SubmoduleDepth) Type() string {
	return "submodule-depth"
}

// SubmoduleDepthOf returns a SubmoduleDepth that reads and writes the int
// pointed to by s.
func SubmoduleDepthOf(s *int) *SubmoduleDepth {
	return (*SubmoduleDepth)(s)
}
GitUsername string @@ -384,7 +428,14 @@ func (o *Options) CLI() serpent.OptionSet { Default: "true", Description: "Git clone with thin pack compatibility enabled, " + "ensuring that even when thin pack compatibility is activated," + - "it will not be turned on for the domain dev.zaure.com.", + "it will not be turned on for the domain dev.azure.com.", + }, + { + Flag: "git-clone-submodules", + Env: WithEnvPrefix("GIT_CLONE_SUBMODULES"), + Value: SubmoduleDepthOf(&o.GitCloneSubmodules), + Description: "Clone Git submodules after cloning the repository. " + + "Accepts 'true' (max depth 10), 'false' (disabled), or a positive integer for max recursion depth.", }, { Flag: "git-username", diff --git a/options/options_test.go b/options/options_test.go index ed5dcd3c..cdeec083 100644 --- a/options/options_test.go +++ b/options/options_test.go @@ -72,6 +72,37 @@ func TestEnvOptionParsing(t *testing.T) { require.False(t, o.GitCloneSingleBranch) require.True(t, o.GitCloneThinPack) }) + + t.Run("remote repo build mode", func(t *testing.T) { + t.Setenv(options.WithEnvPrefix("REMOTE_REPO_BUILD_MODE"), "true") + o := runCLI() + require.True(t, o.RemoteRepoBuildMode) + }) + + t.Run("binary path", func(t *testing.T) { + const val = "/usr/local/bin/envbuilder" + t.Setenv(options.WithEnvPrefix("BINARY_PATH"), val) + o := runCLI() + require.Equal(t, o.BinaryPath, val) + }) + + t.Run("git clone submodules true", func(t *testing.T) { + t.Setenv(options.WithEnvPrefix("GIT_CLONE_SUBMODULES"), "true") + o := runCLI() + require.Equal(t, 10, o.GitCloneSubmodules) // "true" defaults to depth 10 + }) + + t.Run("git clone submodules depth", func(t *testing.T) { + t.Setenv(options.WithEnvPrefix("GIT_CLONE_SUBMODULES"), "3") + o := runCLI() + require.Equal(t, 3, o.GitCloneSubmodules) + }) + + t.Run("git clone submodules false", func(t *testing.T) { + t.Setenv(options.WithEnvPrefix("GIT_CLONE_SUBMODULES"), "false") + o := runCLI() + require.Equal(t, 0, o.GitCloneSubmodules) + }) }) } diff --git 
a/options/testdata/options.golden b/options/testdata/options.golden index 92a85232..6c086d56 100644 --- a/options/testdata/options.golden +++ b/options/testdata/options.golden @@ -99,10 +99,15 @@ OPTIONS: --git-clone-single-branch bool, $ENVBUILDER_GIT_CLONE_SINGLE_BRANCH Clone only a single branch of the Git repository. + --git-clone-submodules submodule-depth, $ENVBUILDER_GIT_CLONE_SUBMODULES + Clone Git submodules after cloning the repository. Accepts 'true' (max + depth 10), 'false' (disabled), or a positive integer for max recursion + depth. + --git-clone-thinpack bool, $ENVBUILDER_GIT_CLONE_THINPACK (default: true) Git clone with thin pack compatibility enabled, ensuring that even when thin pack compatibility is activated,it will not be turned on for - the domain dev.zaure.com. + the domain dev.azure.com. --git-http-proxy-url string, $ENVBUILDER_GIT_HTTP_PROXY_URL The URL for the HTTP proxy. This is optional. diff --git a/testutil/gittest/gittest.go b/testutil/gittest/gittest.go index 1a0e2424..e9bac660 100644 --- a/testutil/gittest/gittest.go +++ b/testutil/gittest/gittest.go @@ -20,8 +20,11 @@ import ( "github.com/go-git/go-billy/v5" "github.com/go-git/go-billy/v5/memfs" "github.com/go-git/go-git/v5" + "github.com/go-git/go-git/v5/config" "github.com/go-git/go-git/v5/plumbing" "github.com/go-git/go-git/v5/plumbing/cache" + "github.com/go-git/go-git/v5/plumbing/filemode" + "github.com/go-git/go-git/v5/plumbing/format/index" "github.com/go-git/go-git/v5/plumbing/format/pktline" "github.com/go-git/go-git/v5/plumbing/object" "github.com/go-git/go-git/v5/plumbing/protocol/packp" @@ -270,6 +273,109 @@ func NewRepo(t *testing.T, fs billy.Filesystem, commits ...CommitFunc) *git.Repo return repo } +// CreateGitServerWithSubmodule creates a parent git repo with a submodule pointing to another repo. +// Returns the parent server and the submodule server. +// The submodule is properly registered with a gitlink entry in the tree. 
+func CreateGitServerWithSubmodule(t *testing.T, opts Options, submoduleOpts Options) (parentSrv *httptest.Server, submoduleSrv *httptest.Server) { + t.Helper() + + // Create the submodule repo first and get its HEAD commit + submoduleFS := memfs.New() + submoduleCommits := make([]CommitFunc, 0) + for path, content := range submoduleOpts.Files { + submoduleCommits = append(submoduleCommits, Commit(t, path, content, "submodule commit")) + } + submoduleRepo := NewRepo(t, submoduleFS, submoduleCommits...) + + // Get the submodule's HEAD commit hash + submoduleHead, err := submoduleRepo.Head() + require.NoError(t, err) + submoduleHash := submoduleHead.Hash() + + // Start the submodule server + if submoduleOpts.AuthMW == nil { + submoduleOpts.AuthMW = mwtest.BasicAuthMW(submoduleOpts.Username, submoduleOpts.Password) + } + if submoduleOpts.TLS { + submoduleSrv = httptest.NewTLSServer(submoduleOpts.AuthMW(NewServer(submoduleFS))) + } else { + submoduleSrv = httptest.NewServer(submoduleOpts.AuthMW(NewServer(submoduleFS))) + } + + // Create the parent repo with .gitmodules and gitlink entry + if opts.AuthMW == nil { + opts.AuthMW = mwtest.BasicAuthMW(opts.Username, opts.Password) + } + + parentFS := memfs.New() + commits := make([]CommitFunc, 0) + for path, content := range opts.Files { + commits = append(commits, Commit(t, path, content, "my test commit")) + } + + // Add .gitmodules file and gitlink entry for the submodule + commits = append(commits, CommitSubmodule(t, "submod", submoduleSrv.URL, submoduleHash)) + + _ = NewRepo(t, parentFS, commits...) + + if opts.TLS { + parentSrv = httptest.NewTLSServer(opts.AuthMW(NewServer(parentFS))) + } else { + parentSrv = httptest.NewServer(opts.AuthMW(NewServer(parentFS))) + } + return parentSrv, submoduleSrv +} + +// CommitSubmodule creates a commit that adds a submodule with proper .gitmodules and gitlink entry. 
+func CommitSubmodule(t *testing.T, path, url string, hash plumbing.Hash) CommitFunc { + return func(fs billy.Filesystem, repo *git.Repository) { + t.Helper() + tree, err := repo.Worktree() + require.NoError(t, err) + + // Create .gitmodules file + gitmodulesContent := fmt.Sprintf("[submodule %q]\n\tpath = %s\n\turl = %s\n", path, path, url) + WriteFile(t, fs, ".gitmodules", gitmodulesContent) + _, err = tree.Add(".gitmodules") + require.NoError(t, err) + + // Add submodule config to .git/config + cfg, err := repo.Config() + require.NoError(t, err) + cfg.Submodules[path] = &config.Submodule{ + Name: path, + Path: path, + URL: url, + } + err = repo.SetConfig(cfg) + require.NoError(t, err) + + // Create the gitlink entry (mode 160000 commit reference) + // We need to add it directly to the index + idx, err := repo.Storer.Index() + require.NoError(t, err) + + // Add a gitlink entry - this is a special index entry with mode 160000 + idx.Entries = append(idx.Entries, &index.Entry{ + Mode: filemode.Submodule, + Hash: hash, + Name: path, + }) + err = repo.Storer.SetIndex(idx) + require.NoError(t, err) + + // Commit the changes + _, err = tree.Commit("add submodule", &git.CommitOptions{ + Author: &object.Signature{ + Name: "Example", + Email: "test@example.com", + When: time.Now(), + }, + }) + require.NoError(t, err) + } +} + // WriteFile writes a file to the filesystem. func WriteFile(t *testing.T, fs billy.Filesystem, path, content string) { t.Helper()