Skip to content

Instantly share code, notes, and snippets.

@corneliusroemer
Created April 21, 2025 14:25
Show Gist options
  • Select an option

  • Save corneliusroemer/c58cf0faf957d9001b58d4ed14cb0a21 to your computer and use it in GitHub Desktop.

Select an option

Save corneliusroemer/c58cf0faf957d9001b58d4ed14cb0a21 to your computer and use it in GitHub Desktop.

Revisions

  1. corneliusroemer created this gist Apr 21, 2025.
    439 changes: 439 additions & 0 deletions cpd_spegel_peerd.md
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,439 @@
    # Analysis of copy/paste patterns in the initial commit of peerd (copying from spegel)

    Created by comparing the following two repository snapshots:
    - https://github.com/spegel-org/spegel/blob/ed21d4da925b9a179c8f4094e22402a5bb2a930c
    - https://github.com/Azure/peerd/blob/64b8928943ddd73691d0b5d80609a2e134a96c77

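    PMD's Copy/Paste Detector (CPD) was run from the root of the peerd checkout, with spegel checked out in a sibling directory: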

    ```
    pmd cpd --minimum-tokens 100 --dir ./ ../spegel --language go > cpd.txt
    ```

    False positives (duplications internal to peerd itself) were manually removed.

    See the related Hacker News discussion: https://news.ycombinator.com/item?id=43750535

    ```
    ==========================================
    Found a 178 line (771 tokens) duplication in the following files:
    Starting at line 116 of /Users/cr/code/peerd/../spegel/pkg/oci/containerd_test.go
    Starting at line 24 of /Users/cr/code/peerd/internal/containerd/mirror_test.go
    }{
    {
    name: "multiple mirros",
    resolveTags: true,
    registries: stringListToUrlList(t, []string{"http://foo.bar:5000"}),
    mirrors: stringListToUrlList(t, []string{"http://127.0.0.1:5000", "http://127.0.0.1:5001"}),
    expectedFiles: map[string]string{
    "/etc/containerd/certs.d/foo.bar:5000/hosts.toml": `server = 'http://foo.bar:5000'
    [host]
    [host.'http://127.0.0.1:5000']
    capabilities = ['pull', 'resolve']
    [host.'http://127.0.0.1:5001']
    capabilities = ['pull', 'resolve']
    `,
    },
    },
    {
    name: "resolve tags disabled",
    resolveTags: false,
    registries: stringListToUrlList(t, []string{"https://docker.io", "http://foo.bar:5000"}),
    mirrors: stringListToUrlList(t, []string{"http://127.0.0.1:5000"}),
    expectedFiles: map[string]string{
    "/etc/containerd/certs.d/docker.io/hosts.toml": `server = 'https://registry-1.docker.io'
    [host]
    [host.'http://127.0.0.1:5000']
    capabilities = ['pull']
    `,
    "/etc/containerd/certs.d/foo.bar:5000/hosts.toml": `server = 'http://foo.bar:5000'
    [host]
    [host.'http://127.0.0.1:5000']
    capabilities = ['pull']
    `,
    },
    },
    {
    name: "config path directory does not exist",
    resolveTags: true,
    registries: stringListToUrlList(t, []string{"https://docker.io", "http://foo.bar:5000"}),
    mirrors: stringListToUrlList(t, []string{"http://127.0.0.1:5000"}),
    createConfigPathDir: false,
    expectedFiles: map[string]string{
    "/etc/containerd/certs.d/docker.io/hosts.toml": `server = 'https://registry-1.docker.io'
    [host]
    [host.'http://127.0.0.1:5000']
    capabilities = ['pull', 'resolve']
    `,
    "/etc/containerd/certs.d/foo.bar:5000/hosts.toml": `server = 'http://foo.bar:5000'
    [host]
    [host.'http://127.0.0.1:5000']
    capabilities = ['pull', 'resolve']
    `,
    },
    },
    {
    name: "config path directory does exist",
    resolveTags: true,
    registries: stringListToUrlList(t, []string{"https://docker.io", "http://foo.bar:5000"}),
    mirrors: stringListToUrlList(t, []string{"http://127.0.0.1:5000"}),
    createConfigPathDir: true,
    expectedFiles: map[string]string{
    "/etc/containerd/certs.d/docker.io/hosts.toml": `server = 'https://registry-1.docker.io'
    [host]
    [host.'http://127.0.0.1:5000']
    capabilities = ['pull', 'resolve']
    `,
    "/etc/containerd/certs.d/foo.bar:5000/hosts.toml": `server = 'http://foo.bar:5000'
    [host]
    [host.'http://127.0.0.1:5000']
    capabilities = ['pull', 'resolve']
    `,
    },
    },
    {
    name: "config path directory contains configuration",
    resolveTags: true,
    registries: stringListToUrlList(t, []string{"https://docker.io", "http://foo.bar:5000"}),
    mirrors: stringListToUrlList(t, []string{"http://127.0.0.1:5000"}),
    createConfigPathDir: true,
    existingFiles: map[string]string{
    "/etc/containerd/certs.d/docker.io/hosts.toml": "Hello World",
    "/etc/containerd/certs.d/ghcr.io/hosts.toml": "Foo Bar",
    },
    expectedFiles: map[string]string{
    "/etc/containerd/certs.d/_backup/docker.io/hosts.toml": "Hello World",
    "/etc/containerd/certs.d/_backup/ghcr.io/hosts.toml": "Foo Bar",
    "/etc/containerd/certs.d/docker.io/hosts.toml": `server = 'https://registry-1.docker.io'
    [host]
    [host.'http://127.0.0.1:5000']
    capabilities = ['pull', 'resolve']
    `,
    "/etc/containerd/certs.d/foo.bar:5000/hosts.toml": `server = 'http://foo.bar:5000'
    [host]
    [host.'http://127.0.0.1:5000']
    capabilities = ['pull', 'resolve']
    `,
    },
    },
    {
    name: "config path directory contains backup",
    resolveTags: true,
    registries: stringListToUrlList(t, []string{"https://docker.io", "http://foo.bar:5000"}),
    mirrors: stringListToUrlList(t, []string{"http://127.0.0.1:5000"}),
    createConfigPathDir: true,
    existingFiles: map[string]string{
    "/etc/containerd/certs.d/_backup/docker.io/hosts.toml": "Hello World",
    "/etc/containerd/certs.d/_backup/ghcr.io/hosts.toml": "Foo Bar",
    "/etc/containerd/certs.d/test.txt": "test",
    "/etc/containerd/certs.d/foo": "bar",
    },
    expectedFiles: map[string]string{
    "/etc/containerd/certs.d/_backup/docker.io/hosts.toml": "Hello World",
    "/etc/containerd/certs.d/_backup/ghcr.io/hosts.toml": "Foo Bar",
    "/etc/containerd/certs.d/docker.io/hosts.toml": `server = 'https://registry-1.docker.io'
    [host]
    [host.'http://127.0.0.1:5000']
    capabilities = ['pull', 'resolve']
    `,
    "/etc/containerd/certs.d/foo.bar:5000/hosts.toml": `server = 'http://foo.bar:5000'
    [host]
    [host.'http://127.0.0.1:5000']
    capabilities = ['pull', 'resolve']
    `,
    },
    },
    }
    for _, tt := range tests {
    t.Run(tt.name, func(t *testing.T) {
    fs := afero.NewMemMapFs()
    if tt.createConfigPathDir {
    err := fs.Mkdir(registryConfigPath, 0755)
    require.NoError(t, err)
    }
    for k, v := range tt.existingFiles {
    err := afero.WriteFile(fs, k, []byte(v), 0644)
    require.NoError(t, err)
    }
    err := AddMirrorConfiguration(context.TODO(), fs, registryConfigPath, tt.registries, tt.mirrors, tt.resolveTags)
    require.NoError(t, err)
    if len(tt.existingFiles) == 0 {
    ok, err := afero.DirExists(fs, "/etc/containerd/certs.d/_backup")
    require.NoError(t, err)
    require.False(t, ok)
    }
    err = afero.Walk(fs, registryConfigPath, func(path string, fi iofs.FileInfo, _ error) error {
    if fi.IsDir() {
    return nil
    }
    expectedContent, ok := tt.expectedFiles[path]
    require.True(t, ok, path)
    b, err := afero.ReadFile(fs, path)
    require.NoError(t, err)
    require.Equal(t, expectedContent, string(b))
    return nil
    })
    require.NoError(t, err)
    })
    }
    }
    func TestMirrorConfigurationInvalidMirrorURL(t *testing.T) {
    fs := afero.NewMemMapFs()
    mirrors := stringListToUrlList(t, []string{"http://127.0.0.1:5000"})
    registries := stringListToUrlList(t, []string{"ftp://docker.io"})
    err := AddMirrorConfiguration(context.TODO(), fs, "/etc/containerd/certs.d", registries, mirrors, true)
    require.EqualError(t, err, "invalid registry url scheme must be http or https: ftp://docker.io")
    =====================================================================
    Found a 44 line (252 tokens) duplication in the following files:
    Starting at line 75 of /Users/cr/code/peerd/../spegel/pkg/registry/registry_test.go
    Starting at line 77 of /Users/cr/code/peerd/internal/oci/mirror_test.go
    }{
    {
    name: "request should timeout when no peers exists",
    key: "no-peers",
    expectedStatus: http.StatusNotFound,
    expectedBody: "",
    expectedHeaders: nil,
    },
    {
    name: "request should not timeout and give 500 if all peers fail",
    key: "no-working-peers",
    expectedStatus: http.StatusInternalServerError,
    expectedBody: "",
    expectedHeaders: nil,
    },
    {
    name: "request should work when first peer responds",
    key: "first-peer",
    expectedStatus: http.StatusOK,
    expectedBody: "hello world",
    expectedHeaders: map[string][]string{"foo": {"bar"}},
    },
    {
    name: "second peer should respond when first gives error",
    key: "first-peer-error",
    expectedStatus: http.StatusOK,
    expectedBody: "hello world",
    expectedHeaders: map[string][]string{"foo": {"bar"}},
    },
    {
    name: "last peer should respond when two first fail",
    key: "last-peer-working",
    expectedStatus: http.StatusOK,
    expectedBody: "hello world",
    expectedHeaders: map[string][]string{"foo": {"bar"}},
    },
    }
    for _, tt := range tests {
    for _, method := range []string{http.MethodGet, http.MethodHead} {
    t.Run(tt.name, func(t *testing.T) {
    rw := CreateTestResponseRecorder()
    c, _ := gin.CreateTestContext(rw)
    target := fmt.Sprintf("http://example.com/%s", tt.key)
    c.Request = httptest.NewRequest(method, target, nil)
    =====================================================================
    Found a 54 line (250 tokens) duplication in the following files:
    Starting at line 493 of /Users/cr/code/peerd/../spegel/pkg/oci/containerd.go
    Starting at line 72 of /Users/cr/code/peerd/internal/containerd/mirror.go
    log.Info("backing up Containerd host configuration", "path", oldPath)
    }
    }
    }
    // Remove all content from config path to start from clean slate
    files, err := afero.ReadDir(fs, configPath)
    if err != nil {
    return err
    }
    for _, fi := range files {
    if fi.Name() == backupDir {
    continue
    }
    filePath := path.Join(configPath, fi.Name())
    err := fs.RemoveAll(filePath)
    if err != nil {
    return err
    }
    }
    // Write mirror configuration
    capabilities := []string{"pull"}
    if resolveTags {
    capabilities = append(capabilities, "resolve")
    }
    for _, registryURL := range registryURLs {
    // Need a special case for Docker Hub as docker.io is just an alias.
    server := registryURL.String()
    if registryURL.String() == "https://docker.io" {
    server = "https://registry-1.docker.io"
    }
    hostConfigs := map[string]hostConfig{}
    for _, u := range mirrorURLs {
    hostConfigs[u.String()] = hostConfig{Capabilities: capabilities}
    }
    cfg := hostFile{
    Server: server,
    HostConfigs: hostConfigs,
    }
    b, err := toml.Marshal(&cfg)
    if err != nil {
    return err
    }
    fp := path.Join(configPath, registryURL.Host, "hosts.toml")
    err = fs.MkdirAll(path.Dir(fp), 0755)
    if err != nil {
    return err
    }
    err = afero.WriteFile(fs, fp, b, 0644)
    if err != nil {
    return err
    }
    log.Info("added containerd mirror configuration", "registry", registryURL.String(), "path", fp)
    =====================================================================
    Found a 38 line (192 tokens) duplication in the following files:
    Starting at line 456 of /Users/cr/code/peerd/../spegel/pkg/oci/containerd.go
    Starting at line 34 of /Users/cr/code/peerd/internal/containerd/mirror.go
    log := logr.FromContextOrDiscard(ctx)
    if err := validate(registryURLs); err != nil {
    return err
    }
    // Create config path dir if it does not exist
    ok, err := afero.DirExists(fs, configPath)
    if err != nil {
    return err
    }
    if !ok {
    err := fs.MkdirAll(configPath, 0755)
    if err != nil {
    return err
    }
    }
    // Backup files and directories in config path
    backupDirPath := path.Join(configPath, backupDir)
    if _, err := fs.Stat(backupDirPath); os.IsNotExist(err) {
    files, err := afero.ReadDir(fs, configPath)
    if err != nil {
    return err
    }
    if len(files) > 0 {
    err = fs.MkdirAll(backupDirPath, 0755)
    if err != nil {
    return err
    }
    for _, fi := range files {
    oldPath := path.Join(configPath, fi.Name())
    newPath := path.Join(backupDirPath, fi.Name())
    err := fs.Rename(oldPath, newPath)
    if err != nil {
    return err
    }
    log.Info("backing up Containerd host configuration", "path", oldPath)
    =====================================================================
    Found a 39 line (190 tokens) duplication in the following files:
    Starting at line 19 of /Users/cr/code/peerd/../spegel/pkg/oci/image_test.go
    Starting at line 19 of /Users/cr/code/peerd/pkg/containerd/reference_test.go
    }{
    {
    name: "Latest tag",
    image: "library/ubuntu:latest",
    digestInImage: false,
    expectedRepository: "library/ubuntu",
    expectedTag: "latest",
    expectedDigest: digest.Digest("sha256:c0669ef34cdc14332c0f1ab0c2c01acb91d96014b172f1a76f3a39e63d1f0bda"),
    },
    {
    name: "Only tag",
    image: "library/alpine:3.18.0",
    digestInImage: false,
    expectedRepository: "library/alpine",
    expectedTag: "3.18.0",
    expectedDigest: digest.Digest("sha256:c0669ef34cdc14332c0f1ab0c2c01acb91d96014b172f1a76f3a39e63d1f0bda"),
    },
    {
    name: "Tag and digest",
    image: "jetstack/cert-manager-controller:3.18.0@sha256:c0669ef34cdc14332c0f1ab0c2c01acb91d96014b172f1a76f3a39e63d1f0bda",
    digestInImage: true,
    expectedRepository: "jetstack/cert-manager-controller",
    expectedTag: "3.18.0",
    expectedDigest: digest.Digest("sha256:c0669ef34cdc14332c0f1ab0c2c01acb91d96014b172f1a76f3a39e63d1f0bda"),
    },
    {
    name: "Only digest",
    image: "fluxcd/helm-controller@sha256:c0669ef34cdc14332c0f1ab0c2c01acb91d96014b172f1a76f3a39e63d1f0bda",
    digestInImage: true,
    expectedRepository: "fluxcd/helm-controller",
    expectedTag: "",
    expectedDigest: digest.Digest("sha256:c0669ef34cdc14332c0f1ab0c2c01acb91d96014b172f1a76f3a39e63d1f0bda"),
    },
    }
    registries := []string{"docker.io", "quay.io", "ghcr.com", "127.0.0.1"}
    for _, registry := range registries {
    for _, tt := range tests {
    t.Run(fmt.Sprintf("%s_%s", tt.name, registry), func(t *testing.T) {
    for _, extraDgst := range []string{tt.expectedDigest.String(), ""} {
    =====================================================================
    Found a 33 line (158 tokens) duplication in the following files:
    Starting at line 15 of /Users/cr/code/peerd/../spegel/pkg/registry/registry_test.go
    Starting at line 14 of /Users/cr/code/peerd/internal/oci/mirror_test.go
    )
    type TestResponseRecorder struct {
    *httptest.ResponseRecorder
    closeChannel chan bool
    }
    func (r *TestResponseRecorder) CloseNotify() <-chan bool {
    return r.closeChannel
    }
    //nolint:unused // ignore
    func (r *TestResponseRecorder) closeClient() {
    r.closeChannel <- true
    }
    func CreateTestResponseRecorder() *TestResponseRecorder {
    return &TestResponseRecorder{
    httptest.NewRecorder(),
    make(chan bool, 1),
    }
    }
    func TestMirrorHandler(t *testing.T) {
    badSvr := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
    w.WriteHeader(http.StatusInternalServerError)
    w.Header().Set("foo", "bar")
    if r.Method == http.MethodGet {
    //nolint:errcheck // ignore
    w.Write([]byte("hello world"))
    }
    }))
    defer badSvr.Close()
    ```