Skip to content

Commit 23ce0f6

Browse files
committed
Tool for extracting UtilityVM files from a container layer into a CIM
This commit adds a new tool that takes a valid Windows container image tarball and extracts all the UtilityVM files from that tarball into a block CIM. The end result should be a block CIM that has all the files necessary to successfully boot & run a UtilityVM. Signed-off-by: Amit Barve <[email protected]>
1 parent 7084bd2 commit 23ce0f6

File tree

6 files changed

+909
-0
lines changed

6 files changed

+909
-0
lines changed

cmd/mkuvmcim/main.go

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
//go:build windows
2+
// +build windows
3+
4+
package main
5+
6+
import (
7+
"context"
8+
"flag"
9+
"fmt"
10+
"os"
11+
"time"
12+
13+
"github.com/Microsoft/hcsshim/pkg/extractuvm"
14+
)
15+
16+
func main() {
17+
var (
18+
layerTar string
19+
destPath string
20+
)
21+
22+
flag.StringVar(&layerTar, "layer", "", "Path to the source layer tar")
23+
flag.StringVar(&destPath, "dest", "", "Path to the destination directory")
24+
flag.Parse()
25+
26+
if layerTar == "" || destPath == "" {
27+
fmt.Println("Error: Both -layer and -dest flags are required")
28+
flag.Usage()
29+
os.Exit(1)
30+
}
31+
32+
// 5 minutes should be more than enough to extract all the files
33+
ctx, cancelFunc := context.WithTimeout(context.Background(), 5*time.Minute)
34+
defer cancelFunc()
35+
36+
if _, err := extractuvm.MakeUtilityVMCIMFromTar(ctx, layerTar, destPath); err != nil {
37+
fmt.Printf("failed to create UVM CIM: %s", err)
38+
os.Exit(1)
39+
}
40+
}

pkg/extractuvm/bcim.go

Lines changed: 208 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,208 @@
1+
//go:build windows
2+
// +build windows
3+
4+
package extractuvm
5+
6+
import (
7+
"archive/tar"
8+
"compress/gzip"
9+
"context"
10+
"errors"
11+
"fmt"
12+
"io"
13+
"log/slog"
14+
"os"
15+
"path/filepath"
16+
17+
"github.com/Microsoft/hcsshim/pkg/cimfs"
18+
)
19+
20+
// LevelTrace is a custom slog level with the numeric value -8. This package
// uses it for the per-file messages logged while walking the layer tar (files
// added, pending copies and pending links).
const LevelTrace = slog.Level(-8)
23+
24+
func MakeUtilityVMCIMFromTar(ctx context.Context, tarPath, destPath string) (_ *cimfs.BlockCIM, err error) {
25+
slog.InfoContext(ctx, "Extracting UtilityVM files from tar", "tarPath", tarPath, "destPath", destPath)
26+
27+
tarFile, err := os.Open(tarPath)
28+
if err != nil {
29+
return nil, fmt.Errorf("failed to open layer tar: %w", err)
30+
}
31+
defer tarFile.Close()
32+
33+
err = os.MkdirAll(destPath, 0755)
34+
if err != nil {
35+
return nil, fmt.Errorf("failed to create destination directory: %w", err)
36+
}
37+
38+
uvmCIM := &cimfs.BlockCIM{
39+
Type: cimfs.BlockCIMTypeSingleFile,
40+
BlockPath: filepath.Join(destPath, "boot.bcim"),
41+
CimName: "boot.cim",
42+
}
43+
44+
w, err := newUVMCIMWriter(uvmCIM)
45+
if err != nil {
46+
return nil, fmt.Errorf("failed to create block CIM writer: %w", err)
47+
}
48+
defer func() {
49+
cErr := w.Close(ctx)
50+
if err == nil {
51+
err = cErr
52+
}
53+
}()
54+
55+
if err = extractUtilityVMFilesFromTar(ctx, tarFile, w); err != nil {
56+
return nil, fmt.Errorf("failed to extract UVM layer: %w", err)
57+
}
58+
return uvmCIM, nil
59+
}
60+
61+
// extractUtilityVMFilesFromTar writes all the files in the tar under the
62+
// `UitilityVM/Files` directory into the CIM. For windows container image layer tarballs,
63+
// there is complex web of hardlinks between the files under `Files` &
64+
// `UtilityVM/Files`. To correctly handle this when extracting UtilityVM files this code
65+
// makes following assumptions based on the way windows layer tarballs are currently
66+
// structured:
67+
// 1. If tar iteration comes across a file of type `TypeLink`, the target that this link points to MUST have already been iterated over.
68+
// 2. When iterating over the tarball, `Files` directory tree always comes before the `UtilityVM/Files` directory.
69+
// 3. There are hardlinks under `UtilityVM/Files` that point to files under `Files` but not vice versa.
70+
// 4. Since this routine is supposed to be used on a base layer tarball, it doesn't expect any whiteout files in the tarball.
71+
// 5. Files of type `TypeSymlink` are not generally used windows base layers and so the code errors out if it sees such files. Same is the case for files with alternate data streams.
72+
// 6. There are no directories under `UtilityVM/Files` that are hardlinks to directories under `Files`. Only files can be hardlinks.
73+
func extractUtilityVMFilesFromTar(ctx context.Context, tarFile *os.File, w *uvmCIMWriter) error {
74+
gr, err := gzip.NewReader(tarFile)
75+
if err != nil {
76+
return fmt.Errorf("failed to get gzip reader: %w", err)
77+
}
78+
defer gr.Close()
79+
80+
tr := tar.NewReader(gr)
81+
82+
// General approach:
83+
// Iterate over each file in the tar one by one. If we see a file that is under
84+
// `UtilityVM/Files`: If it is a standard file or a directory add it to the CIM
85+
// directly.
86+
// If it is a link, check the target of the link:
87+
// - If the target is not under `UtilityVM/Files`, save it so that we can copy the
88+
// target file under at the path of this link.
89+
// - If the target is also under UtilityVM/Files save it for later addition (we
90+
// can't add the link yet because the target itself could be another link, so we
91+
// need to wait until all link targets are resolved and added to the CIM).
92+
93+
linksToAdd := []*pendingLink{}
94+
// a map of all the files seen in the tar - this is used to resolve nested links
95+
tarContents := make(map[string]*tar.Header)
96+
// a map of all files that we need to copy inside the UtilityVM directory from the
97+
// outside because there are hardlinks to it inside the UtilityVM directory.
98+
// There could be multiple links under UtilityVM/Files that end up directly or
99+
// indirectly pointing to the same target. So we may have to copy the same file at
100+
// multiple locations.
101+
// TODO (ambarve): avoid these multiple copies by only copying once and then
102+
// adding hardlinks.
103+
copies := make(map[string][]string)
104+
105+
for {
106+
hdr, err := tr.Next()
107+
if err != nil {
108+
if errors.Is(err, io.EOF) {
109+
break
110+
}
111+
return fmt.Errorf("tar read failed: %w", err)
112+
}
113+
114+
if err = validateFileType(hdr); err != nil {
115+
return err
116+
}
117+
118+
tarContents[hdr.Name] = hdr
119+
120+
if !hasUtilityVMFilesPrefix(hdr.Name) {
121+
continue
122+
}
123+
124+
// At this point we either have a standard file or a link file that is
125+
// under the UtilityVM\Files directory.
126+
if hdr.Typeflag == tar.TypeLink {
127+
if !hasUtilityVMFilesPrefix(hdr.Linkname) {
128+
// link points to a file outside the UtilityVM\Files
129+
// directory we need to copy this file, but first resolve
130+
// the link
131+
resolvedTarget, err := resolveLink(tarContents, hdr.Linkname)
132+
if err != nil {
133+
return fmt.Errorf("failed to resolve link's [%s] target [%s]: %w", hdr.Name, hdr.Linkname, err)
134+
}
135+
copies[resolvedTarget] = append(copies[resolvedTarget], hdr.Name)
136+
slog.Log(ctx, LevelTrace, "adding to list of pending copies", "src", resolvedTarget, "dest", hdr.Name)
137+
} else {
138+
linksToAdd = append(linksToAdd, &pendingLink{
139+
name: hdr.Name,
140+
target: hdr.Linkname,
141+
})
142+
slog.Log(ctx, LevelTrace, "adding to list of pending links", "link", hdr.Name, "target", hdr.Linkname)
143+
}
144+
continue
145+
}
146+
if err = w.Add(hdr, tr, false); err != nil {
147+
return fmt.Errorf("failed add UtilityVM standard file [%s]: %w", hdr.Name, err)
148+
}
149+
slog.Log(ctx, LevelTrace, "added standard file", "path", hdr.Name)
150+
}
151+
// close the current gzip reader before making a new one
152+
if err = gr.Close(); err != nil {
153+
return fmt.Errorf("failed to close gzip reader after first iteration: %w", err)
154+
}
155+
156+
// reiterate tar and add copies
157+
if _, err = tarFile.Seek(0, 0); err != nil {
158+
return fmt.Errorf("failed to reset file offset: %w", err)
159+
}
160+
161+
gr, err = gzip.NewReader(tarFile)
162+
if err != nil {
163+
return fmt.Errorf("failed to get gzip reader: %w", err)
164+
}
165+
defer gr.Close()
166+
tr = tar.NewReader(gr)
167+
168+
for {
169+
hdr, err := tr.Next()
170+
if err != nil {
171+
if errors.Is(err, io.EOF) {
172+
break
173+
}
174+
return fmt.Errorf("tar read failed: %w", err)
175+
}
176+
177+
dsts, ok := copies[hdr.Name]
178+
if !ok {
179+
continue
180+
}
181+
182+
if !hasFilesPrefix(hdr.Name) {
183+
return fmt.Errorf("copy src file doesn't have expected prefix: [%s]", hdr.Name)
184+
}
185+
186+
for i, dst := range dsts {
187+
if i == 0 {
188+
dHdr := *hdr
189+
dHdr.Name = dst
190+
// copy the first one, next will be links to the first
191+
if err = w.Add(&dHdr, tr, true); err != nil {
192+
return fmt.Errorf("failed to copy resolved link target [%s]: %w", hdr.Name, err)
193+
}
194+
} else {
195+
if err = w.AddLink(dst, dsts[0], true); err != nil {
196+
return fmt.Errorf("failed to add links to copied file [%s]: %w", dst, err)
197+
}
198+
}
199+
}
200+
}
201+
202+
for _, pl := range linksToAdd {
203+
if err = w.AddLink(pl.name, pl.target, false); err != nil {
204+
return fmt.Errorf("failed to add link from [%s] to [%s]: %w", pl.name, pl.target, err)
205+
}
206+
}
207+
return nil
208+
}

0 commit comments

Comments
 (0)