Skip to content

Commit 38830ed

Browse files
committed
Implement intelligent list cutting to better fix the problem with characters less than /
1 parent e72e0be commit 38830ed

File tree

4 files changed

+239
-141
lines changed

4 files changed

+239
-141
lines changed

internal/backend_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package internal
1818

1919
type TestBackend struct {
2020
StorageBackend
21+
ListBlobsFunc func(param *ListBlobsInput) (*ListBlobsOutput, error)
2122
err error
2223
}
2324

@@ -32,6 +33,9 @@ func (s *TestBackend) ListBlobs(param *ListBlobsInput) (*ListBlobsOutput, error)
3233
if s.err != nil {
3334
return nil, s.err
3435
}
36+
if s.ListBlobsFunc != nil {
37+
return s.ListBlobsFunc(param)
38+
}
3539
return s.StorageBackend.ListBlobs(param)
3640
}
3741

internal/dir.go

Lines changed: 120 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ type DirInodeData struct {
4444
DirTime time.Time
4545
ImplicitDir bool
4646

47-
listMarker *string
47+
listMarker string
4848
lastFromCloud *string
4949
listDone bool
5050
forgetDuringList bool
@@ -60,44 +60,22 @@ type DirInodeData struct {
6060
handles []*DirHandle
6161
}
6262

63-
// Returns true if any char in `inp` has a value < '/'.
63+
// Returns the position of first char < '/' in `inp` after prefixLen + any continued '/' characters.
6464
// This should work for unicode also: unicode chars are all greater than 128.
6565
// See TestHasCharLtSlash for examples.
66-
func hasCharLtSlash(inp string) bool {
67-
for _, c := range inp {
68-
if c < '/' {
69-
return true
70-
}
66+
func locateLtSlash(inp string, prefixLen int) int {
67+
i := prefixLen
68+
for i < len(inp) && inp[i] == '/' {
69+
i++
7170
}
72-
return false
73-
}
74-
75-
// Gets the name of the blob/prefix from a full cloud path.
76-
// See TestCloudPathToName for examples.
77-
func cloudPathToName(inp string) string {
78-
inp = strings.TrimRight(inp, "/")
79-
split := strings.Split(inp, "/")
80-
return split[len(split)-1]
81-
}
82-
83-
// Returns true if the last prefix's name or last item's name from the given
84-
// ListBlobsOutput has a character less than '/'
85-
// See TestShouldFetchNextListBlobsPage for examples.
86-
func shouldFetchNextListBlobsPage(resp *ListBlobsOutput) bool {
87-
if !resp.IsTruncated {
88-
// There is no next page.
89-
return false
90-
}
91-
numPrefixes := len(resp.Prefixes)
92-
numItems := len(resp.Items)
93-
if numPrefixes > 0 &&
94-
hasCharLtSlash(cloudPathToName(*resp.Prefixes[numPrefixes-1].Prefix)) {
95-
return true
96-
} else if numItems > 0 &&
97-
hasCharLtSlash(cloudPathToName(*resp.Items[numItems-1].Key)) {
98-
return true
71+
// first character < / doesn't matter, because directory names can't start with /
72+
i++
73+
for ; i < len(inp); i++ {
74+
if inp[i] < '/' {
75+
return i
76+
}
9977
}
100-
return false
78+
return -1
10179
}
10280

10381
type DirHandle struct {
@@ -395,7 +373,7 @@ func (dir *DirInodeData) checkGapLoaded(key string, newerThan time.Time) bool {
395373

396374
// LOCKS_REQUIRED(inode.mu)
397375
func (inode *Inode) sealDir() {
398-
inode.dir.listMarker = nil
376+
inode.dir.listMarker = ""
399377
inode.dir.listDone = true
400378
inode.dir.lastFromCloud = nil
401379
inode.dir.DirTime = time.Now()
@@ -478,6 +456,97 @@ func (dh *DirHandle) handleListResult(resp *ListBlobsOutput, prefix string, skip
478456
}
479457
}
480458

459+
func maxName(resp *ListBlobsOutput, itemPos, prefixPos int) (string, int) {
460+
if itemPos > 0 && (prefixPos == 0 || *resp.Prefixes[prefixPos-1].Prefix < *resp.Items[itemPos-1].Key) {
461+
return *resp.Items[itemPos-1].Key, 0
462+
}
463+
if prefixPos == 0 {
464+
return "", 0
465+
}
466+
return *resp.Prefixes[prefixPos-1].Prefix, 1
467+
}
468+
469+
func prefixLarger(s1, s2 string, pos int) bool {
470+
if len(s2) <= pos {
471+
return s1[0:pos] < s2
472+
}
473+
return s1[0:pos] < s2[0:pos]
474+
}
475+
476+
// In s3 & azblob, prefixes are returned with '/' => the prefix "2019" is
477+
// returned as "2019/". So the list api for these backends returns "2019/" after
478+
// "2019-0001/" because ascii("/") > ascii("-"). This is problematic for us if
479+
// "2019/" is returned in x+1'th batch and "2019-0001/" is returned in x'th
480+
// because GeeseFS returns results to the client in the sorted order.
481+
//
482+
// To overcome this issue, previously we continued to next listing pages
483+
// until the last item of the listing didn't contain characters less than '/'
484+
// anymore. But it was bad because '.' also precedes '/' and this made GeeseFS
485+
// fully load almost all directories, because most directories contain files
486+
// with extensions.
487+
//
488+
// So now we intelligently cut the result array so that the last item
489+
// either doesn't contain characters before '/' or is followed by another item
490+
// with a larger prefix (i.e. 'abc-def', then 'abd-' or 'abca-'). And if it's
491+
// impossible, i.e. if the whole listing contains items with the same prefix,
492+
// we issues an additional HEAD request and check for the presence of an item
493+
// with slash ('abc-def' is in listing, then we check 'abc/').
494+
//
495+
// Relevant test case: TestReadDirDash
496+
func intelligentListCut(resp *ListBlobsOutput, cloud StorageBackend, prefix string) (lastName string, err error) {
497+
if !resp.IsTruncated {
498+
return
499+
}
500+
isPrefix := 0
501+
itemPos, prefixPos := len(resp.Items), len(resp.Prefixes)
502+
count := itemPos+prefixPos
503+
lastName, isPrefix = maxName(resp, itemPos, prefixPos)
504+
lastLtPos := locateLtSlash(lastName, len(prefix))
505+
if lastLtPos >= 0 {
506+
prev, name := "", lastName
507+
ok := false
508+
for itemPos+prefixPos > count/2 {
509+
prefixPos -= isPrefix
510+
itemPos -= (1-isPrefix)
511+
prev, isPrefix = maxName(resp, itemPos, prefixPos)
512+
ltPos := locateLtSlash(prev, len(prefix))
513+
if ltPos < 0 || prefixLarger(prev, name, ltPos+1) {
514+
// No characters less than '/' => OK, stop
515+
ok = true
516+
break
517+
}
518+
name = prev
519+
}
520+
if ok {
521+
resp.Items = resp.Items[0:itemPos]
522+
resp.Prefixes = resp.Prefixes[0:prefixPos]
523+
lastName = prev
524+
} else {
525+
// Can't intelligently cut the list as more than 50% of it has the same prefix
526+
// So, check for existence of the offending directory separately
527+
// Simulate '>=' operator with start-after
528+
// '.' = '/'-1
529+
// \xF4\x8F\xBF\xBF = 0x10FFFF in UTF-8 = largest code point of 3-byte UTF-8
530+
// \xEF\xBF\xBD = 0xFFFD in UTF-8 = largest valid symbol of 2-byte UTF-8
531+
// So, > xxx.\xEF\xBF\xBF is the same as >= xxx/
532+
dirobj, err := cloud.ListBlobs(&ListBlobsInput{
533+
StartAfter: PString(lastName[0:lastLtPos]+".\xEF\xBF\xBD"),
534+
MaxKeys: PUInt32(1),
535+
})
536+
if err != nil {
537+
return "", err
538+
}
539+
if len(dirobj.Items) > 0 {
540+
checkedName := *dirobj.Items[0].Key
541+
if len(checkedName) >= lastLtPos+1 && checkedName[0:lastLtPos+1] == lastName[0:lastLtPos]+"/" {
542+
resp.Prefixes = append(resp.Prefixes, BlobPrefixOutput{Prefix: PString(checkedName[0:lastLtPos+1])})
543+
}
544+
}
545+
}
546+
}
547+
return
548+
}
549+
481550
func (dh *DirHandle) listObjectsFlat() (err error) {
482551
cloud, prefix := dh.inode.cloud()
483552
if cloud == nil {
@@ -495,9 +564,9 @@ func (dh *DirHandle) listObjectsFlat() (err error) {
495564
myList := dh.inode.fs.addInflightListing()
496565

497566
params := &ListBlobsInput{
498-
Delimiter: PString("/"),
499-
ContinuationToken: dh.inode.dir.listMarker,
500-
Prefix: &prefix,
567+
Delimiter: PString("/"),
568+
StartAfter: PString(dh.inode.dir.listMarker),
569+
Prefix: &prefix,
501570
}
502571

503572
dh.mu.Unlock()
@@ -511,36 +580,19 @@ func (dh *DirHandle) listObjectsFlat() (err error) {
511580

512581
s3Log.Debug(resp)
513582

583+
// See comment to intelligentListCut above
584+
lastName, err := intelligentListCut(resp, cloud, prefix)
585+
if err != nil {
586+
dh.inode.fs.completeInflightListing(myList)
587+
}
588+
514589
dh.inode.mu.Lock()
515590
dh.handleListResult(resp, prefix, dh.inode.fs.completeInflightListing(myList))
516591

517-
if resp.IsTruncated && resp.NextContinuationToken != nil {
518-
// :-X idiotic aws-sdk with string pointers was leading to a huge memory leak here
519-
next := *resp.NextContinuationToken
520-
dh.inode.dir.listMarker = &next
521-
// In s3 & azblob, prefixes are returned with '/' => the prefix "2019" is
522-
// returned as "2019/". So the list api for these backends returns "2019/" after
523-
// "2019-0001/" because ascii("/") > ascii("-"). This is problematic for us if
524-
// "2019/" is returned in x+1'th batch and "2019-0001/" is returned in x'th
525-
// because GeeseFS returns results to the client in the sorted order.
526-
//
527-
// We could always use ordering of s3/azblob but other cloud providers may have
528-
// different sorting strategies. For example, in ADLv2 "a/" is < "a-b/".
529-
//
530-
// We also could intelligently cut the result array so that the last item
531-
// either doesn't contain characters before '/' or is followed by another item
532-
// with a larger prefix (i.e. 'abc-def', then 'abd-' or 'abca-') in the removed
533-
// portion, but it's probably rarely worth it because `readdir` listings are
534-
// usually anyway full. Also it would require using StartAfter instead of
535-
// ContinuationToken. Nevertheless, it may be a FIXME for the future :-)
536-
//
537-
// So, currently we just proceed to next pages in this case.
538-
//
539-
// Relevant test case: TestReadDirDash
540-
if shouldFetchNextListBlobsPage(resp) {
541-
// Tell loadListing() that we want more pages :)
542-
dh.inode.dir.lastFromCloud = nil
543-
}
592+
if resp.IsTruncated {
593+
// :-X for history: aws-sdk with its idiotic string pointers was leading
594+
// to a huge memory leak when we were saving NextContinuationToken as is
595+
dh.inode.dir.listMarker = lastName
544596
} else {
545597
dh.inode.sealDir()
546598
}
@@ -576,7 +628,7 @@ func (dh *DirHandle) checkDirPosition() {
576628
func (dh *DirHandle) loadListing() error {
577629
parent := dh.inode
578630

579-
if !parent.dir.listDone && parent.dir.listMarker == nil {
631+
if !parent.dir.listDone && parent.dir.listMarker == "" {
580632
// listMarker is nil => We just started refreshing this directory
581633
parent.dir.listDone = false
582634
parent.dir.lastFromCloud = nil
@@ -592,7 +644,7 @@ func (dh *DirHandle) loadListing() error {
592644
// we immediately switch to regular listings.
593645
// Original implementation in Goofys in fact was similar in this aspect
594646
// but it was ugly in several places, so ... sorry, it's reworked. O:-)
595-
useSlurp := parent.dir.listMarker == nil && parent.fs.flags.StatCacheTTL != 0
647+
useSlurp := parent.dir.listMarker == "" && parent.fs.flags.StatCacheTTL != 0
596648

597649
// the dir expired, so we need to fetch from the cloud. there
598650
// may be static directories that we want to keep, so cloud

0 commit comments

Comments
 (0)