@@ -44,7 +44,7 @@ type DirInodeData struct {
44
44
DirTime time.Time
45
45
ImplicitDir bool
46
46
47
- listMarker * string
47
+ listMarker string
48
48
lastFromCloud * string
49
49
listDone bool
50
50
forgetDuringList bool
@@ -60,44 +60,22 @@ type DirInodeData struct {
60
60
handles []* DirHandle
61
61
}
62
62
63
- // Returns true if any char in `inp` has a value < '/'.
63
+ // Returns the position of first char < '/' in `inp` after prefixLen + any continued '/' characters .
64
64
// This should work for unicode also: unicode chars are all greater than 128.
65
65
// See TestHasCharLtSlash for examples.
66
- func hasCharLtSlash (inp string ) bool {
67
- for _ , c := range inp {
68
- if c < '/' {
69
- return true
70
- }
66
+ func locateLtSlash (inp string , prefixLen int ) int {
67
+ i := prefixLen
68
+ for i < len (inp ) && inp [i ] == '/' {
69
+ i ++
71
70
}
72
- return false
73
- }
74
-
75
- // Gets the name of the blob/prefix from a full cloud path.
76
- // See TestCloudPathToName for examples.
77
- func cloudPathToName (inp string ) string {
78
- inp = strings .TrimRight (inp , "/" )
79
- split := strings .Split (inp , "/" )
80
- return split [len (split )- 1 ]
81
- }
82
-
83
- // Returns true if the last prefix's name or last item's name from the given
84
- // ListBlobsOutput has a character less than '/'
85
- // See TestShouldFetchNextListBlobsPage for examples.
86
- func shouldFetchNextListBlobsPage (resp * ListBlobsOutput ) bool {
87
- if ! resp .IsTruncated {
88
- // There is no next page.
89
- return false
90
- }
91
- numPrefixes := len (resp .Prefixes )
92
- numItems := len (resp .Items )
93
- if numPrefixes > 0 &&
94
- hasCharLtSlash (cloudPathToName (* resp .Prefixes [numPrefixes - 1 ].Prefix )) {
95
- return true
96
- } else if numItems > 0 &&
97
- hasCharLtSlash (cloudPathToName (* resp .Items [numItems - 1 ].Key )) {
98
- return true
71
+ // first character < / doesn't matter, because directory names can't start with /
72
+ i ++
73
+ for ; i < len (inp ); i ++ {
74
+ if inp [i ] < '/' {
75
+ return i
76
+ }
99
77
}
100
- return false
78
+ return - 1
101
79
}
102
80
103
81
type DirHandle struct {
@@ -395,7 +373,7 @@ func (dir *DirInodeData) checkGapLoaded(key string, newerThan time.Time) bool {
395
373
396
374
// LOCKS_REQUIRED(inode.mu)
397
375
func (inode * Inode ) sealDir () {
398
- inode .dir .listMarker = nil
376
+ inode .dir .listMarker = ""
399
377
inode .dir .listDone = true
400
378
inode .dir .lastFromCloud = nil
401
379
inode .dir .DirTime = time .Now ()
@@ -478,6 +456,97 @@ func (dh *DirHandle) handleListResult(resp *ListBlobsOutput, prefix string, skip
478
456
}
479
457
}
480
458
459
+ func maxName (resp * ListBlobsOutput , itemPos , prefixPos int ) (string , int ) {
460
+ if itemPos > 0 && (prefixPos == 0 || * resp .Prefixes [prefixPos - 1 ].Prefix < * resp .Items [itemPos - 1 ].Key ) {
461
+ return * resp .Items [itemPos - 1 ].Key , 0
462
+ }
463
+ if prefixPos == 0 {
464
+ return "" , 0
465
+ }
466
+ return * resp .Prefixes [prefixPos - 1 ].Prefix , 1
467
+ }
468
+
469
+ func prefixLarger (s1 , s2 string , pos int ) bool {
470
+ if len (s2 ) <= pos {
471
+ return s1 [0 :pos ] < s2
472
+ }
473
+ return s1 [0 :pos ] < s2 [0 :pos ]
474
+ }
475
+
476
+ // In s3 & azblob, prefixes are returned with '/' => the prefix "2019" is
477
+ // returned as "2019/". So the list api for these backends returns "2019/" after
478
+ // "2019-0001/" because ascii("/") > ascii("-"). This is problematic for us if
479
+ // "2019/" is returned in x+1'th batch and "2019-0001/" is returned in x'th
480
+ // because GeeseFS returns results to the client in the sorted order.
481
+ //
482
+ // To overcome this issue, previously we continued to next listing pages
483
+ // until the last item of the listing didn't contain characters less than '/'
484
+ // anymore. But it was bad because '.' also precedes '/' and this made GeeseFS
485
+ // fully load almost all directories, because most directories contain files
486
+ // with extensions.
487
+ //
488
+ // So now we intelligently cut the result array so that the last item
489
+ // either doesn't contain characters before '/' or is followed by another item
490
+ // with a larger prefix (i.e. 'abc-def', then 'abd-' or 'abca-'). And if it's
491
+ // impossible, i.e. if the whole listing contains items with the same prefix,
492
+ // we issues an additional HEAD request and check for the presence of an item
493
+ // with slash ('abc-def' is in listing, then we check 'abc/').
494
+ //
495
+ // Relevant test case: TestReadDirDash
496
+ func intelligentListCut (resp * ListBlobsOutput , cloud StorageBackend , prefix string ) (lastName string , err error ) {
497
+ if ! resp .IsTruncated {
498
+ return
499
+ }
500
+ isPrefix := 0
501
+ itemPos , prefixPos := len (resp .Items ), len (resp .Prefixes )
502
+ count := itemPos + prefixPos
503
+ lastName , isPrefix = maxName (resp , itemPos , prefixPos )
504
+ lastLtPos := locateLtSlash (lastName , len (prefix ))
505
+ if lastLtPos >= 0 {
506
+ prev , name := "" , lastName
507
+ ok := false
508
+ for itemPos + prefixPos > count / 2 {
509
+ prefixPos -= isPrefix
510
+ itemPos -= (1 - isPrefix )
511
+ prev , isPrefix = maxName (resp , itemPos , prefixPos )
512
+ ltPos := locateLtSlash (prev , len (prefix ))
513
+ if ltPos < 0 || prefixLarger (prev , name , ltPos + 1 ) {
514
+ // No characters less than '/' => OK, stop
515
+ ok = true
516
+ break
517
+ }
518
+ name = prev
519
+ }
520
+ if ok {
521
+ resp .Items = resp .Items [0 :itemPos ]
522
+ resp .Prefixes = resp .Prefixes [0 :prefixPos ]
523
+ lastName = prev
524
+ } else {
525
+ // Can't intelligently cut the list as more than 50% of it has the same prefix
526
+ // So, check for existence of the offending directory separately
527
+ // Simulate '>=' operator with start-after
528
+ // '.' = '/'-1
529
+ // \xF4\x8F\xBF\xBF = 0x10FFFF in UTF-8 = largest code point of 3-byte UTF-8
530
+ // \xEF\xBF\xBD = 0xFFFD in UTF-8 = largest valid symbol of 2-byte UTF-8
531
+ // So, > xxx.\xEF\xBF\xBF is the same as >= xxx/
532
+ dirobj , err := cloud .ListBlobs (& ListBlobsInput {
533
+ StartAfter : PString (lastName [0 :lastLtPos ]+ ".\xEF \xBF \xBD " ),
534
+ MaxKeys : PUInt32 (1 ),
535
+ })
536
+ if err != nil {
537
+ return "" , err
538
+ }
539
+ if len (dirobj .Items ) > 0 {
540
+ checkedName := * dirobj .Items [0 ].Key
541
+ if len (checkedName ) >= lastLtPos + 1 && checkedName [0 :lastLtPos + 1 ] == lastName [0 :lastLtPos ]+ "/" {
542
+ resp .Prefixes = append (resp .Prefixes , BlobPrefixOutput {Prefix : PString (checkedName [0 :lastLtPos + 1 ])})
543
+ }
544
+ }
545
+ }
546
+ }
547
+ return
548
+ }
549
+
481
550
func (dh * DirHandle ) listObjectsFlat () (err error ) {
482
551
cloud , prefix := dh .inode .cloud ()
483
552
if cloud == nil {
@@ -495,9 +564,9 @@ func (dh *DirHandle) listObjectsFlat() (err error) {
495
564
myList := dh .inode .fs .addInflightListing ()
496
565
497
566
params := & ListBlobsInput {
498
- Delimiter : PString ("/" ),
499
- ContinuationToken : dh .inode .dir .listMarker ,
500
- Prefix : & prefix ,
567
+ Delimiter : PString ("/" ),
568
+ StartAfter : PString ( dh .inode .dir .listMarker ) ,
569
+ Prefix : & prefix ,
501
570
}
502
571
503
572
dh .mu .Unlock ()
@@ -511,36 +580,19 @@ func (dh *DirHandle) listObjectsFlat() (err error) {
511
580
512
581
s3Log .Debug (resp )
513
582
583
+ // See comment to intelligentListCut above
584
+ lastName , err := intelligentListCut (resp , cloud , prefix )
585
+ if err != nil {
586
+ dh .inode .fs .completeInflightListing (myList )
587
+ }
588
+
514
589
dh .inode .mu .Lock ()
515
590
dh .handleListResult (resp , prefix , dh .inode .fs .completeInflightListing (myList ))
516
591
517
- if resp .IsTruncated && resp .NextContinuationToken != nil {
518
- // :-X idiotic aws-sdk with string pointers was leading to a huge memory leak here
519
- next := * resp .NextContinuationToken
520
- dh .inode .dir .listMarker = & next
521
- // In s3 & azblob, prefixes are returned with '/' => the prefix "2019" is
522
- // returned as "2019/". So the list api for these backends returns "2019/" after
523
- // "2019-0001/" because ascii("/") > ascii("-"). This is problematic for us if
524
- // "2019/" is returned in x+1'th batch and "2019-0001/" is returned in x'th
525
- // because GeeseFS returns results to the client in the sorted order.
526
- //
527
- // We could always use ordering of s3/azblob but other cloud providers may have
528
- // different sorting strategies. For example, in ADLv2 "a/" is < "a-b/".
529
- //
530
- // We also could intelligently cut the result array so that the last item
531
- // either doesn't contain characters before '/' or is followed by another item
532
- // with a larger prefix (i.e. 'abc-def', then 'abd-' or 'abca-') in the removed
533
- // portion, but it's probably rarely worth it because `readdir` listings are
534
- // usually anyway full. Also it would require using StartAfter instead of
535
- // ContinuationToken. Nevertheless, it may be a FIXME for the future :-)
536
- //
537
- // So, currently we just proceed to next pages in this case.
538
- //
539
- // Relevant test case: TestReadDirDash
540
- if shouldFetchNextListBlobsPage (resp ) {
541
- // Tell loadListing() that we want more pages :)
542
- dh .inode .dir .lastFromCloud = nil
543
- }
592
+ if resp .IsTruncated {
593
+ // :-X for history: aws-sdk with its idiotic string pointers was leading
594
+ // to a huge memory leak when we were saving NextContinuationToken as is
595
+ dh .inode .dir .listMarker = lastName
544
596
} else {
545
597
dh .inode .sealDir ()
546
598
}
@@ -576,7 +628,7 @@ func (dh *DirHandle) checkDirPosition() {
576
628
func (dh * DirHandle ) loadListing () error {
577
629
parent := dh .inode
578
630
579
- if ! parent .dir .listDone && parent .dir .listMarker == nil {
631
+ if ! parent .dir .listDone && parent .dir .listMarker == "" {
580
632
// listMarker is nil => We just started refreshing this directory
581
633
parent .dir .listDone = false
582
634
parent .dir .lastFromCloud = nil
@@ -592,7 +644,7 @@ func (dh *DirHandle) loadListing() error {
592
644
// we immediately switch to regular listings.
593
645
// Original implementation in Goofys in fact was similar in this aspect
594
646
// but it was ugly in several places, so ... sorry, it's reworked. O:-)
595
- useSlurp := parent .dir .listMarker == nil && parent .fs .flags .StatCacheTTL != 0
647
+ useSlurp := parent .dir .listMarker == "" && parent .fs .flags .StatCacheTTL != 0
596
648
597
649
// the dir expired, so we need to fetch from the cloud. there
598
650
// may be static directories that we want to keep, so cloud
0 commit comments