15
15
)
16
16
17
17
__author__ = "Brian Maloney"
18
- __version__ = "2022.03.04 "
18
+ __version__ = "2022.03.11 "
19
19
20
20
21
21
ASCII_BYTE = rb" !#\$%&\'\(\)\+,-\.0123456789;=@ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\]\^_`abcdefghijklmnopqrstuvwxyz\{\}\~\t"
@@ -52,8 +52,11 @@ def subset(dict_, keys):
52
52
cache = {}
53
53
final = []
54
54
55
- for row in df .sort_values (by = ['Level' , 'ParentId' , 'Type' ], ascending = [False , False , False ]).to_dict ('records' ):
56
- file = subset (row , keys = ('ParentId' , 'DriveItemId' , 'eTag' , 'Type' , 'Name' , 'Size' , 'Children' ))
55
+ df .loc [df .Type == 'File' , ['FileSort' ]] = df ['Name' ].str .lower ()
56
+ df .loc [df .Type == 'Folder' , ['FolderSort' ]] = df ['Name' ].str .lower ()
57
+
58
+ for row in df .sort_values (by = ['Level' , 'ParentId' , 'Type' , 'FileSort' , 'FolderSort' ], ascending = [False , False , False , True , False ]).to_dict ('records' ):
59
+ file = subset (row , keys = ('ParentId' , 'DriveItemId' , 'eTag' , 'Type' , 'Path' , 'Name' , 'Size' , 'Children' ))
57
60
if row ['Type' ] == 'File' :
58
61
folder = cache .setdefault (row ['ParentId' ], {})
59
62
folder .setdefault ('Children' , []).append (file )
@@ -70,10 +73,12 @@ def subset(dict_, keys):
70
73
'DriveItemId' : '' ,
71
74
'eTag' : '' ,
72
75
'Type' : 'Root Drive' ,
76
+ 'Path' : '' ,
73
77
'Name' : name ,
74
78
'Size' : '' ,
75
79
'Children' : ''
76
80
}
81
+
77
82
cache ['Children' ] = final
78
83
79
84
if pretty :
@@ -100,10 +105,7 @@ def subset(dict_, keys):
100
105
def print_csv (df , name , csv_path , csv_name ):
101
106
df = df .sort_values (by = ['Level' , 'ParentId' , 'Type' ], ascending = [True , False , False ])
102
107
df = df .drop (['Children' , 'Level' ], axis = 1 )
103
- id_name_dict = dict (zip (df .DriveItemId , df .Name ))
104
- parent_dict = dict (zip (df .DriveItemId , df .ParentId ))
105
108
106
- df ['Path' ] = df .DriveItemId .apply (lambda x : find_parent (x , id_name_dict , parent_dict ).lstrip ('\\ \\ ' ))
107
109
csv_file = os .path .basename (name ).split ('.' )[0 ]+ "_OneDrive.csv"
108
110
if csv_name :
109
111
csv_file = csv_name
@@ -116,10 +118,7 @@ def print_csv(df, name, csv_path, csv_name):
116
118
def print_html (df , name , html_path ):
117
119
df = df .sort_values (by = ['Level' , 'ParentId' , 'Type' ], ascending = [True , False , False ])
118
120
df = df .drop (['Children' , 'Level' ], axis = 1 )
119
- id_name_dict = dict (zip (df .DriveItemId , df .Name ))
120
- parent_dict = dict (zip (df .DriveItemId , df .ParentId ))
121
121
122
- df ['Path' ] = df .DriveItemId .apply (lambda x : find_parent (x , id_name_dict , parent_dict ).lstrip ('\\ \\ ' ))
123
122
html_file = os .path .basename (name ).split ('.' )[0 ]+ "_OneDrive.html"
124
123
file_extension = os .path .splitext (name )[1 ][1 :]
125
124
if file_extension == 'previous' :
@@ -160,25 +159,32 @@ def parse_onedrive(usercid, reghive, json_path, csv_path, csv_name, pretty, html
160
159
'Size' ,
161
160
'Children' ])
162
161
dir_index = []
163
- for match in re .finditer (uuid4hex , f .read ()):
164
- s = match .start ()
165
- eTag = match .group (1 ).decode ("utf-8" )
162
+ entries = re .finditer (uuid4hex , f .read ())
163
+ current = next (entries , total )
164
+ while isinstance (current , re .Match ):
165
+ s = current .start ()
166
+ eTag = current .group (1 ).decode ("utf-8" )
166
167
count = s
167
168
diroffset = s - 39
168
169
objoffset = s - 78
169
170
f .seek (objoffset )
170
171
ouuid = f .read (32 ).decode ("utf-8" ).strip ('\u0000 ' )
171
172
f .seek (diroffset )
172
173
duuid = f .read (32 ).decode ("utf-8" ).strip ('\u0000 ' )
173
- name , name_s = unicode_strings (f .read (400 ))
174
+ n_current = next (entries , total )
175
+ try :
176
+ buffer = n_current .start () - f .tell ()
177
+ except AttributeError :
178
+ buffer = n_current - f .tell ()
179
+ name , name_s = unicode_strings (f .read (buffer ))
174
180
try :
175
181
sizeoffset = diroffset + 24 + name_s
176
182
f .seek (sizeoffset )
177
183
size = int .from_bytes (f .read (8 ), "little" )
178
184
except :
179
185
size = name_s
180
186
f .seek (diroffset + 32 )
181
- logging .error (f'An error occured trying to find the name of { ouuid } . Raw Data:{ f .read (400 )} ' )
187
+ logging .error (f'An error occured trying to find the name of { ouuid } . Raw Data:{ f .read (buffer )} ' )
182
188
if not dir_index :
183
189
if reghive and personal :
184
190
try :
@@ -212,18 +218,15 @@ def parse_onedrive(usercid, reghive, json_path, csv_path, csv_name, pretty, html
212
218
213
219
dir_index .append (input )
214
220
progress (count , total , status = 'Building folder list. Please wait....' )
221
+ current = n_current
215
222
216
223
print ('\n ' )
217
224
218
225
df = pd .DataFrame .from_records (dir_index )
219
226
df .loc [(df .DriveItemId .isin (df .ParentId )) | (df .Size == 2880154368 ), ['Type' , 'Size' ]] = ['Folder' , '' ]
220
227
df .at [0 , 'Type' ] = 'Root Default'
221
- id_name_dict = dict (zip (df .DriveItemId , df .Name ))
222
- parent_dict = dict (zip (df .DriveItemId , df .ParentId ))
223
-
224
- df ['Level' ] = df .DriveItemId .apply (lambda x : len (find_parent (x , id_name_dict , parent_dict ).lstrip ('\\ \\ ' ).split ('\\ \\ ' )))
225
228
226
- share_df = df .loc [(df . Level == 1 ) & ( ~ df .ParentId .isin (df .DriveItemId )) & (df .Type != 'Root Default' )]
229
+ share_df = df .loc [(~ df .ParentId .isin (df .DriveItemId )) & (df .Type != 'Root Default' )]
227
230
share_list = list (set (share_df .ParentId ))
228
231
share_root = []
229
232
@@ -251,6 +254,12 @@ def parse_onedrive(usercid, reghive, json_path, csv_path, csv_name, pretty, html
251
254
logging .warning (f'Unable to read registry hive! { e } ' )
252
255
pass
253
256
257
+ id_name_dict = dict (zip (df .DriveItemId , df .Name ))
258
+ parent_dict = dict (zip (df .DriveItemId , df .ParentId ))
259
+ df ['Path' ] = df .DriveItemId .apply (lambda x : find_parent (x , id_name_dict , parent_dict ).lstrip ('\\ \\ ' ).split ('\\ \\ ' ))
260
+ df ['Level' ] = df ['Path' ].str .len ()
261
+ df ['Path' ] = df ['Path' ].str .join ('\\ ' )
262
+
254
263
if csv_path :
255
264
print_csv (df , f .name , csv_path , csv_name )
256
265
if html_path :
0 commit comments