Skip to content

Commit 863af5b

Browse files
Merge pull request #387 from andreidenissov-cog/feature/386
Implement local filesystem option for studioml experiment storage/database.
2 parents a8c0b9a + 0291be0 commit 863af5b

File tree

5 files changed

+175
-1
lines changed

5 files changed

+175
-1
lines changed

docs/local_filesystem_setup.rst

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
Setting up experiment storage and database in local filesystem
2+
==============================================================
3+
4+
This page describes how to set up studioml to use
5+
local filesystem for storing experiment artifacts and meta-data.
6+
With this option, there is no need to setup any external
7+
connection to S3/Minio/GCS etc.
8+
9+
StudioML configuration
10+
----------------------
11+
12+
::
13+
14+
"studio_ml_config": {
15+
16+
...
17+
18+
"database": {
19+
"type": "local",
20+
"endpoint": SOME_DB_LOCAL_PATH,
21+
"bucket": DB_BUCKET_NAME,
22+
"authentication": "none"
23+
},
24+
"storage": {
25+
"type": "local",
26+
"endpoint": SOME_ARTIFACTS_LOCAL_PATH,
27+
"bucket": ARTIFACTS_BUCKET_NAME,
28+
}
29+
30+
...
31+
}
32+
33+
34+
With StudioML database type set to "local",
35+
all experiment meta-data will be stored locally under
36+
directory: SOME_DB_LOCAL_PATH/DB_BUCKET_NAME.
37+
Similarly, with storage type set to "local",
38+
all experiment artifacts will be stored locally under
39+
directory: SOME_ARTIFACTS_LOCAL_PATH/ARTIFACTS_BUCKET_NAME.
40+
41+
Note: if you are using "local" mode, it is recommended to use it
42+
for both storage and database configuration.
43+
But it's technically possible to mix, for example, local storage configuration
44+
and S3-based database configuration etc.
45+

studio/artifact_store.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
from .firebase_artifact_store import FirebaseArtifactStore
22
from .gcloud_artifact_store import GCloudArtifactStore
3+
from .local_artifact_store import LocalArtifactStore
34
from .s3_artifact_store import S3ArtifactStore
45

5-
66
def get_artifact_store(config, blocking_auth=True, verbose=10):
77
if config['type'].lower() == 'firebase':
88
return FirebaseArtifactStore(
@@ -11,5 +11,7 @@ def get_artifact_store(config, blocking_auth=True, verbose=10):
1111
return GCloudArtifactStore(config, verbose=verbose)
1212
elif config['type'].lower() == 's3':
1313
return S3ArtifactStore(config, verbose=verbose)
14+
elif config['type'].lower() == 'local':
15+
return LocalArtifactStore(config, verbose=verbose)
1416
else:
1517
raise ValueError('Unknown storage type: ' + config['type'])

studio/local_artifact_store.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
import calendar
2+
import os
3+
import shutil
4+
5+
from .tartifact_store import TartifactStore
6+
7+
class LocalArtifactStore(TartifactStore):
    """Artifact store backed by the local filesystem.

    Artifacts are stored as plain files under ``<endpoint>/<bucket>/<key>``,
    where ``endpoint`` and ``bucket`` come from the studioml storage
    configuration. No external service (S3/GCS/Minio) is required.
    """

    def __init__(self, config,
                 bucket_name=None,
                 verbose=10,
                 measure_timestamp_diff=False,
                 compression=None):
        """Create a local artifact store.

        :param config: storage configuration dict; reads 'endpoint'
            (root directory, default '~'), 'bucket' and 'compression'.
        :param bucket_name: overrides config['bucket'] when given.
        :param verbose: verbosity level forwarded to the base store.
        :param measure_timestamp_diff: forwarded to the base store.
        :param compression: compression setting; falls back to
            config['compression'] when None.
        :raises ValueError: if the configured endpoint does not exist or
            is not a directory.
        """
        if compression is None:
            compression = config.get('compression')

        self.endpoint = config.get('endpoint', '~')
        self.store_root = os.path.realpath(os.path.expanduser(self.endpoint))
        # isdir() implies existence; raise with the offending path instead
        # of the original bare ValueError() (matches LocalDbProvider).
        if not os.path.isdir(self.store_root):
            raise ValueError(
                "Local store root {} doesn't exist or not a directory!"
                .format(self.store_root))

        self.bucket = bucket_name
        if self.bucket is None:
            self.bucket = config.get('bucket')
        self.store_root = os.path.join(self.store_root, self.bucket)
        # Create the bucket directory itself; the original called
        # _ensure_path_dirs_exist(store_root), which only creates the
        # *parent* of store_root and left the bucket dir missing until
        # the first upload.
        os.makedirs(self.store_root, mode=0o777, exist_ok=True)

        super(LocalArtifactStore, self).__init__(
            measure_timestamp_diff,
            compression=compression,
            verbose=verbose)

    def _ensure_path_dirs_exist(self, path):
        # Make sure the directory that will contain `path` exists.
        dirs = os.path.dirname(path)
        os.makedirs(dirs, mode=0o777, exist_ok=True)

    def _upload_file(self, key, local_path):
        # "Upload" is a plain file copy into the store tree.
        target_path = os.path.join(self.store_root, key)
        self._ensure_path_dirs_exist(target_path)
        shutil.copyfile(local_path, target_path)

    def _download_file(self, key, local_path, bucket=None):
        # `bucket` is accepted for interface compatibility but unused:
        # self.store_root already includes the bucket component.
        source_path = os.path.join(self.store_root, key)
        self._ensure_path_dirs_exist(local_path)
        shutil.copyfile(source_path, local_path)

    def _delete_file(self, key):
        # Idempotent delete, consistent with LocalDbProvider._delete:
        # removing an absent key is a no-op rather than an error.
        file_path = os.path.join(self.store_root, key)
        if os.path.exists(file_path):
            os.remove(file_path)

    def _get_file_url(self, key, method='GET'):
        # Local files are addressed by their filesystem path.
        return str(os.path.join(self.store_root, key))

    def _get_file_post(self, key):
        return str(os.path.join(self.store_root, key))

    def _get_file_timestamp(self, key):
        # Timestamps are not tracked for local artifacts.
        return None

    def get_qualified_location(self, key):
        # NOTE(review): emits a single-slash 'file:/' prefix; RFC 8089
        # file URIs use 'file://'. Kept as-is since consumers elsewhere
        # may parse this exact form — confirm before changing.
        return 'file:/' + self.store_root + '/' + key

    def get_bucket(self):
        return self.bucket

studio/local_db_provider.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
import os
2+
import json
3+
4+
from .keyvalue_provider import KeyValueProvider
5+
from .local_artifact_store import LocalArtifactStore
6+
7+
class LocalDbProvider(KeyValueProvider):
    """Key-value experiment database stored as JSON files on local disk.

    Each key maps to the file ``<endpoint>/<bucket>/<key>`` whose content
    is the JSON-serialized value.
    """

    def __init__(self, config, blocking_auth=True, verbose=10, store=None):
        """Create a local database provider.

        :param config: database configuration dict; reads 'endpoint'
            (root directory, default '~') and 'bucket'
            (default 'studioml-meta').
        :param blocking_auth: forwarded to the base provider.
        :param verbose: verbosity level.
        :param store: optional artifact store forwarded to the base
            provider.
        :raises ValueError: if the configured endpoint does not exist or
            is not a directory.
        """
        self.config = config
        # Fix: the original re-read config.get('bucket') *without* a
        # default after this point, clobbering 'studioml-meta' with None
        # when the config had no 'bucket' key and crashing os.path.join
        # below with a TypeError.
        self.bucket = config.get('bucket', 'studioml-meta')

        self.endpoint = config.get('endpoint', '~')
        self.db_root = os.path.realpath(os.path.expanduser(self.endpoint))
        if not os.path.isdir(self.db_root):
            raise ValueError(
                "Local DB root {} doesn't exist or not a directory!"
                .format(self.db_root))

        self.db_root = os.path.join(self.db_root, self.bucket)
        # Create the bucket directory itself; _ensure_path_dirs_exist()
        # would only have created its parent.
        os.makedirs(self.db_root, mode=0o777, exist_ok=True)

        super(LocalDbProvider, self).__init__(
            config,
            blocking_auth,
            verbose,
            store)

    def _ensure_path_dirs_exist(self, path):
        # Make sure the directory that will contain `path` exists.
        dirs = os.path.dirname(path)
        os.makedirs(dirs, mode=0o777, exist_ok=True)

    def _get(self, key, shallow=False):
        """Return the JSON value stored under `key`, or None if absent."""
        file_name = os.path.join(self.db_root, key)
        if not os.path.exists(file_name):
            return None
        with open(file_name) as infile:
            return json.load(infile)

    def _delete(self, key):
        """Remove the value stored under `key`, if any (no-op if absent)."""
        file_name = os.path.join(self.db_root, key)
        if os.path.exists(file_name):
            os.remove(file_name)

    def _set(self, key, value):
        """JSON-serialize `value` and store it under `key`."""
        file_name = os.path.join(self.db_root, key)
        self._ensure_path_dirs_exist(file_name)
        with open(file_name, 'w') as outfile:
            json.dump(value, outfile)
51+

studio/model.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
from .artifact_store import get_artifact_store
1515
from .http_provider import HTTPProvider
1616
from .firebase_provider import FirebaseProvider
17+
from .local_artifact_store import LocalArtifactStore
18+
from .local_db_provider import LocalDbProvider
1719
from .s3_provider import S3Provider
1820
from .gs_provider import GSProvider
1921
from .model_setup import setup_model
@@ -102,6 +104,16 @@ def get_db_provider(config=None, blocking_auth=True):
102104
blocking_auth=blocking_auth)
103105
artifact_store = db_provider.get_artifact_store()
104106

107+
elif db_config['type'].lower() == 'local':
108+
if artifact_store is None:
109+
artifact_store = LocalArtifactStore(db_config, "storage", verbose)
110+
111+
db_provider = LocalDbProvider(db_config,
112+
verbose=verbose,
113+
store=artifact_store,
114+
blocking_auth=blocking_auth)
115+
artifact_store = db_provider.get_artifact_store()
116+
105117
else:
106118
_model_setup = None
107119
raise ValueError('Unknown type of the database ' + db_config['type'])

0 commit comments

Comments
 (0)