45 changes: 45 additions & 0 deletions docs/local_filesystem_setup.rst
@@ -0,0 +1,45 @@
Setting up experiment storage and database in the local filesystem
===================================================================

This page describes how to set up StudioML to use the
local filesystem for storing experiment artifacts and metadata.
With this option, there is no need to set up any external
connection to S3, Minio, GCS, etc.

StudioML configuration
-----------------------

::

"studio_ml_config": {

...

"database": {
"type": "local",
"endpoint": SOME_DB_LOCAL_PATH,
"bucket": DB_BUCKET_NAME,
"authentication": "none"
},
"storage": {
"type": "local",
"endpoint": SOME_ARTIFACTS_LOCAL_PATH,
"bucket": ARTIFACTS_BUCKET_NAME,
}

...
}


With the StudioML database type set to "local",
all experiment metadata will be stored locally under the
directory SOME_DB_LOCAL_PATH/DB_BUCKET_NAME.
Similarly, with the storage type set to "local",
all experiment artifacts will be stored locally under the
directory SOME_ARTIFACTS_LOCAL_PATH/ARTIFACTS_BUCKET_NAME.

Note: when using "local" mode, it is recommended to apply it to
both the storage and the database configuration. It is, however,
technically possible to mix configurations, for example local
storage with an S3-based database.
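
For example, a configuration along the following lines (the paths and
bucket names here are purely illustrative) keeps experiment metadata under
``~/.studioml/db/studioml-meta`` and artifacts under
``~/.studioml/artifacts/studioml-artifacts``. Note that the two endpoint
directories must already exist; the bucket subdirectories beneath them are
created as needed::

    "studio_ml_config": {
        ...
        "database": {
            "type": "local",
            "endpoint": "~/.studioml/db",
            "bucket": "studioml-meta",
            "authentication": "none"
        },
        "storage": {
            "type": "local",
            "endpoint": "~/.studioml/artifacts",
            "bucket": "studioml-artifacts"
        }
        ...
    }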

4 changes: 3 additions & 1 deletion studio/artifact_store.py
@@ -1,8 +1,8 @@
from .firebase_artifact_store import FirebaseArtifactStore
from .gcloud_artifact_store import GCloudArtifactStore
from .local_artifact_store import LocalArtifactStore
from .s3_artifact_store import S3ArtifactStore


def get_artifact_store(config, blocking_auth=True, verbose=10):
    if config['type'].lower() == 'firebase':
        return FirebaseArtifactStore(
@@ -11,5 +11,7 @@ def get_artifact_store(config, blocking_auth=True, verbose=10):
        return GCloudArtifactStore(config, verbose=verbose)
    elif config['type'].lower() == 's3':
        return S3ArtifactStore(config, verbose=verbose)
    elif config['type'].lower() == 'local':
        return LocalArtifactStore(config, verbose=verbose)
Contributor Author: Adding "local" option to our selector.

    else:
        raise ValueError('Unknown storage type: ' + config['type'])
64 changes: 64 additions & 0 deletions studio/local_artifact_store.py
@@ -0,0 +1,64 @@
import calendar
Contributor Author: This implementation is as straightforward as it gets:
basically, we just copy files around with no intent to optimize things.

import os
import shutil

from .tartifact_store import TartifactStore

class LocalArtifactStore(TartifactStore):
    def __init__(self, config,
                 bucket_name=None,
                 verbose=10,
                 measure_timestamp_diff=False,
                 compression=None):

        if compression is None:
            compression = config.get('compression')

        self.endpoint = config.get('endpoint', '~')
        self.store_root = os.path.realpath(os.path.expanduser(self.endpoint))
        if not os.path.exists(self.store_root) \
                or not os.path.isdir(self.store_root):
            raise ValueError(
                "Artifact store root {0} doesn't exist or is not a directory!"
                .format(self.store_root))

        self.bucket = bucket_name
        if self.bucket is None:
            self.bucket = config.get('bucket')
        self.store_root = os.path.join(self.store_root, self.bucket)
        self._ensure_path_dirs_exist(self.store_root)

        super(LocalArtifactStore, self).__init__(
            measure_timestamp_diff,
            compression=compression,
            verbose=verbose)

    def _ensure_path_dirs_exist(self, path):
        dirs = os.path.dirname(path)
        os.makedirs(dirs, mode=0o777, exist_ok=True)

    def _upload_file(self, key, local_path):
        target_path = os.path.join(self.store_root, key)
        self._ensure_path_dirs_exist(target_path)
        shutil.copyfile(local_path, target_path)

    def _download_file(self, key, local_path, bucket=None):
        source_path = os.path.join(self.store_root, key)
        self._ensure_path_dirs_exist(local_path)
        shutil.copyfile(source_path, local_path)

    def _delete_file(self, key):
        os.remove(os.path.join(self.store_root, key))

    def _get_file_url(self, key, method='GET'):
        return str(os.path.join(self.store_root, key))

    def _get_file_post(self, key):
        return str(os.path.join(self.store_root, key))

    def _get_file_timestamp(self, key):
        return None

    def get_qualified_location(self, key):
        return 'file:/' + self.store_root + '/' + key

    def get_bucket(self):
        return self.bucket
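
A minimal usage sketch of the store above (purely illustrative: the /tmp path
and bucket name are made up, and it assumes the TartifactStore base
constructor needs nothing beyond the arguments forwarded to it):

    import os

    from studio.local_artifact_store import LocalArtifactStore

    # The endpoint directory must already exist; __init__ validates it
    # before appending the bucket name.
    root = "/tmp/studio-artifacts"
    os.makedirs(root, exist_ok=True)

    store = LocalArtifactStore({
        "type": "local",
        "endpoint": root,
        "bucket": "experiments",
    })

    print(store.get_bucket())
    # -> experiments
    print(store.get_qualified_location("e1/output.tgz"))
    # -> something like file:/tmp/studio-artifacts/experiments/e1/output.tgz
    #    (the realpath of the endpoint, plus bucket and key)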
51 changes: 51 additions & 0 deletions studio/local_db_provider.py
@@ -0,0 +1,51 @@
import os
import json
Contributor Author: Database low-level logic is implemented simply as a
directory, with file names being the keys and JSON values serialized into
the files themselves.


from .keyvalue_provider import KeyValueProvider
from .local_artifact_store import LocalArtifactStore

class LocalDbProvider(KeyValueProvider):

    def __init__(self, config, blocking_auth=True, verbose=10, store=None):
        self.config = config
        self.bucket = config.get('bucket', 'studioml-meta')

        self.endpoint = config.get('endpoint', '~')
        self.db_root = os.path.realpath(os.path.expanduser(self.endpoint))
        if not os.path.exists(self.db_root) \
                or not os.path.isdir(self.db_root):
            raise ValueError(
                "Local DB root {0} doesn't exist or is not a directory!"
                .format(self.db_root))

        self.db_root = os.path.join(self.db_root, self.bucket)
        self._ensure_path_dirs_exist(self.db_root)

        super(LocalDbProvider, self).__init__(
            config,
            blocking_auth,
            verbose,
            store)

    def _ensure_path_dirs_exist(self, path):
        dirs = os.path.dirname(path)
        os.makedirs(dirs, mode=0o777, exist_ok=True)

    def _get(self, key, shallow=False):
        file_name = os.path.join(self.db_root, key)
        if not os.path.exists(file_name):
            return None
        with open(file_name) as infile:
            result = json.load(infile)
        return result

    def _delete(self, key):
        file_name = os.path.join(self.db_root, key)
        if os.path.exists(file_name):
            os.remove(file_name)

    def _set(self, key, value):
        file_name = os.path.join(self.db_root, key)
        self._ensure_path_dirs_exist(file_name)
        with open(file_name, 'w') as outfile:
            json.dump(value, outfile)
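
To make the resulting on-disk layout concrete, here is a standalone sketch
that mirrors _set and _get directly (it bypasses the provider class itself;
the root path and key are hypothetical):

    import json
    import os

    # Hypothetical <endpoint>/<bucket> root, mirroring LocalDbProvider.db_root.
    db_root = "/tmp/studioml-db/studioml-meta"

    def local_set(key, value):
        # Like _set: the key doubles as a relative file path, the value is JSON.
        file_name = os.path.join(db_root, key)
        os.makedirs(os.path.dirname(file_name), mode=0o777, exist_ok=True)
        with open(file_name, 'w') as outfile:
            json.dump(value, outfile)

    def local_get(key):
        # Like _get: a missing key simply yields None.
        file_name = os.path.join(db_root, key)
        if not os.path.exists(file_name):
            return None
        with open(file_name) as infile:
            return json.load(infile)

    local_set("experiments/e1/status", {"status": "running"})
    print(local_get("experiments/e1/status"))   # -> {'status': 'running'}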

12 changes: 12 additions & 0 deletions studio/model.py
@@ -14,6 +14,8 @@
from .artifact_store import get_artifact_store
from .http_provider import HTTPProvider
from .firebase_provider import FirebaseProvider
from .local_artifact_store import LocalArtifactStore
from .local_db_provider import LocalDbProvider
from .s3_provider import S3Provider
from .gs_provider import GSProvider
from .model_setup import setup_model
@@ -102,6 +104,16 @@ def get_db_provider(config=None, blocking_auth=True):
            blocking_auth=blocking_auth)
        artifact_store = db_provider.get_artifact_store()

    elif db_config['type'].lower() == 'local':
Contributor Author: Adding "local" option to our selector.

        if artifact_store is None:
            artifact_store = LocalArtifactStore(db_config, "storage", verbose)

        db_provider = LocalDbProvider(db_config,
                                      verbose=verbose,
                                      store=artifact_store,
                                      blocking_auth=blocking_auth)
        artifact_store = db_provider.get_artifact_store()

    else:
        _model_setup = None
        raise ValueError('Unknown type of the database ' + db_config['type'])
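
For reference, a "database" section of the configuration along these lines
(values are illustrative) takes the new branch above, yielding a
LocalDbProvider backed by a LocalArtifactStore:

    # Hypothetical db_config; get_db_provider() dispatches on db_config['type'].
    db_config = {
        "type": "local",
        "endpoint": "~/.studioml/db",   # must already exist
        "bucket": "studioml-meta",
        "authentication": "none"
    }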