-
Notifications
You must be signed in to change notification settings - Fork 55
Implement local filesystem option for studioml experiment storage/database. #387
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
620d0c1
c733c23
5feffa6
dd2ea56
197f3ef
3eb16d4
cf4d9e8
0291be0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
Setting up experiment storage and database in local filesystem | ||
============================================================== | ||
|
||
This page describes how to set up StudioML to use | ||
the local filesystem for storing experiment artifacts and metadata. | ||
With this option, there is no need to set up any external | ||
connection to S3/Minio/GCS etc. | ||
|
||
StudioML configuration | ||
---------------------- | ||
|
||
:: | ||
|
||
"studio_ml_config": { | ||
|
||
... | ||
|
||
"database": { | ||
"type": "local", | ||
"endpoint": SOME_DB_LOCAL_PATH, | ||
"bucket": DB_BUCKET_NAME, | ||
"authentication": "none" | ||
}, | ||
"storage": { | ||
"type": "local", | ||
"endpoint": SOME_ARTIFACTS_LOCAL_PATH, | ||
"bucket": ARTIFACTS_BUCKET_NAME, | ||
} | ||
|
||
... | ||
} | ||
|
||
|
||
With StudioML database type set to "local", | ||
all experiment meta-data will be stored locally under | ||
directory: SOME_DB_LOCAL_PATH/DB_BUCKET_NAME. | ||
Similarly, with storage type set to "local", | ||
all experiment artifacts will be stored locally under | ||
directory: SOME_ARTIFACTS_LOCAL_PATH/ARTIFACTS_BUCKET_NAME. | ||
|
||
Note: if you are using "local" mode, it is recommended to use it | ||
for both the storage and the database configuration. | ||
It is, however, technically possible to mix modes: for example, a local storage configuration | ||
combined with an S3-based database configuration. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
import calendar | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This implementation is as straightforward as it gets: |
||
import os | ||
import shutil | ||
|
||
from .tartifact_store import TartifactStore | ||
|
||
class LocalArtifactStore(TartifactStore):
    """Artifact store backed by a directory on the local filesystem.

    Artifacts are kept as plain files under ``<endpoint>/<bucket>``, where
    ``endpoint`` must be an existing directory and ``bucket`` is a
    sub-directory of it that is created on demand.
    """

    def __init__(self, config,
                 bucket_name=None,
                 verbose=10,
                 measure_timestamp_diff=False,
                 compression=None):
        """Resolve the store root directory and initialise the base store.

        Args:
            config: Mapping with the storage settings. The keys read here are
                ``endpoint`` (root directory, defaults to ``~``), ``bucket``
                (used when ``bucket_name`` is None) and ``compression``
                (used when ``compression`` is None).
            bucket_name: Name of the sub-directory holding the artifacts;
                overrides ``config['bucket']`` when given.
            verbose: Passed through to the base class.
            measure_timestamp_diff: Passed through to the base class.
            compression: Passed through to the base class; falls back to
                ``config['compression']`` when None.

        Raises:
            ValueError: If the endpoint is not an existing directory, or if
                no bucket name is available from either source.
        """
        if compression is None:
            compression = config.get('compression')

        self.endpoint = config.get('endpoint', '~')
        self.store_root = os.path.realpath(os.path.expanduser(self.endpoint))
        # isdir() is False for a missing path as well, so one check suffices.
        if not os.path.isdir(self.store_root):
            raise ValueError(
                "Local store root {} doesn't exist or not a directory!"
                .format(self.store_root))

        self.bucket = bucket_name
        if self.bucket is None:
            self.bucket = config.get('bucket')
        if self.bucket is None:
            # Fail with a clear message instead of a TypeError in join().
            raise ValueError(
                "Local store bucket name is not specified!")

        self.store_root = os.path.join(self.store_root, self.bucket)
        # Create the bucket directory itself, not merely its parent.
        os.makedirs(self.store_root, mode=0o777, exist_ok=True)

        super(LocalArtifactStore, self).__init__(
            measure_timestamp_diff,
            compression=compression,
            verbose=verbose)

    def _ensure_path_dirs_exist(self, path):
        """Create every missing parent directory of ``path``."""
        dirs = os.path.dirname(path)
        # A bare file name has no directory part; makedirs('') would raise.
        if dirs:
            os.makedirs(dirs, mode=0o777, exist_ok=True)

    def _upload_file(self, key, local_path):
        """Copy the file at ``local_path`` into the store under ``key``."""
        target_path = os.path.join(self.store_root, key)
        self._ensure_path_dirs_exist(target_path)
        shutil.copyfile(local_path, target_path)

    def _download_file(self, key, local_path, bucket=None):
        """Copy the stored file for ``key`` to ``local_path``.

        ``bucket`` is accepted but not used by this implementation.
        """
        source_path = os.path.join(self.store_root, key)
        self._ensure_path_dirs_exist(local_path)
        shutil.copyfile(source_path, local_path)

    def _delete_file(self, key):
        """Remove the stored file for ``key``; raises if it does not exist."""
        os.remove(os.path.join(self.store_root, key))

    def _get_file_url(self, key, method='GET'):
        """Return the local path of ``key``; ``method`` is not used."""
        return str(os.path.join(self.store_root, key))

    def _get_file_post(self, key):
        """Return the local path of ``key``."""
        return str(os.path.join(self.store_root, key))

    def _get_file_timestamp(self, key):
        """Return None: this store does not report file timestamps."""
        return None

    def get_qualified_location(self, key):
        """Return a ``file:``-prefixed location string for ``key``."""
        # NOTE(review): store_root is absolute, so on POSIX the result starts
        # with 'file://' followed by the first path component - confirm that
        # consumers expect this form rather than 'file:///...'.
        return 'file:/' + self.store_root + '/' + key

    def get_bucket(self):
        """Return the bucket (sub-directory) name used by this store."""
        return self.bucket
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
import os | ||
import json | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Database low-level logic implemented simply as directory with file names being the keys |
||
|
||
from .keyvalue_provider import KeyValueProvider | ||
from .local_artifact_store import LocalArtifactStore | ||
|
||
class LocalDbProvider(KeyValueProvider):
    """Key-value provider that stores each key as a JSON file on local disk.

    Values live under ``<endpoint>/<bucket>``; the key is used as the
    relative file path and the value is serialised with ``json``.
    """

    def __init__(self, config, blocking_auth=True, verbose=10, store=None):
        """Resolve the database root directory and initialise the base class.

        Args:
            config: Mapping with the database settings. The keys read here
                are ``endpoint`` (root directory, defaults to ``~``) and
                ``bucket`` (sub-directory, defaults to ``studioml-meta``).
            blocking_auth: Passed through to the base class.
            verbose: Passed through to the base class.
            store: Passed through to the base class.

        Raises:
            ValueError: If the endpoint is not an existing directory.
        """
        self.config = config
        # Read the bucket once so the default really applies when the key
        # is absent from the configuration.
        self.bucket = config.get('bucket', 'studioml-meta')

        self.endpoint = config.get('endpoint', '~')
        self.db_root = os.path.realpath(os.path.expanduser(self.endpoint))
        # isdir() is False for a missing path as well, so one check suffices.
        if not os.path.isdir(self.db_root):
            raise ValueError(
                "Local DB root {} doesn't exist or not a directory!"
                .format(self.db_root))

        self.db_root = os.path.join(self.db_root, self.bucket)
        # Create the bucket directory itself, not merely its parent.
        os.makedirs(self.db_root, mode=0o777, exist_ok=True)

        super(LocalDbProvider, self).__init__(
            config,
            blocking_auth,
            verbose,
            store)

    def _ensure_path_dirs_exist(self, path):
        """Create every missing parent directory of ``path``."""
        dirs = os.path.dirname(path)
        # A bare file name has no directory part; makedirs('') would raise.
        if dirs:
            os.makedirs(dirs, mode=0o777, exist_ok=True)

    def _get(self, key, shallow=False):
        """Return the decoded JSON value stored for ``key``, or None.

        ``shallow`` is accepted but not used by this implementation.
        """
        file_name = os.path.join(self.db_root, key)
        # Open directly instead of testing existence first, so a concurrent
        # delete cannot slip in between the check and the read.
        try:
            with open(file_name) as infile:
                return json.load(infile)
        except (FileNotFoundError, NotADirectoryError):
            return None

    def _delete(self, key):
        """Remove the file stored for ``key``; a missing key is a no-op."""
        file_name = os.path.join(self.db_root, key)
        try:
            os.remove(file_name)
        except (FileNotFoundError, NotADirectoryError):
            pass

    def _set(self, key, value):
        """Serialise ``value`` as JSON into the file for ``key``."""
        file_name = os.path.join(self.db_root, key)
        self._ensure_path_dirs_exist(file_name)
        with open(file_name, 'w') as outfile:
            json.dump(value, outfile)
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,6 +14,8 @@ | |
from .artifact_store import get_artifact_store | ||
from .http_provider import HTTPProvider | ||
from .firebase_provider import FirebaseProvider | ||
from .local_artifact_store import LocalArtifactStore | ||
from .local_db_provider import LocalDbProvider | ||
from .s3_provider import S3Provider | ||
from .gs_provider import GSProvider | ||
from .model_setup import setup_model | ||
|
@@ -102,6 +104,16 @@ def get_db_provider(config=None, blocking_auth=True): | |
blocking_auth=blocking_auth) | ||
artifact_store = db_provider.get_artifact_store() | ||
|
||
elif db_config['type'].lower() == 'local': | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Adding "local" option to our selector. |
||
if artifact_store is None: | ||
artifact_store = LocalArtifactStore(db_config, "storage", verbose) | ||
|
||
db_provider = LocalDbProvider(db_config, | ||
verbose=verbose, | ||
store=artifact_store, | ||
blocking_auth=blocking_auth) | ||
artifact_store = db_provider.get_artifact_store() | ||
|
||
else: | ||
_model_setup = None | ||
raise ValueError('Unknown type of the database ' + db_config['type']) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Adding "local" option to our selector.