-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Introduce pydantic schema #5531
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
Closed
Changes from all commits
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
50d471d
introduce pydantic schema
skshetry 8adfad1
add description and title
skshetry 65c9042
add pydantic as a test requirements
skshetry b365817
add minor tests
skshetry 8189eb5
delete schema
skshetry 1591e9b
remove use of typevars
skshetry 33874df
copy abstract method in all of the file types
skshetry File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
from pydantic import BaseModel as PydanticBaseModel | ||
from pydantic import Extra | ||
|
||
|
||
class BaseModel(PydanticBaseModel):
    # Shared base for the schema models. A `#` comment is used instead of a
    # docstring so that nothing extra can leak into generated schemas.

    class Config:
        # Unknown keys are rejected instead of being silently accepted.
        # TODO: figure out a way to make it configurable
        extra = Extra.forbid
File renamed without changes.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,190 @@ | ||
from typing import Any, Dict, List, Optional, Type, Union | ||
|
||
from pydantic import Field, validator | ||
|
||
from dvc.types import OptStr | ||
|
||
from .base import BaseModel | ||
|
||
|
||
class OutProps(BaseModel):
    # Per-output flags and metadata. Every field is optional and falls back
    # to the default given here.
    cache: bool = Field(default=True, description="Cache output by DVC")
    persist: bool = Field(
        default=False,
        description="Persist output between runs",
    )
    checkpoint: bool = Field(
        default=False,
        description=(
            "Indicate that the output is associated with in-code checkpoints"
        ),
    )
    desc: Optional[str] = Field(
        default=None,
        title="Description",
        description="User description for the output",
    )
|
||
|
||
class MetricProps(OutProps):
    # Adds nothing on top of OutProps; exists so metrics have their own type.
    ...
|
||
|
||
# Alias used wherever a string is expected to be a file path.
FilePath = str
|
||
|
||
class PlotProps(OutProps):
    # Output properties extended with default rendering options for a plot.
    template: Optional[FilePath] = Field(
        default=None,
        description="Default plot template",
    )
    x: OptStr = Field(
        default=None,
        description="Default field name to use as x-axis data",
    )
    y: OptStr = Field(
        default=None,
        description="Default field name to use as y-axis data",
    )
    x_label: OptStr = Field(
        default=None,
        description="Default label for the x-axis",
    )
    y_label: OptStr = Field(
        default=None,
        description="Default label for the y-axis",
    )
    title: OptStr = Field(default=None, description="Default plot title")
    header: bool = Field(
        default=False,
        description="Whether the target CSV or TSV has a header or not",
    )
|
||
|
||
class LiveProps(PlotProps):
    # Plot properties plus two dvclive switches, both enabled by default.
    summary: bool = Field(
        default=True,
        description="Signals dvclive to dump latest metrics file",
    )
    html: bool = Field(
        default=True,
        description="Signals dvclive to produce training report",
    )
|
||
|
||
# A locally defined variable block maps string keys to arbitrary values,
# e.g. {"foo": "foobar"}.
LocalVarKey = str
LocalVarValue = Any

# An import spec names a file, optionally followed by the keys to take from
# it, e.g. "file.txt", "file.txt:foo" or "file.txt:foo,bar".
# TODO: validate here?
VarImportSpec = str

# `vars` is a list mixing import specs and local variable blocks.
VarsSpec = List[Union[VarImportSpec, Dict[LocalVarKey, LocalVarValue]]]
|
||
# Name of a single parameter key, usually one found in `params.yaml`.
ParamKey = str
# `params` entries are either a bare key or a one-level mapping from a file
# path to the list of keys taken from that file.
ParamsSpec = List[Union[ParamKey, Dict[FilePath, List[ParamKey]]]]
|
||
|
||
class WithDescription(BaseModel):
    # Contributes an optional free-text `desc` field to models that
    # inherit from it.
    desc: OptStr = Field(
        default=None,
        title="Description",
        description="Description of the stage",
    )
|
||
|
||
class StageDefinition(WithDescription, BaseModel):
    """This is the raw one, which could be parametrized."""

    # Only `cmd` is required; every other key has an empty or falsy default.
    # The docstring above is kept verbatim because pydantic publishes the
    # model docstring as the schema description.
    cmd: Union[str, List[str]] = Field(
        ..., description="Command to run", title="Command(s)"
    )  # required
    wdir: OptStr = Field(
        None, description="Working directory", title="Working Directory"
    )
    deps: List[FilePath] = Field(
        default_factory=list,
        description="Dependencies for the stage",
        title="Dependencies",
    )
    params: ParamsSpec = Field(
        default_factory=list,
        description="Params for the stage",
        title="Parameter Dependencies",
    )
    vars: VarsSpec = Field(
        default_factory=list,
        description="Variables for the stage",
        title="Variables",
    )
    frozen: bool = Field(False, description="Assume stage as unchanged")
    meta: Any = Field(
        None, description="Additional information/metadata", title="Metadata"
    )
    always_changed: bool = Field(
        False, description="Assume stage as always changed"
    )
    # Each entry is either a bare path or a single-key mapping of path to
    # properties; the same shape is used for `plots` and `metrics` below.
    outs: List[Union[Dict[FilePath, OutProps], FilePath]] = Field(
        default_factory=list,
        description="Outputs of the stage",
        title="Outputs",
    )
    plots: List[
        Union[Dict[FilePath, Union[PlotProps, List[PlotProps]]], FilePath]
    ] = Field(
        default_factory=list, description="Plots of the stage", title="Plots"
    )
    metrics: List[Union[Dict[FilePath, MetricProps], FilePath]] = Field(
        default_factory=list,
        description="Metrics of the stage",
        title="Metrics",
    )
    # `live` is a mapping or a single path, never a list. pydantic does not
    # validate defaults unless asked to, so the empty default must itself
    # agree with the annotation: use an empty dict.
    live: Union[Dict[FilePath, LiveProps], FilePath] = Field(
        default_factory=dict,
        description="Declare output as dvclive",
        title="Dvclive",
    )
|
||
# Note: we don't support parametrization in props and in | ||
# frozen/always_changed/meta yet. | ||
|
||
|
||
# Generic container aliases for payloads whose contents are not inspected.
DictStrAny = Dict[str, Any]
ListAny = List[Any]

# A plain `str` at runtime. The separate name only marks the places where a
# parametrized string is expected rather than a normal one.
# TODO: validate with constr()?
ParametrizedString = str
|
||
# Long-form descriptions attached to the `foreach` and `do` fields. The
# embedded newlines are part of the text and must be preserved.
FOREACH_DESC = (
    "Iterable to loop through in foreach. Can be a parametrized string, "
    "list or a dictionary.\n"
    "\n"
    "The stages will be generated by iterating through this data, "
    "by substituting\n"
    "data in the `do` block."
)

DO_DESC = (
    "Parametrized stage definition that'll be substituted over for each "
    "of the\n"
    "value from the foreach data."
)
|
||
|
||
class ForeachDo(BaseModel):
    # A `foreach`/`do` pair; both keys are required.
    foreach: Union[ParametrizedString, ListAny, DictStrAny] = Field(
        default=...,
        description=FOREACH_DESC,
    )
    do: StageDefinition = Field(default=..., description=DO_DESC)
|
||
|
||
Definition = Union[ForeachDo, StageDefinition] | ||
StageName = str | ||
|
||
|
||
class Schema(BaseModel):
    # Top-level model: optional `vars` plus a mapping of stage name to
    # stage definition.
    vars: VarsSpec = Field(
        default_factory=list,
        title="Variables",
        description="Variables for the parametrization",
    )
    stages: Dict[StageName, Definition] = Field(
        default_factory=dict,
        description="List of stages",
    )

    @validator("stages", each_item=True, pre=True)
    @classmethod
    def validate_stages(cls, v: Any):
        # Runs on each raw stage entry before field validation and picks the
        # model explicitly, rather than leaving the choice to the Union.
        if not isinstance(v, dict):
            raise TypeError("must be a dict")

        # Either key alone is enough to treat the entry as foreach/do, so a
        # half-specified pair is reported against ForeachDo.
        uses_foreach = "foreach" in v or "do" in v
        model = ForeachDo if uses_foreach else StageDefinition
        return model.parse_obj(v)

    class Config:
        title = "dvc.yaml schema"
|
||
|
||
def get_schema(extra: str = "forbid") -> Type[Schema]:
    """Return the model class used to validate ``dvc.yaml`` contents.

    Args:
        extra: Requested policy for unknown keys. It is only checked for
            being non-empty; the same class is returned whatever its value.

    Returns:
        The ``Schema`` class itself, not an instance.

    Raises:
        ValueError: If ``extra`` is empty.
    """
    # Raise explicitly: an `assert` would be stripped when running with -O.
    if not extra:
        raise ValueError("extra must be a non-empty string")
    return Schema
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
To test the new schema, you can toggle this to `True`, and it should start using the new schema for validation.