Skip to content

Commit bb5aa14

Browse files
authored
endpoint selector tests (#58)
* add detection warnings for selected resources (#57) * adds basic warnings framework with primary key not detected warning * add tests for primary key warning and unresolved path params warning * update ci config * add warnings for missing response body * move config to config object for tests * fix ids to render * add test for deselected parent * add basic test for endpoint filtering * fix test error
1 parent 399a2ae commit bb5aa14

File tree

20 files changed

+354
-2128
lines changed

20 files changed

+354
-2128
lines changed

dlt_openapi/__init__.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from dlt_openapi.utils.misc import import_class_from_string
1111

1212
from .config import Config
13-
from .detector.base_detector import BaseDetector
13+
from .detector.base_detector import GLOBAL_WARNING_KEY, BaseDetector
1414
from .parser.openapi_parser import OpenapiParser
1515
from .renderer.base_renderer import BaseRenderer
1616

@@ -53,11 +53,32 @@ def render(self, dry: bool = False) -> None:
5353
logger.info("Rendering project")
5454
if self.config.endpoint_filter:
5555
filtered_endpoints = self.config.endpoint_filter(self.openapi.endpoints)
56-
self.openapi.endpoints.set_names_to_render(filtered_endpoints)
56+
self.openapi.endpoints.set_ids_to_render(filtered_endpoints)
5757
self.renderer.run(self.openapi, dry=dry)
5858
logger.success(f"Rendered project to: {self.config.project_dir}")
5959
logger.info("You can now run your pipeline from this folder with 'python pipeline.py'.")
6060

61+
def print_warnings(self) -> None:
    """Log the warnings collected by the detector.

    Global warnings are always logged. Per-endpoint warnings are logged only
    for endpoints that are marked for rendering. If the detector reports no
    warnings at all, a single info message is logged instead.
    """
    warnings = self.detector.get_warnings()
    if not warnings:
        logger.info("No warnings generated during parsing and detection")
        return

    # print the global warnings
    if global_warnings := warnings.get(GLOBAL_WARNING_KEY):
        logger.warning("Global warnings:")
        for w in global_warnings:
            logger.warning(w.msg)

    # read the collection attributes once up front; nothing in the loop below
    # modifies them, so there is no need to fetch them again per warning entry
    endpoints = self.openapi.endpoints
    ids_to_render = endpoints.endpoint_ids_to_render
    endpoints_by_id = endpoints.endpoints_by_id

    # print warnings, but only for endpoints that were rendered
    for endpoint_id, endpoint_warnings in warnings.items():
        if endpoint_id not in ids_to_render:
            continue
        e = endpoints_by_id[endpoint_id]
        logger.warning(f"Warnings for endpoint {e.method} {e.path}:")
        for w in endpoint_warnings:
            logger.warning(w.msg)
81+
6182

6283
def _get_project_for_url_or_path( # pylint: disable=too-many-arguments
6384
url: Optional[str],
@@ -95,4 +116,5 @@ def create_new_client(
95116
project.parse()
96117
project.detect()
97118
project.render()
119+
project.print_warnings()
98120
return project

dlt_openapi/cli/cli_endpoint_selection.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,4 +25,4 @@ def questionary_endpoint_selection(endpoints: EndpointCollection) -> Set[str]:
2525
).ask()
2626

2727
# return resource names of selected endpoints
28-
return {e.detected_resource_name for e in selected_endpoints}
28+
return {e.id for e in selected_endpoints}

dlt_openapi/config.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,18 @@ class Config(BaseModel):
4343
global_limit: int = 0
4444
"""Set a limit on how many items are emitted from a resource"""
4545

46+
# internal, do not set via config file
4647
project_dir: Path = None
4748

4849
def __init__(self, *args: Any, **kwargs: Any) -> None:
4950
super(Config, self).__init__(*args, **kwargs)
50-
base_dir = Path.cwd() if not self.output_path else Path.cwd() / self.output_path
51-
project_folder = self.project_name + self.project_folder_suffix
52-
self.project_dir = base_dir / project_folder
51+
self.prepare()
52+
53+
def prepare(self) -> None:
    """Derive ``project_dir`` from the project name, folder suffix and output path.

    Does nothing when no project name is set. A missing or empty folder
    suffix is treated as "no suffix", so the project folder is then named
    exactly like the project.
    """
    if not self.project_name:
        return

    # an empty suffix is a valid choice and must not prevent project_dir from being set
    suffix = self.project_folder_suffix or ""
    base_dir = Path.cwd() / self.output_path if self.output_path else Path.cwd()
    self.project_dir = base_dir / (self.project_name + suffix)
5358

5459
@staticmethod
5560
def load_from_path(path: Path, *args: Any, **kwargs: Any) -> "Config":

dlt_openapi/detector/base_detector.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,17 @@
33
"""
44

55
from abc import abstractmethod
6-
from typing import TYPE_CHECKING
6+
from typing import TYPE_CHECKING, Dict, List
77

88
if TYPE_CHECKING:
99
from dlt_openapi.parser.openapi_parser import OpenapiParser
1010

11+
GLOBAL_WARNING_KEY = "global"
12+
13+
14+
class BaseDetectionWarning:
    """Base class for warnings collected while running a detector."""

    # human readable message; subclasses override it or set it per instance
    msg: str = ""

    def __repr__(self) -> str:
        return f"{type(self).__name__}(msg={self.msg!r})"
16+
1117

1218
class BaseDetector:
1319
@abstractmethod
@@ -18,3 +24,8 @@ def run(self, open_api: "OpenapiParser") -> None:
1824
open_api (OpenapiParser): OpenAPI parser instance
1925
"""
2026
...
27+
28+
@abstractmethod
29+
def get_warnings(self) -> Dict[str, List[BaseDetectionWarning]]:
30+
"""Get all warnings encountered during detection run"""
31+
...

dlt_openapi/detector/default/__init__.py

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from typing import Dict, List, Optional, Tuple, Union, cast
66

77
from dlt_openapi.config import Config
8-
from dlt_openapi.detector.base_detector import BaseDetector
8+
from dlt_openapi.detector.base_detector import GLOBAL_WARNING_KEY, BaseDetector
99
from dlt_openapi.detector.default import utils
1010
from dlt_openapi.detector.default.primary_key import detect_primary_key_by_name
1111
from dlt_openapi.parser.endpoints import Endpoint, EndpointCollection, Response, TransformerSetting
@@ -36,16 +36,27 @@
3636
RE_UNIQUE_KEY,
3737
)
3838
from .utils import to_int
39+
from .warnings import (
40+
BaseDetectionWarning,
41+
DataResponseNoBodyWarning,
42+
DataResponseUndetectedWarning,
43+
PrimaryKeyNotFoundWarning,
44+
UnresolvedPathParametersWarning,
45+
)
3946

4047
Tree = Dict[str, Union["str", "Tree"]]
4148

4249

4350
class DefaultDetector(BaseDetector):
51+
52+
warnings: Dict[str, List[BaseDetectionWarning]] = {}
53+
4454
def __init__(self, config: Config) -> None:
4555
self.config = config
4656

4757
def run(self, open_api: OpenapiParser) -> None:
4858
"""Run the detector"""
59+
self.warnings = {}
4960

5061
# discover stuff from responses
5162
self.detect_paginators_and_responses(open_api.endpoints)
@@ -62,6 +73,11 @@ def run(self, open_api: OpenapiParser) -> None:
6273
# and sort resources by table name
6374
open_api.endpoints.endpoints.sort(key=lambda e: e.detected_table_name)
6475

76+
# add some warnings
77+
for e in open_api.endpoints.endpoints:
78+
if params := e.unresolvable_path_param_names:
79+
self._add_warning(UnresolvedPathParametersWarning(params), e)
80+
6581
def detect_resource_names(self, endpoints: EndpointCollection) -> None:
6682
"""iterate all endpoints and find a strategy to select the right resource name"""
6783

@@ -144,9 +160,9 @@ def detect_paginators_and_responses(self, endpoints: EndpointCollection) -> None
144160
endpoint.detected_data_response.detected_payload = self.detect_response_payload(
145161
endpoint.detected_data_response, expect_list=expect_list
146162
)
147-
self.detect_primary_key(endpoint.detected_data_response, endpoint.path)
163+
self.detect_primary_key(endpoint, endpoint.detected_data_response, endpoint.path)
148164

149-
def detect_primary_key(self, response: Response, path: str) -> None:
165+
def detect_primary_key(self, e: Endpoint, response: Response, path: str) -> None:
150166
"""detect the primary key from the payload"""
151167
if not response.detected_payload:
152168
return
@@ -179,6 +195,9 @@ def detect_primary_key(self, response: Response, path: str) -> None:
179195
elif uuid_paths:
180196
response.detected_primary_key = uuid_paths[0]
181197

198+
if not response.detected_primary_key:
199+
self._add_warning(PrimaryKeyNotFoundWarning(), e)
200+
182201
def detect_main_response(self, endpoint: Endpoint) -> Optional[Response]:
183202
"""Get main response and pagination for endpoint"""
184203

@@ -191,6 +210,12 @@ def detect_main_response(self, endpoint: Endpoint) -> Optional[Response]:
191210
if response.status_code.startswith("2") and not main_response:
192211
main_response = response
193212

213+
if not main_response:
214+
self._add_warning(DataResponseUndetectedWarning(), endpoint)
215+
216+
if main_response and not main_response.schema:
217+
self._add_warning(DataResponseNoBodyWarning(), endpoint)
218+
194219
return main_response
195220

196221
def detect_response_payload(self, response: Response, expect_list: bool) -> Optional[DataPropertyPath]:
@@ -397,3 +422,11 @@ def find_nearest_list_parent(endpoint: Endpoint) -> Optional[Endpoint]:
397422
endpoint.detected_parent = find_nearest_list_parent(endpoint)
398423
if endpoint.detected_parent:
399424
endpoint.detected_parent.detected_children.append(endpoint)
425+
426+
def get_warnings(self) -> Dict[str, List[BaseDetectionWarning]]:
    """Return the warnings stored on this detector."""
    collected = self.warnings
    return collected
428+
429+
def _add_warning(self, warning: BaseDetectionWarning, e: Optional[Endpoint] = None) -> None:
430+
key = e.id if e else GLOBAL_WARNING_KEY
431+
warning_list = self.warnings.setdefault(key, [])
432+
warning_list.append(warning)
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
from typing import List
2+
3+
from dlt_openapi.detector.base_detector import BaseDetectionWarning
4+
5+
6+
class PrimaryKeyNotFoundWarning(BaseDetectionWarning):
    """Warning used when no primary key could be detected."""

    msg: str = "Primary key could not be detected"
8+
9+
10+
class UnresolvedPathParametersWarning(BaseDetectionWarning):
    """Warning listing the path parameters that could not be resolved."""

    def __init__(self, params: List[str]) -> None:
        """Build the message from the given parameter names.

        Args:
            params: Names of the unresolved path parameters.
        """
        # keep a private copy: the stored names then stay in sync with the
        # message even if the caller later mutates its list, and a one-shot
        # iterable is not exhausted before it is stored
        self.params = list(params)
        self.msg = f"Could not resolve all path params, setting default values for: {','.join(self.params)}"
15+
16+
17+
class DataResponseUndetectedWarning(BaseDetectionWarning):
    """Warning used when no main data response with a 2xx status code was found."""

    msg: str = (
        "Could not detect the main data response with a status code 2xx. "
        "Will not be able to detect primary key and some paginators."
    )
22+
23+
24+
class DataResponseNoBodyWarning(BaseDetectionWarning):
    """Warning used when the main data response has no json response schema."""

    msg: str = (
        "No json response schema defined on main data response. "
        "Will not be able to detect primary key and some paginators."
    )

dlt_openapi/parser/endpoints.py

Lines changed: 21 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,11 @@ class Endpoint:
6161
detected_children: List["Endpoint"] = field(default_factory=list)
6262
detected_transformer_settings: Optional[TransformerSetting] = None
6363

64+
@property
def id(self) -> str:
    """Identifier of this endpoint, taken from its operation id."""
    endpoint_id = self.operation_id
    return endpoint_id
68+
6469
@property
6570
def payload(self) -> Optional[DataPropertyPath]:
6671
"""gets payload dataproperty path if detected"""
@@ -159,46 +164,39 @@ def from_operation(
159164
@dataclass
160165
class EndpointCollection:
161166
endpoints: List[Endpoint]
162-
names_to_render: Set[str] = field(default_factory=set)
163-
names_to_deselect: Set[str] = field(default_factory=set)
167+
endpoint_ids_to_render: Set[str] = field(default_factory=set)
168+
endpoint_ids_to_deselect: Set[str] = field(default_factory=set)
164169

165170
@property
166171
def all_endpoints_to_render(self) -> List[Endpoint]:
167172
"""get all endpoints we want to render"""
168-
if not self.names_to_render:
169-
return self.endpoints
170-
return [e for e in self.endpoints if e.detected_resource_name in self.names_to_render]
171-
172-
@property
173-
def all_endpoints_for_selector(self) -> List[Endpoint]:
174-
pass
173+
return [e for e in self.endpoints if e.id in self.endpoint_ids_to_render]
175174

176175
@property
177176
def endpoints_by_path(self) -> Dict[str, Endpoint]:
178177
"""Endpoints by path"""
179178
return {ep.path: ep for ep in self.endpoints}
180179

181180
@property
182-
def endpoints_by_detected_resource_name(self) -> Dict[str, Endpoint]:
181+
def endpoints_by_id(self) -> Dict[str, Endpoint]:
183182
"""Endpoints keyed by their id"""
184-
return {ep.detected_resource_name: ep for ep in self.endpoints}
183+
return {ep.id: ep for ep in self.endpoints}
185184

186-
def set_names_to_render(self, names: Set[str]) -> None:
187-
selected_names = set()
188-
render_names = set()
185+
def set_ids_to_render(self, ids: Set[str]) -> None:
    """Select the endpoints to render by id.

    Every requested endpoint is marked for rendering. The ancestors of a
    requested transformer endpoint are marked for rendering as well; those
    that were not requested themselves end up in ``endpoint_ids_to_deselect``.

    Args:
        ids: Ids of the endpoints that were selected.

    Raises:
        KeyError: If an id does not belong to any endpoint of this collection.
    """
    # fetch the id lookup once and reuse it for every requested id
    endpoints_by_id = self.endpoints_by_id
    selected_ids: Set[str] = set()
    ids_to_render: Set[str] = set()

    # traverse ancestry chain and make sure parent endpoints are also marked for rendering
    # but deselected
    for endpoint_id in ids:
        ep = endpoints_by_id[endpoint_id]
        ids_to_render.add(ep.id)
        selected_ids.add(ep.id)
        while ep.transformer and ep.parent:
            ids_to_render.add(ep.parent.id)
            ep = ep.parent

    self.endpoint_ids_to_render = ids_to_render
    self.endpoint_ids_to_deselect = ids_to_render - selected_ids
202200

203201
@classmethod
204202
def from_context(cls, context: OpenapiContext) -> "EndpointCollection":
@@ -221,4 +219,4 @@ def from_context(cls, context: OpenapiContext) -> "EndpointCollection":
221219
context=context,
222220
)
223221
)
224-
return cls(endpoints=endpoints)
222+
return cls(endpoints=endpoints, endpoint_ids_to_render={e.id for e in endpoints})

dlt_openapi/renderer/default/templates/source.py.j2

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def {{ source_name }}(
3232
{% if endpoint.primary_key %}
3333
"primary_key": "{{ endpoint.primary_key }}",
3434
"write_disposition": "merge",
35-
{% if endpoint.detected_resource_name in endpoint_collection.names_to_deselect %}
35+
{% if endpoint.id in endpoint_collection.endpoint_ids_to_deselect %}
3636
"selected": False,
3737
{% endif %}
3838
{% endif %}

0 commit comments

Comments
 (0)