@@ -173,8 +173,8 @@ def __call__(self, value):
173
173
class Pipeline :
174
174
"""
175
175
Pipeline class
176
- Will enable mapping data from an iterator source to be passed down various stages
177
- of execution, where the result of each estage is fed to the next one
176
+ Will enable mapping data from an iterable source to be passed down various stages
177
+ of execution, where the result of each stage is fed to the next one
178
178
179
179
The difference for just calling one (or more) stages inline in a for function
180
180
that pipeline allows for fine grained concurrency specification and error handling
@@ -192,6 +192,23 @@ def __init__(
192
192
preserve_order : bool = False ,
193
193
max_simultaneous_records : t .Optional [int ] = None ,
194
194
):
195
+ """
196
+ Args:
197
+ - stages: One async or sync callable which will process one data item at a time
198
+ - TBD? accept generators as stages? (input data would be ".send"ed into it)
199
+ - data: async or sync generator representing the data source
200
+ - max_concurrency: Maximum number of concurrent tasks _for_ _each_ stage
201
+ (i.e. if there are 2 stages, and max_concurrency is set to 4, we may have
202
+ up to 8 concurrent tasks running at once in the pipeline, but each stage is
203
+ limited to 4)
204
+ - on_error: What to do if any stage raises an exception - defaults to re-raise the
205
+ exception and stop the whole pipeline
206
+ - preserve_order: whether to yield the final results in the same order they were acquired from data.
207
+ - max_simultaneous_records: limit on amount of records to hold across all stages and input in internal
208
+ data structures: the idea is to throttle data consumption in order to limit the
209
+ amount of memory used by the Pipeline
210
+
211
+ """
195
212
self .max_concurrency = max_concurrency
196
213
self .data = _as_async_iterable (data )
197
214
self .preserve_order = preserve_order
0 commit comments