Capture standard output when loading the predictor

aron · aron · commit 18a3e90fe4fb · 2025-02-13T11:35:53.000Z
This commit fixes a bug where we failed to flush the StreamRedirector
when catching an exception during the loading of the predictor module.

We now wrap the load in the existing `_handle_setup_error` context manager
to ensure that the streams are flushed. I've kept the name of the context
manager the same because this all happens as part of the model setup.

Two regression tests have been added to reproduce the issue and verify
the fix, in both normal and concurrent/async mode.
diff --git a/python/cog/server/worker.py b/python/cog/server/worker.py
@@ -430,9 +430,11 @@ def run(self) -> None:
             )
 
         with scope(Scope(record_metric=self.record_metric)), redirector:
-            self._predictor = self._load_predictor()
+            with self._handle_setup_error(redirector):
+                wait_for_env()
+                self._predictor = load_predictor_from_ref(self._predictor_ref)
 
-            # If _load_predictor hasn't returned a predictor instance then
+            # If load_predictor_from_ref hasn't returned a predictor instance then
             # it has sent a error Done event and we're done here.
             if not self._predictor:
                 return
@@ -483,27 +485,6 @@ def _current_tag(self) -> Optional[str]:
             return _get_current_scope()._tag
         return self._sync_tag
 
-    def _load_predictor(self) -> Optional[BasePredictor]:
-        done = Done()
-        wait_for_env()
-        try:
-            return load_predictor_from_ref(self._predictor_ref)
-        except Exception as e:  # pylint: disable=broad-exception-caught
-            traceback.print_exc()
-            done.error = True
-            done.error_detail = str(e)
-            self._events.send(Envelope(event=done))
-        except BaseException as e:
-            # For SystemExit and friends we attempt to add some useful context
-            # to the logs, but reraise to ensure the process dies.
-            traceback.print_exc()
-            done.error = True
-            done.error_detail = str(e)
-            self._events.send(Envelope(event=done))
-            raise
-
-        return None
-
     def _validate_predictor(
         self,
         redirector: Union[StreamRedirector, SimpleStreamRedirector],
diff --git a/python/tests/server/fixtures/import_err.py b/python/tests/server/fixtures/import_err.py
@@ -0,0 +1,14 @@
+import sys
+
+sys.stdout.write("writing to stdout at import time\n")
+sys.stderr.write("writing to stderr at import time\n")
+
+import missing_module
+
+
+class Predictor:
+    def setup(self):
+        pass
+
+    def predict(self):
+        print("did predict")
diff --git a/python/tests/server/test_worker.py b/python/tests/server/test_worker.py
@@ -487,7 +487,7 @@ def test_output(worker, payloads, output_generator, data):
     SETUP_LOGS_FIXTURES,
     indirect=["worker"],
 )
-def test_setup_logging(worker, expected_stdout, expected_stderr):
+def test_setup_logging(worker: Worker, expected_stdout, expected_stderr):
     """
     We should get the logs we expect from predictors that generate logs during
     setup.
@@ -499,6 +499,32 @@ def test_setup_logging(worker, expected_stdout, expected_stderr):
     assert result.stderr == expected_stderr
 
 
+@uses_worker_configs(
+    [
+        WorkerConfig("import_err", setup=False),
+        WorkerConfig("import_err", setup=False, min_python=(3, 11), is_async=True),
+    ]
+)
+def test_predictor_load_error_logging(worker: Worker):
+    """
+    This test ensures that we capture standard output that occurs when the predictor
+    errors while it is being loaded, before setup or predict are even run.
+    """
+    result = _process(worker, worker.setup, swallow_exceptions=True)
+
+    assert result.done.error
+    assert result.done.error_detail == "No module named 'missing_module'"
+
+    assert result.stdout == "writing to stdout at import time\n"
+    stderr_lines = result.stderr.splitlines(keepends=True)
+    assert stderr_lines[0] == "writing to stderr at import time\n"
+
+    assert "python/tests/server/fixtures/import_err.py" in stderr_lines[-3]
+    assert "line 6" in stderr_lines[-3]
+    assert "import missing_module" in stderr_lines[-2]
+    assert stderr_lines[-1] == "ModuleNotFoundError: No module named 'missing_module'\n"
+
+
 @pytest.mark.parametrize(
     "worker,expected_stdout,expected_stderr",
     PREDICT_LOGS_FIXTURES,