@@ -6,10 +6,11 @@
 from typing import Union
 from pydantic import BaseModel, field_validator
 import pathlib
+import av
 
 from .interface import Pipeline
 from comfystream.pipeline import Pipeline as ComfyStreamPipeline
-from trickle import VideoFrame, VideoOutput
+from trickle import VideoFrame, VideoOutput, AudioFrame, AudioOutput
 
 import logging
 
@@ -64,22 +65,22 @@ async def initialize(self, **params):
         await self.pipeline.warm_video()
         logging.info("Pipeline initialization and warmup complete")
 
+
     async def put_video_frame(self, frame: VideoFrame, request_id: str):
-        # Convert VideoFrame to format expected by comfystream
-        image_np = np.array(frame.image.convert("RGB")).astype(np.float32) / 255.0
-        frame.side_data.input = torch.tensor(image_np).unsqueeze(0)
-        frame.side_data.skipped = True
-        frame.side_data.request_id = request_id
-        await self.pipeline.put_video_frame(frame)
-
-    async def get_processed_video_frame(self) -> VideoOutput:
-        processed_frame = await self.pipeline.get_processed_video_frame()
-        # Convert back to VideoOutput format
-        result_tensor = processed_frame.side_data.input
-        result_tensor = result_tensor.squeeze(0)
-        result_image_np = (result_tensor * 255).byte()
-        result_image = Image.fromarray(result_image_np.cpu().numpy())
-        return VideoOutput(processed_frame, processed_frame.side_data.request_id).replace_image(result_image)
+        await self.pipeline.put_video_frame(self._convert_to_av_frame(frame))
+
+    async def put_audio_frame(self, frame: AudioFrame, request_id: str):
+        await self.pipeline.put_audio_frame(self._convert_to_av_frame(frame))
+
+    async def get_processed_video_frame(self, request_id: str) -> VideoOutput:
+        av_frame = await self.pipeline.get_processed_video_frame()
+        video_frame = VideoFrame.from_av_video(av_frame)
+        video_frame.side_data.request_id = request_id
+        return VideoOutput(video_frame).replace_image(av_frame.to_image())
+
+    async def get_processed_audio_frame(self, request_id: str) -> AudioOutput:
+        av_frame = await self.pipeline.get_processed_audio_frame()
+        return AudioOutput(av_frame, request_id)
 
     async def update_params(self, **params):
         new_params = ComfyUIParams(**params)
@@ -91,3 +92,22 @@ async def stop(self):
         logging.info("Stopping ComfyUI pipeline")
         await self.pipeline.cleanup()
         logging.info("ComfyUI pipeline stopped")
+
+    def _convert_to_av_frame(self, frame: Union[VideoFrame, AudioFrame]) -> Union[av.VideoFrame, av.AudioFrame]:
+        """Convert trickle frame to av frame"""
+        if isinstance(frame, VideoFrame):
+            av_frame = av.VideoFrame.from_ndarray(
+                np.array(frame.image.convert("RGB")),
+                format='rgb24'
+            )
+        elif isinstance(frame, AudioFrame):
+            av_frame = av.AudioFrame.from_ndarray(
+                frame.samples.reshape(-1, 1),
+                layout='mono',
+                rate=frame.rate
+            )
+
+        # Common frame properties
+        av_frame.pts = frame.timestamp
+        av_frame.time_base = frame.time_base
+        return av_frame
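
As a rough sanity check of the video branch of the new helper, the sketch below re-states it as a standalone function and round-trips a frame through PyAV and Pillow. It is not part of the commit: StubVideoFrame and convert_video_frame are hypothetical names, and the image, timestamp, and time_base attributes are assumed to behave like those of the trickle VideoFrame used in the diff.

# Hypothetical round-trip of the video path, mirroring _convert_to_av_frame above.
# StubVideoFrame stands in for trickle.VideoFrame and only carries the attributes
# the helper reads; none of this is part of the commit.
from fractions import Fraction

import av
import numpy as np
from PIL import Image


class StubVideoFrame:
    def __init__(self, image, timestamp, time_base):
        self.image = image          # PIL.Image, as frame.image is used above
        self.timestamp = timestamp  # becomes av_frame.pts
        self.time_base = time_base  # becomes av_frame.time_base


def convert_video_frame(frame):
    # Same conversion as the VideoFrame branch of _convert_to_av_frame.
    av_frame = av.VideoFrame.from_ndarray(
        np.array(frame.image.convert("RGB")),  # (height, width, 3) uint8
        format="rgb24",
    )
    av_frame.pts = frame.timestamp
    av_frame.time_base = frame.time_base
    return av_frame


frame = StubVideoFrame(Image.new("RGB", (64, 48), "red"), timestamp=0, time_base=Fraction(1, 30))
av_frame = convert_video_frame(frame)
print(av_frame.width, av_frame.height, av_frame.pts, av_frame.time_base)  # 64 48 0 1/30
restored = av_frame.to_image()  # back to a PIL image, as get_processed_video_frame does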