apache · aweisberg · May 28, 2025 · May 22, 2025 · michaelsembwever · May 23, 2025
diff --git a/ccmlib/cluster.py b/ccmlib/cluster.py
@@ -574,6 +574,7 @@ def start(self, no_wait=False, verbose=False, wait_for_binary_proto=True,
                 if not node._wait_for_running(p, timeout_s=7):
                     raise NodeError("Node {} should be running before waiting for <started listening> log message, "
                                     "but C* process is terminated.".format(node.name))
+            for node, p, mark in started:
                 try:
                     timeout=kwargs.get('timeout', DEFAULT_CLUSTER_WAIT_TIMEOUT_IN_SECS)
                     timeout=int(os.environ.get('CCM_CLUSTER_START_TIMEOUT_OVERRIDE', timeout))

diff --git a/ccmlib/node.py b/ccmlib/node.py
@@ -789,6 +789,14 @@ def get_launch_bin(self):
     def add_custom_launch_arguments(self, args):
         pass
 
+    def __log_dir(self):
+        """
+        Return the -Dcassandra.logdir launch argument pointing at the 'logs'
+        directory under this node's path.
+        """
+        logs_path = os.path.join(self.get_path(), 'logs')
+        return '-Dcassandra.logdir=%s' % logs_path
+
     def start(self,
               join_ring=True,
               no_wait=False,
@@ -876,7 +879,7 @@ def start(self,
 
         args = args + ['-p', pidfile, '-Dcassandra.join_ring=%s' % str(join_ring)]
 
-        args.append('-Dcassandra.logdir=%s' % os.path.join(self.get_path(), 'logs'))
+        args.append(self.__log_dir())
         if replace_token is not None:
             args.append('-Dcassandra.replace_token=%s' % str(replace_token))
         if replace_address is not None:
@@ -982,6 +985,45 @@ def _wait_for_running(self, process, timeout_s):
             self._update_pid(process)
         return self.is_running()
 
+    def __unix_kill_process_matching(self, pattern, sig=signal.SIGTERM):
+        """
+        Send a signal to every process whose command line matches a regex.
+          - pattern: regular expression searched for (re.search) in the
+            space-joined command line of each process.
+          - sig: signal delivered to each matching process (SIGTERM by default).
+
+        Best effort: processes that vanish or cannot be signalled are logged
+        or skipped, never raised to the caller.
+        """
+        matcher = re.compile(pattern)
+        for proc in psutil.process_iter(['pid', 'cmdline']):
+            try:
+                pid = proc.info['pid']
+                # cmdline may be falsy (empty or None); treat it as an empty string
+                cmdline = " ".join(proc.info['cmdline']) if proc.info['cmdline'] else ""
+                if matcher.search(cmdline):
+                    try:
+                        os.kill(int(pid), sig)
+                    except ProcessLookupError:
+                        # The process was gone by the time the signal was sent
+                        logger.info(f"Process {pid} not found")
+                    except PermissionError:
+                        logger.info(f"Did not have permissions to kill {pid}")
+            except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
+                # Best effort: skip processes that disappeared or cannot be inspected
+                pass
+
+    def __unix_kill(self, sig):
+        """
+        Send sig to any Cassandra daemon process whose command line carries
+        this node's log directory argument, located by command line, not PID.
+        """
+        # Escape both parts so regex metacharacters in the node path
+        # (e.g. '.', '+', '(') are matched literally.
+        pattern = ".*{}.*{}.*".format(re.escape(self.__log_dir()),
+                                      re.escape("org.apache.cassandra.service.CassandraDaemon"))
+        self.__unix_kill_process_matching(pattern, sig)
+
     def stop(self, wait=True, wait_other_notice=False, signal_event=signal.SIGTERM, **kwargs):
         """
         Stop the node.
@@ -1046,6 +1068,10 @@ def stop(self, wait=True, wait_other_notice=False, signal_event=signal.SIGTERM,
             else:
                 return True
         else:
+            # Make sure the node is actually stopped, even if its PID could not be found for some reason.
+            # Always send SIGKILL: the node should already be stopped, and we do not wait for it to exit.
+            if not common.is_win():
+                self.__unix_kill(signal.SIGKILL)
             return False
 
     def wait_for_compactions(self, timeout=120):