Skip to content

Commit 9f414fe

Browse files
authored
fix(master): improve instance monitoring logic by removing isInstanceAlive method and refining error detection
1 parent 825afab commit 9f414fe

File tree

1 file changed

+1
-27
lines changed

1 file changed

+1
-27
lines changed

internal/master.go

Lines changed: 1 addition & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -1301,7 +1301,7 @@ func (m *Master) monitorInstance(instance *Instance, cmd *exec.Cmd) {
13011301
}
13021302
return
13031303
case <-time.After(reportInterval):
1304-
if !m.isInstanceAlive(instance) {
1304+
if !instance.lastCheckPoint.IsZero() && time.Since(instance.lastCheckPoint) > 5*reportInterval {
13051305
instance.Status = "error"
13061306
m.instances.Store(instance.ID, instance)
13071307
m.sendSSEEvent("update", instance)
@@ -1310,32 +1310,6 @@ func (m *Master) monitorInstance(instance *Instance, cmd *exec.Cmd) {
13101310
}
13111311
}
13121312

1313-
// isInstanceAlive reports whether the instance is considered alive.
// It returns false when the process probe fails (or no process is attached),
// or when a non-zero lastCheckPoint is older than 6*reportInterval; otherwise true.
1314-
func (m *Master) isInstanceAlive(instance *Instance) bool {
1315-
// Process liveness check: probe the child process with signal 0.
// NOTE(review): on Windows, os.Process.Signal appears to support only os.Kill,
// so the Signal(0) probe below would always report "not alive" there — confirm.
1316-
alive := false
1317-
if instance.cmd != nil && instance.cmd.Process != nil {
1318-
if runtime.GOOS == "windows" {
1319-
process, err := os.FindProcess(instance.cmd.Process.Pid)
1320-
if err != nil {
1321-
return false
1322-
}
1323-
alive = process.Signal(syscall.Signal(0)) == nil
1324-
} else {
1325-
alive = syscall.Kill(instance.cmd.Process.Pid, 0) == nil
1326-
}
1327-
}
1328-
if !alive {
1329-
return false
1330-
}
1331-
1332-
// Heartbeat liveness check: a recorded checkpoint older than 6 report intervals counts as dead.
1333-
if !instance.lastCheckPoint.IsZero() && time.Since(instance.lastCheckPoint) > 6*reportInterval {
1334-
return false
1335-
}
1336-
return true
1337-
}
1338-
13391313
// stopInstance stops the instance
13401314
func (m *Master) stopInstance(instance *Instance) {
13411315
// 如果已经是停止状态,不重复操作

0 commit comments

Comments
 (0)