feat: 在设备检测逻辑中添加失败计数器和错误类型判断,优化设备快照获取失败时的处理逻辑;在设备分配服务中引入设备优先前缀配置,增强设备分配的灵活性和负载均衡能力

This commit is contained in:
zyh
2025-10-05 16:02:56 +08:00
parent b60a5717c6
commit a7e02936ee
7 changed files with 310 additions and 8 deletions

View File

@@ -119,6 +119,10 @@ public class Detection {
return this.lastSnapshot;
}
// Consecutive-failure counter (used for monitoring): incremented in the poll task's catch block
// and reset to 0 as soon as a non-null snapshot is obtained again.
// NOTE(review): volatile only guarantees visibility; consecutiveFailures++ is not atomic —
// confirm the scheduled poll never runs concurrently with itself.
private volatile int consecutiveFailures = 0;
// Number of consecutive failures at which the poll task switches to the "【告警】" alert log line.
private static final int MAX_FAILURES_BEFORE_ALERT = 3;
/**
* 定时任务:全量拉取并交由 DeviceStats 更新内存分类与审计。
* 默认每 30 秒执行一次,可通过配置覆盖(detection.poll.cron 或 detection.poll.fixedDelayMs)
@@ -128,7 +132,14 @@ public class Detection {
try {
log.info("定时拉取设备快照并更新统计");
DeviceStatusResponse snapshot = listAllDevices();
if (snapshot != null) {
// 重置失败计数器
if (consecutiveFailures > 0) {
log.info("设备状态获取恢复正常,重置失败计数器(之前连续失败{}次)", consecutiveFailures);
consecutiveFailures = 0;
}
deviceStats.updateWithSnapshot(snapshot);
log.info("设备快照更新统计完成");
@@ -155,9 +166,36 @@ public class Detection {
} catch (Exception ex) {
log.warn("基于设备快照推进USING→LOGGED_IN时出现异常", ex);
}
} else {
// 快照获取失败,使用旧缓存继续部分功能
DeviceStatusResponse cachedSnapshot = getLastSnapshot();
if (cachedSnapshot != null) {
log.warn("使用缓存的设备快照数据(可能过期),避免服务完全中断");
// 不更新统计,但可以尝试继续状态推进
} else {
log.warn("无可用设备快照(包括缓存),跳过本轮更新");
}
}
} catch (Exception e) {
log.error("定时拉取设备快照并更新统计失败", e);
consecutiveFailures++;
// 判断异常类型,给出更明确的错误信息
String errorType = "未知错误";
if (e instanceof org.springframework.web.reactive.function.client.WebClientRequestException) {
if (e.getMessage().contains("connection timed out") || e.getMessage().contains("ConnectTimeoutException")) {
errorType = "连接超时";
} else if (e.getMessage().contains("Connection refused")) {
errorType = "连接被拒绝";
}
}
if (consecutiveFailures >= MAX_FAILURES_BEFORE_ALERT) {
log.error("【告警】定时拉取设备快照连续失败{}次(错误类型:{}),请检查脚本服务器连接: {}",
consecutiveFailures, errorType, e.getMessage());
} else {
log.error("定时拉取设备快照并更新统计失败(连续失败{}次,错误类型:{}",
consecutiveFailures, errorType, e);
}
}
}

View File

@@ -128,6 +128,19 @@ public class SystemConfigService {
return getConfigValue("user.custom_scan_content", "");
}
/**
 * Returns the device priority prefixes stored under the {@code device.priority_prefixes}
 * config key. The stored value is split on commas; each piece is trimmed and pieces that are
 * empty after trimming are dropped.
 *
 * @return the prefixes in configured order, or an empty list when the value is null or blank
 */
public List<String> getDevicePriorityPrefixes() {
    String raw = getConfigValue("device.priority_prefixes", "");
    boolean nothingConfigured = raw == null || raw.trim().isEmpty();
    if (nothingConfigured) {
        return List.of();
    }
    // Walk the comma-separated pieces in order, keeping only the non-empty trimmed ones.
    List<String> prefixes = new java.util.ArrayList<>();
    for (String piece : raw.split(",")) {
        String candidate = piece.trim();
        if (!candidate.isEmpty()) {
            prefixes.add(candidate);
        }
    }
    return prefixes;
}
// 批量更新配置
public boolean updateConfigs(List<SystemConfig> configs) {
if (configs == null || configs.isEmpty()) {

View File

@@ -3,6 +3,7 @@ package com.gameplatform.server.service.link;
import com.gameplatform.server.mapper.agent.LinkTaskMapper;
import com.gameplatform.server.model.entity.agent.LinkTask;
import com.gameplatform.server.service.cooldown.MemoryMachineCooldownService;
import com.gameplatform.server.service.admin.SystemConfigService;
import com.gameplatform.server.mapper.history.LinkTaskStatusHistoryMapper;
import com.gameplatform.server.model.entity.history.LinkTaskStatusHistory;
import org.slf4j.Logger;
@@ -28,13 +29,16 @@ public class DeviceAllocationService {
private final MemoryMachineCooldownService machineCooldownService;
private final LinkTaskMapper linkTaskMapper;
private final LinkTaskStatusHistoryMapper statusHistoryMapper;
private final SystemConfigService systemConfigService;
public DeviceAllocationService(MemoryMachineCooldownService machineCooldownService,
LinkTaskMapper linkTaskMapper,
LinkTaskStatusHistoryMapper statusHistoryMapper) {
LinkTaskStatusHistoryMapper statusHistoryMapper,
SystemConfigService systemConfigService) {
this.machineCooldownService = machineCooldownService;
this.linkTaskMapper = linkTaskMapper;
this.statusHistoryMapper = statusHistoryMapper;
this.systemConfigService = systemConfigService;
}
/**
@@ -64,14 +68,13 @@ public class DeviceAllocationService {
log.info("设备占用检查完成:原候选设备数={}, 过滤后设备数={}, 可用设备={}",
availableDevices.size(), filteredDevices.size(), filteredDevices);
// 2. 打乱设备列表,实现负载均衡
List<String> shuffledDevices = new ArrayList<>(filteredDevices);
Collections.shuffle(shuffledDevices, ThreadLocalRandom.current());
// 2. 按前缀优先级排序,然后打乱(同优先级内随机)
List<String> sortedDevices = sortByPrefixPriority(filteredDevices);
log.info("设备列表已随机化:{}", shuffledDevices);
log.info("设备列表已按优先级排序并随机化:{}", sortedDevices);
// 3. 尝试原子分配设备(按随机顺序)
for (String deviceId : shuffledDevices) {
// 3. 尝试原子分配设备(按优先级顺序)
for (String deviceId : sortedDevices) {
if (machineCooldownService.tryAllocateDevice(deviceId, reason, linkTaskId)) {
log.info("设备分配成功:设备={}, 任务ID={}, 原因={}", deviceId, linkTaskId, reason);
return deviceId;
@@ -186,6 +189,73 @@ public class DeviceAllocationService {
return availableDevices;
}
/**
 * Orders candidate devices according to the configured prefix priority.
 *
 * <p>Each device is placed in the bucket of the first configured prefix it starts with
 * (prefixes earlier in the configuration rank higher). Devices matching no prefix, including
 * {@code null} IDs, go into a trailing bucket. Every bucket is shuffled independently and the
 * buckets are concatenated in priority order. When no prefixes are configured the whole list
 * is simply shuffled.
 *
 * <p>The input list is never modified; the result is always a fresh, mutable list.
 *
 * @param devices candidate device IDs; may be {@code null} or empty
 * @return a new list with the devices in allocation order; never {@code null}
 */
private List<String> sortByPrefixPriority(List<String> devices) {
    if (devices == null || devices.isEmpty()) {
        // Return a fresh list rather than the caller's reference (or null) so every path
        // hands back the same kind of object.
        return new ArrayList<>();
    }

    List<String> priorityPrefixes = systemConfigService.getDevicePriorityPrefixes();
    if (priorityPrefixes == null || priorityPrefixes.isEmpty()) {
        // No priority configured: plain shuffle of a copy.
        List<String> shuffled = new ArrayList<>(devices);
        Collections.shuffle(shuffled, ThreadLocalRandom.current());
        return shuffled;
    }

    log.info("使用设备优先前缀配置:{}", priorityPrefixes);

    // One bucket per prefix (index == priority) plus a trailing bucket for unmatched devices.
    int unmatchedIndex = priorityPrefixes.size();
    List<List<String>> groups = new ArrayList<>(unmatchedIndex + 1);
    for (int i = 0; i <= unmatchedIndex; i++) {
        groups.add(new ArrayList<>());
    }

    for (String deviceId : devices) {
        int bucket = unmatchedIndex;
        // A null ID cannot match any prefix; it stays in the unmatched bucket instead of
        // throwing, which mirrors the no-prefix path where null entries are kept.
        if (deviceId != null) {
            for (int i = 0; i < unmatchedIndex; i++) {
                if (deviceId.startsWith(priorityPrefixes.get(i))) {
                    bucket = i; // first match wins: it is the highest-priority prefix
                    break;
                }
            }
        }
        groups.get(bucket).add(deviceId);
    }

    // Shuffle inside each bucket, then concatenate buckets in priority order.
    List<String> result = new ArrayList<>(devices.size());
    for (int i = 0; i < groups.size(); i++) {
        List<String> group = groups.get(i);
        if (group.isEmpty()) {
            continue;
        }
        Collections.shuffle(group, ThreadLocalRandom.current());
        result.addAll(group);
        if (i < unmatchedIndex) {
            log.debug("优先级{}(前缀={}):{} 台设备", i, priorityPrefixes.get(i), group.size());
        } else {
            log.debug("无优先级设备:{} 台", group.size());
        }
    }
    return result;
}
/**
* 验证设备分配结果(分配后的双重检查)
* @param deviceId 设备ID