feat: 在设备检测逻辑中添加失败计数器和错误类型判断,优化设备快照获取失败时的处理逻辑;在设备分配服务中引入设备优先前缀配置,增强设备分配的灵活性和负载均衡能力
This commit is contained in:
@@ -119,6 +119,10 @@ public class Detection {
|
||||
return this.lastSnapshot;
|
||||
}
|
||||
|
||||
// 失败计数器(用于监控)
|
||||
private volatile int consecutiveFailures = 0;
|
||||
private static final int MAX_FAILURES_BEFORE_ALERT = 3;
|
||||
|
||||
/**
|
||||
* 定时任务:全量拉取并交由 DeviceStats 更新内存分类与审计。
|
||||
* 默认每 30 秒执行一次,可通过配置覆盖:detection.poll.cron 或 detection.poll.fixedDelayMs
|
||||
@@ -128,7 +132,14 @@ public class Detection {
|
||||
try {
|
||||
log.info("定时拉取设备快照并更新统计");
|
||||
DeviceStatusResponse snapshot = listAllDevices();
|
||||
|
||||
if (snapshot != null) {
|
||||
// 重置失败计数器
|
||||
if (consecutiveFailures > 0) {
|
||||
log.info("设备状态获取恢复正常,重置失败计数器(之前连续失败{}次)", consecutiveFailures);
|
||||
consecutiveFailures = 0;
|
||||
}
|
||||
|
||||
deviceStats.updateWithSnapshot(snapshot);
|
||||
log.info("设备快照更新统计完成");
|
||||
|
||||
@@ -155,9 +166,36 @@ public class Detection {
|
||||
} catch (Exception ex) {
|
||||
log.warn("基于设备快照推进USING→LOGGED_IN时出现异常", ex);
|
||||
}
|
||||
} else {
|
||||
// 快照获取失败,使用旧缓存继续部分功能
|
||||
DeviceStatusResponse cachedSnapshot = getLastSnapshot();
|
||||
if (cachedSnapshot != null) {
|
||||
log.warn("使用缓存的设备快照数据(可能过期),避免服务完全中断");
|
||||
// 不更新统计,但可以尝试继续状态推进
|
||||
} else {
|
||||
log.warn("无可用设备快照(包括缓存),跳过本轮更新");
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error("定时拉取设备快照并更新统计失败", e);
|
||||
consecutiveFailures++;
|
||||
|
||||
// 判断异常类型,给出更明确的错误信息
|
||||
String errorType = "未知错误";
|
||||
if (e instanceof org.springframework.web.reactive.function.client.WebClientRequestException) {
|
||||
if (e.getMessage().contains("connection timed out") || e.getMessage().contains("ConnectTimeoutException")) {
|
||||
errorType = "连接超时";
|
||||
} else if (e.getMessage().contains("Connection refused")) {
|
||||
errorType = "连接被拒绝";
|
||||
}
|
||||
}
|
||||
|
||||
if (consecutiveFailures >= MAX_FAILURES_BEFORE_ALERT) {
|
||||
log.error("【告警】定时拉取设备快照连续失败{}次(错误类型:{}),请检查脚本服务器连接: {}",
|
||||
consecutiveFailures, errorType, e.getMessage());
|
||||
} else {
|
||||
log.error("定时拉取设备快照并更新统计失败(连续失败{}次,错误类型:{})",
|
||||
consecutiveFailures, errorType, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -128,6 +128,19 @@ public class SystemConfigService {
|
||||
return getConfigValue("user.custom_scan_content", "");
|
||||
}
|
||||
|
||||
// 获取设备优先前缀列表(逗号分隔),返回List
|
||||
public List<String> getDevicePriorityPrefixes() {
|
||||
String value = getConfigValue("device.priority_prefixes", "");
|
||||
if (value == null || value.trim().isEmpty()) {
|
||||
return List.of();
|
||||
}
|
||||
// 按逗号分隔,去除空白,过滤空字符串
|
||||
return java.util.Arrays.stream(value.split(","))
|
||||
.map(String::trim)
|
||||
.filter(s -> !s.isEmpty())
|
||||
.collect(java.util.stream.Collectors.toList());
|
||||
}
|
||||
|
||||
// 批量更新配置
|
||||
public boolean updateConfigs(List<SystemConfig> configs) {
|
||||
if (configs == null || configs.isEmpty()) {
|
||||
|
||||
@@ -3,6 +3,7 @@ package com.gameplatform.server.service.link;
|
||||
import com.gameplatform.server.mapper.agent.LinkTaskMapper;
|
||||
import com.gameplatform.server.model.entity.agent.LinkTask;
|
||||
import com.gameplatform.server.service.cooldown.MemoryMachineCooldownService;
|
||||
import com.gameplatform.server.service.admin.SystemConfigService;
|
||||
import com.gameplatform.server.mapper.history.LinkTaskStatusHistoryMapper;
|
||||
import com.gameplatform.server.model.entity.history.LinkTaskStatusHistory;
|
||||
import org.slf4j.Logger;
|
||||
@@ -28,13 +29,16 @@ public class DeviceAllocationService {
|
||||
private final MemoryMachineCooldownService machineCooldownService;
|
||||
private final LinkTaskMapper linkTaskMapper;
|
||||
private final LinkTaskStatusHistoryMapper statusHistoryMapper;
|
||||
private final SystemConfigService systemConfigService;
|
||||
|
||||
public DeviceAllocationService(MemoryMachineCooldownService machineCooldownService,
|
||||
LinkTaskMapper linkTaskMapper,
|
||||
LinkTaskStatusHistoryMapper statusHistoryMapper) {
|
||||
LinkTaskStatusHistoryMapper statusHistoryMapper,
|
||||
SystemConfigService systemConfigService) {
|
||||
this.machineCooldownService = machineCooldownService;
|
||||
this.linkTaskMapper = linkTaskMapper;
|
||||
this.statusHistoryMapper = statusHistoryMapper;
|
||||
this.systemConfigService = systemConfigService;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -64,14 +68,13 @@ public class DeviceAllocationService {
|
||||
log.info("设备占用检查完成:原候选设备数={}, 过滤后设备数={}, 可用设备={}",
|
||||
availableDevices.size(), filteredDevices.size(), filteredDevices);
|
||||
|
||||
// 2. 打乱设备列表,实现负载均衡
|
||||
List<String> shuffledDevices = new ArrayList<>(filteredDevices);
|
||||
Collections.shuffle(shuffledDevices, ThreadLocalRandom.current());
|
||||
// 2. 按前缀优先级排序,然后打乱(同优先级内随机)
|
||||
List<String> sortedDevices = sortByPrefixPriority(filteredDevices);
|
||||
|
||||
log.info("设备列表已随机化:{}", shuffledDevices);
|
||||
log.info("设备列表已按优先级排序并随机化:{}", sortedDevices);
|
||||
|
||||
// 3. 尝试原子分配设备(按随机顺序)
|
||||
for (String deviceId : shuffledDevices) {
|
||||
// 3. 尝试原子分配设备(按优先级顺序)
|
||||
for (String deviceId : sortedDevices) {
|
||||
if (machineCooldownService.tryAllocateDevice(deviceId, reason, linkTaskId)) {
|
||||
log.info("设备分配成功:设备={}, 任务ID={}, 原因={}", deviceId, linkTaskId, reason);
|
||||
return deviceId;
|
||||
@@ -186,6 +189,73 @@ public class DeviceAllocationService {
|
||||
return availableDevices;
|
||||
}
|
||||
|
||||
/**
|
||||
* 根据配置的前缀优先级对设备列表进行排序
|
||||
* 优先级高的前缀排在前面,同优先级内随机打乱
|
||||
* @param devices 原始设备列表
|
||||
* @return 排序后的设备列表
|
||||
*/
|
||||
private List<String> sortByPrefixPriority(List<String> devices) {
|
||||
if (devices == null || devices.isEmpty()) {
|
||||
return devices;
|
||||
}
|
||||
|
||||
// 获取配置的优先前缀列表
|
||||
List<String> priorityPrefixes = systemConfigService.getDevicePriorityPrefixes();
|
||||
|
||||
if (priorityPrefixes.isEmpty()) {
|
||||
// 没有配置优先前缀,直接随机打乱(保持原有行为)
|
||||
List<String> shuffled = new ArrayList<>(devices);
|
||||
Collections.shuffle(shuffled, ThreadLocalRandom.current());
|
||||
return shuffled;
|
||||
}
|
||||
|
||||
log.info("使用设备优先前缀配置:{}", priorityPrefixes);
|
||||
|
||||
// 按优先级分组:优先级索引 -> 设备列表
|
||||
// -1 表示没有匹配任何优先前缀
|
||||
List<List<String>> groups = new ArrayList<>();
|
||||
for (int i = 0; i <= priorityPrefixes.size(); i++) {
|
||||
groups.add(new ArrayList<>());
|
||||
}
|
||||
|
||||
// 将设备分组
|
||||
for (String deviceId : devices) {
|
||||
int priorityIndex = -1;
|
||||
// 查找设备匹配的最高优先级前缀
|
||||
for (int i = 0; i < priorityPrefixes.size(); i++) {
|
||||
if (deviceId.startsWith(priorityPrefixes.get(i))) {
|
||||
priorityIndex = i;
|
||||
break; // 找到第一个匹配的前缀就停止
|
||||
}
|
||||
}
|
||||
|
||||
if (priorityIndex >= 0) {
|
||||
groups.get(priorityIndex).add(deviceId);
|
||||
} else {
|
||||
// 没有匹配的放到最后一组
|
||||
groups.get(groups.size() - 1).add(deviceId);
|
||||
}
|
||||
}
|
||||
|
||||
// 对每组内部进行随机打乱,然后按优先级顺序合并
|
||||
List<String> result = new ArrayList<>();
|
||||
for (int i = 0; i < groups.size(); i++) {
|
||||
List<String> group = groups.get(i);
|
||||
if (!group.isEmpty()) {
|
||||
Collections.shuffle(group, ThreadLocalRandom.current());
|
||||
result.addAll(group);
|
||||
if (i < priorityPrefixes.size()) {
|
||||
log.debug("优先级{}(前缀={}):{} 台设备", i, priorityPrefixes.get(i), group.size());
|
||||
} else {
|
||||
log.debug("无优先级设备:{} 台", group.size());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* 验证设备分配结果(分配后的双重检查)
|
||||
* @param deviceId 设备ID
|
||||
|
||||
Reference in New Issue
Block a user