"""Automatic symbol-weight tuner for the SugarRush1000 slot simulation.

The *value* of a symbol expresses how strongly it is used to steer RTP.
The higher the value, the more an adjustment of that symbol's probability
moves RTP.  For example, when the measured RTP is too high, the algorithm
lowers the probability of high-value symbols first and (relatively) raises
that of low-value symbols.  High-value symbols then show up less often, which
pulls RTP down directly, while low-value symbols show up more often, which
keeps cascades flowing smoothly.

Values do not need to be normalised, but their ratios matter.

Weight update formula::

    deltaWeight = -(Error) x (LearningRate) x (SymbolValue) x (CurrentWeight)

Relativity: the algorithm only cares about "how many times the value of
symbol A is the value of symbol B".  If A has value 2.0 and B has value 1.0,
then when RTP is too high the relative reduction of A's weight is twice that
of B's.

Keep the ratios reasonable; a huge spread makes tuning very unstable.  With
A = 1000 and B = 0.001 the algorithm becomes extremely sensitive to A (a tiny
error makes A's weight oscillate violently) while B hardly moves at all.

Rule of thumb for configuring a value::

    Value(symbol) ~= BasePay(symbol) x VolatilityFactor

* BasePay: the expected payout of the symbol.
* VolatilityFactor: a hand-set correction factor.

  * Scatter: it has no base payout but triggers free spins, so its potential
    value is far above that of ordinary symbols and its value should be set
    higher.
  * High-paying symbols: factor 1.2~1.5.  They influence RTP strongly, so
    adjusting them makes RTP converge quickly.
  * Low-paying symbols: factor 0.8~1.0.  They mostly contribute clears and
    cascades; their marginal contribution to RTP is small.
"""

import json
import random
import sys
from typing import Dict, List, Optional, Tuple

import numpy as np

from SugarRush1000 import SugarRush1000

# Preset tuning stages (iterations x spins per iteration).
fast = {"iterations": 30, "spins": 100}
middle = {"iterations": 15, "spins": 5_0000}
slow = {"iterations": 10, "spins": 10_0000}

config = {
    "scotter_count_weights": {
        "3": 7.26421894353717,
        "4": 4.1724734692682395,
        "5": 0.8119106579617028,
        "6": 0.20313929837878217,
        "7": 0.04857599989214818,
    },
    "target_rtp": 85,
    "values": {
        "S": 0.5,
        "A": 0.4,
        "B": 0.6,
        "C": 0.8,
        "D": 1,
        "E": 1.2,
        "F": 1.4,
        "G": 0.6,
    },
    "weights": {
        "A": 19.015957779792195,
        "B": 21.291015318701493,
        "C": 31.66660200727613,
        "D": 35.193596023259865,
        "E": 48.7122724047052,
        "F": 64.49005324700025,
        "G": 21.291015318701493,
        "S": 2.6840958157151236,
    },
    "iterators": [
        {"name": "fast", "iterations": 30, "spins": 10000},
    ],
    "feature": "standard",
}


class SugarRushAutoTuner:
    """Iteratively nudge symbol weights until the simulated RTP hits a target.

    Each iteration simulates a batch of spins, measures the RTP and then
    scales every symbol weight by an amount proportional to the RTP error,
    the learning rate and the symbol's configured value.
    """

    # The RTP error is clamped to +/- this value before it is applied, so a
    # very large initial error cannot collapse the weights.
    MAX_ABS_ERROR = 5.0
    # A single update may change a weight by at most this fraction of itself.
    MAX_CHANGE_RATIO = 0.4
    # Hard bounds applied to every weight after an update.
    MIN_WEIGHT = 0
    MAX_WEIGHT = 500
    # Two-decimal tolerance used when reconciling the game balance.
    BALANCE_TOLERANCE = 0.01

    def __init__(
        self,
        target_rtp: float = 96.5,
        values: Optional[Dict[str, float]] = None,
        weights: Optional[Dict[str, float]] = None,
        scotter_count_weights: Optional[Dict[str, float]] = None,
        feature: str = "normal",
    ):
        """Create a tuner.

        Args:
            target_rtp: Desired RTP in percent.
            values: Mapping symbol -> tuning value (higher value means the
                symbol's weight reacts more strongly to the RTP error).
                Required.
            weights: Initial mapping symbol -> weight.  Copied, so the
                caller's dict is never modified.  Defaults to 20.0 for every
                symbol in ``values``.
            scotter_count_weights: Optional weights forwarded to the game as
                ``scotter_counts_weights``.
            feature: ``"normal"`` simulates regular spins; any other value is
                passed to ``buy_free_spins`` as the buy type.

        Raises:
            ValueError: If ``values`` is not provided.
        """
        if values is None:
            raise ValueError("values (symbol -> tuning value) must be provided")

        self.target_rtp = target_rtp
        self.rows = 7
        self.cols = 7
        self.bet = 1.0
        self.feature = feature
        # Always define the attribute; an empty/None argument means
        # "not configured".
        self.scotter_count_weights = scotter_count_weights or None

        # Symbol definitions and their values (higher value => larger
        # influence on RTP).
        self.symbols_config = values
        self.symbol_keys = list(self.symbols_config.keys())

        # Initial weights; copy so tuning never mutates the caller's dict.
        if weights is not None:
            self.weights = dict(weights)
        else:
            self.weights = {k: 20.0 for k in self.symbol_keys}

        # Learning rate: too small converges slowly, too large oscillates.
        self.learning_rate = 0.01  # 0.002
        print(f"初始化自动调优器,feature:{feature} 目标 RTP: {self.target_rtp}%")

    def _new_game(self, balance: float):
        """Build a fresh game instance using the current weights."""
        kwargs = {}
        if self.scotter_count_weights is not None:
            kwargs["scotter_counts_weights"] = self.scotter_count_weights
        # NOTE(review): when no scatter-count weights are configured the
        # keyword is omitted; this assumes SugarRush1000 has a default for
        # it - confirm against the game class.
        return SugarRush1000(balance=balance, weights=self.weights, **kwargs)

    @classmethod
    def _check_balance(
        cls,
        begin_balance: float,
        end_balance: float,
        total_bet: float,
        total_win: float,
    ) -> None:
        """Verify the game balance moved by exactly ``total_bet - total_win``.

        The comparison is signed and raised explicitly, so it still runs when
        Python is started with ``-O``.

        Raises:
            AssertionError: If the two amounts differ by a cent or more.
        """
        actual_loss = begin_balance - end_balance
        expected_loss = total_bet - total_win
        if abs(actual_loss - expected_loss) >= cls.BALANCE_TOLERANCE:
            raise AssertionError(
                f"balance mismatch: balance changed by {actual_loss:.2f}, "
                f"but bet - win = {expected_loss:.2f}"
            )

    @staticmethod
    def _rtp_percent(total_win: float, total_bet: float) -> float:
        """Return RTP in percent.

        Raises:
            ZeroDivisionError: If nothing was bet in the batch.
        """
        if total_bet == 0:
            raise ZeroDivisionError(
                "cannot compute RTP: no bets were placed in this batch"
            )
        return (total_win / total_bet) * 100

    def simulate_one_batch(self, spins: int = 20000) -> float:
        """Simulate ``spins`` regular spins and return the measured RTP (%).

        Stops early if the game reports an error.  Prints how many spins were
        flagged ``is_scotter`` and how many showed at least one "S" symbol.

        Raises:
            AssertionError: If the game balance does not reconcile.
            ZeroDivisionError: If no bet was placed.
        """
        begin_balance = 1_0000_0000
        game = self._new_game(begin_balance)
        total_bet = 0.0
        total_win = 0.0
        total_scotter = 0
        total_has_scotter = 0

        for _ in range(spins):
            res = game.doSpin()
            if res["error"]:
                break

            # Only spins with a positive actual bet add to the total stake.
            actual_cost = res["actual_bet"]
            if actual_cost > 0:
                total_bet += actual_cost

            if res["is_scotter"]:
                total_scotter += 1

            # The spin "contains" a scatter if it is flagged as such or if
            # any cell of the returned grid shows the "S" symbol.
            has_scotter = res["is_scotter"] or any(
                symbol == "S" for row in res["grid"] for symbol in row
            )
            if has_scotter:
                total_has_scotter += 1

            total_win += res["win"]

        self._check_balance(begin_balance, game.balance, total_bet, total_win)
        print(
            f"旋转{spins} 次,scotter {total_scotter}次, containe scotter {total_has_scotter}"
        )
        return self._rtp_percent(total_win, total_bet)

    def simulate_free_batch(
        self, spins: int = 20000, buy_type: str = "standard"
    ) -> float:
        """Buy the free-spin feature ``spins`` times and return the RTP (%).

        For every purchase the game is spun until it reports no remaining
        free spins; the accumulated win is then checked against the
        ``spin_total_win`` reported by the last spin.

        Raises:
            AssertionError: If the accumulated win or the balance does not
                reconcile.
            ZeroDivisionError: If no cost was charged.
        """
        begin_balance = 1_0000_0000
        game = self._new_game(begin_balance)
        total_bet = 0.0
        total_win = 0.0

        for _ in range(spins):
            purchase = game.buy_free_spins(buy_type)
            total_bet += purchase["cost"]

            score = 0
            can_spins = 1
            failed = False
            res = None
            while can_spins > 0:
                can_spins -= 1
                res = game.doSpin()
                if res["error"]:
                    failed = True
                    break
                score += res["win"]
                total_win += res["win"]
                # A non-negative remaining count replaces the local counter.
                if res["free_spins_remaining"] >= 0:
                    can_spins = res["free_spins_remaining"]

            if failed:
                # An errored spin carries no total to reconcile against.
                continue

            # The win accumulated locally must match the game's own total
            # (tolerance instead of exact float equality).
            if abs(score - res["spin_total_win"]) > 1e-6:
                # Report the value that is actually being compared.
                print(
                    "score != res[spin_total_win]", score, res["spin_total_win"]
                )
                raise AssertionError(
                    f"accumulated win {score} != spin_total_win "
                    f"{res['spin_total_win']}"
                )

        self._check_balance(begin_balance, game.balance, total_bet, total_win)
        return self._rtp_percent(total_win, total_bet)

    def _adjust_weights(self, error: float) -> None:
        """Apply one update step to every symbol weight.

        ``error`` is ``measured RTP - target RTP``.  With a positive error
        every weight shrinks, with a negative error every weight grows; the
        relative change is proportional to the symbol's value.
        """
        # Clamp the error so a large initial miss cannot collapse weights.
        safe_error = max(min(error, self.MAX_ABS_ERROR), -self.MAX_ABS_ERROR)
        adjustment_factor = safe_error * self.learning_rate

        for sym in self.symbol_keys:
            weight = self.weights[sym]
            delta = -adjustment_factor * self.symbols_config[sym] * weight

            # Limit a single step to MAX_CHANGE_RATIO of the current weight.
            limit = abs(weight) * self.MAX_CHANGE_RATIO
            delta = max(-limit, min(limit, delta))

            # Keep the weight inside its hard bounds.
            self.weights[sym] = max(
                self.MIN_WEIGHT, min(self.MAX_WEIGHT, weight + delta)
            )

    def tune(
        self,
        iterations: int = 50,
        batch_spins: int = 20000,
        tolerance: float = 3,
    ) -> Tuple[Dict[str, float], List[float]]:
        """Iteratively adjust the weights.

        Args:
            iterations: Maximum number of simulate/adjust rounds.
            batch_spins: Spins (or feature purchases) simulated per round.
            tolerance: Tuning stops once ``|RTP - target|`` is below this
                many percentage points.

        Returns:
            ``(weights, history)`` where ``history`` holds the RTP measured
            in each round.
        """
        history: List[float] = []
        for i in range(iterations):
            # 1. Simulate the current RTP.
            if self.feature == "normal":
                current_rtp = self.simulate_one_batch(batch_spins)
            else:
                current_rtp = self.simulate_free_batch(batch_spins, self.feature)

            error = current_rtp - self.target_rtp
            history.append(current_rtp)
            print(
                f"迭代 {i+1}/{iterations} | RTP: {current_rtp:.4f}% | 目标: {self.target_rtp}% | 误差: {error:+.4f}%"
            )

            # 2. Stop once the error is within tolerance.
            if abs(error) < tolerance:
                print(f"收敛成功!最终 RTP: {current_rtp:.4f}%")
                break

            # 3. Move the weights against the error.
            self._adjust_weights(error)

        return self.weights, history


# --- Run the auto tuner ---
def train_weights():
    """Tune the weights from ``config`` and print a before/after report."""
    print("开始训练权重...")
    # Pick and print a random seed so a run can be reproduced from its log.
    seed = random.randint(1, 1000)
    random.seed(seed)
    np.random.seed(seed)
    print(f"随机种子: {seed}")
    print("符号价值: ")
    print(json.dumps(config["values"], indent=4, ensure_ascii=False))

    tuner = SugarRushAutoTuner(
        target_rtp=config["target_rtp"],
        values=config["values"],
        weights=config["weights"],
        feature=config["feature"],
        scotter_count_weights=config["scotter_count_weights"],
    )

    for stage in config["iterators"]:
        # Read by key rather than relying on the dict's key order.
        name = stage["name"]
        iterations = stage["iterations"]
        spins = stage["spins"]

        begin_weights = tuner.weights.copy()
        print(f"# {name}调优")
        final_weights, _rtp_history = tuner.tune(
            iterations=iterations, batch_spins=spins
        )

        print("\n=== 调优前的符号权重 ===")
        for sym, w in begin_weights.items():
            print(f"{sym}: {w:.2f}")

        begin_total = sum(begin_weights.values())
        print("\n 符号出现概率 (%) ===")
        for sym, w in begin_weights.items():
            print(f"{sym}: {(w/begin_total)*100:.2f}%")

        print("\n=== 最终调整后的符号权重 ===")
        print(json.dumps(final_weights, indent=4, ensure_ascii=False))

        # Convert the weights into probability percentages.
        total_w = sum(final_weights.values())
        print("\n=== 符号出现概率 (%) ===")
        for sym, w in final_weights.items():
            print(f"{sym}: {(w/total_w)*100:.2f}%")


def verify():
    """Run ten single-iteration checks of a fixed weight set ("super")."""
    print("开始进行权重校验:")
    for i in range(10):
        tuner = SugarRushAutoTuner(
            target_rtp=config["target_rtp"],
            values=config["values"],
            weights={
                "A": 18.18442161117576,
                "B": 19.994169181485578,
                "C": 29.28572430711806,
                "D": 32.143058029000244,
                "E": 44.060855546850874,
                "F": 57.93237542185442,
                "G": 19.994169181485578,
                "S": 2.5426663862665424,
            },
            scotter_count_weights=config["scotter_count_weights"],
            feature="super",
        )
        print(f"第 {i+1}/10 次校验:")
        tuner.tune(iterations=1, batch_spins=10000)


if __name__ == "__main__":
    # Any command-line argument switches from training to verification.
    if len(sys.argv) >= 2:
        verify()
    else:
        train_weights()