Initial commit

2026-04-23 16:58:11 +08:00
commit 267eba1eca
2582 changed files with 273338 additions and 0 deletions
--- a/pyground/sugar/SugarRushAutoRTPTuner.py
+++ b/pyground/sugar/SugarRushAutoRTPTuner.py
@@ -0,0 +1,339 @@
+import json
+import random
+import sys
+import numpy as np
+from typing import Dict, List
+
+from SugarRush1000 import SugarRush1000
+
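+"""
+A symbol's value expresses how strongly that symbol influences RTP tuning: the
+higher the value, the more a change in the symbol's probability moves the RTP.
+For example, when the current RTP is too high and must come down, the algorithm
+reduces the probability of high-value symbols first, which raises the relative
+probability of low-value symbols. High-value symbols then appear less often,
+which pulls the RTP down directly, while low-value symbols appear more often,
+which keeps the cascades flowing smoothly.
+
+Symbol values do not need to be normalized, but their ratios matter.
+Weight update formula:
+    deltaWeight = -(Error) x (LearningRate) x (SymbolValue) x (CurrentWeight)
+Relativity principle: the algorithm only cares about "how many times the value
+of symbol A is the value of symbol B". If A has value 2.0 and B has value 1.0,
+then when the RTP is too high, A's weight is reduced by twice the relative
+amount that B's weight is reduced.
+
+The ratios must stay reasonable; if they differ too much, tuning becomes very
+unstable. For example, with symbol A at value 1000 and symbol B at value 0.001,
+the algorithm is extremely sensitive to A (the slightest error makes A's weight
+oscillate violently) while B barely moves at all.
+
+Formula for configuring each symbol's value:
+Value(symbol) is approximately BasePay(symbol) x VolatilityFactor
+BasePay: the expected payout of the symbol.
+VolatilityFactor: the symbol's volatility factor, a manually chosen correction.
+    * Scatter (spelled "scotter" in this code): it has no base payout, but it
+      triggers free spins, so its potential value is far above that of a
+      regular symbol and its Value should be set higher.
+    * High-paying symbols: use a factor of 1.2~1.5. High payouts affect the RTP
+      strongly, so adjusting them makes the RTP converge quickly.
+    * Low-paying symbols: use a factor of 0.8~1.0. They mainly contribute
+      clears and cascade triggers; their own payout is low, so their marginal
+      contribution to the RTP is small.
+"""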
+
+# Simulation presets with the same "iterations"/"spins" keys as the entries of
+# config["iterators"] below.
+# NOTE(review): fast/middle/slow are not referenced anywhere else in the
+# visible file; the stages that train_weights() actually runs come from
+# config["iterators"] - confirm whether these three are still needed.
+fast = {"iterations": 30, "spins": 100}
+middle = {"iterations": 15, "spins": 5_0000}  # 5_0000 == 50000
+slow = {"iterations": 10, "spins": 10_0000}  # 10_0000 == 100000
+
+# Tuning configuration read by train_weights() and verify().
+config = {
+    # Forwarded to SugarRush1000 as `scotter_counts_weights`.
+    # Presumably the relative weight of each scatter count - TODO confirm
+    # against SugarRush1000.
+    "scotter_count_weights": {
+        "3": 7.26421894353717,
+        "4": 4.1724734692682395,
+        "5": 0.8119106579617028,
+        "6": 0.20313929837878217,
+        "7": 0.04857599989214818,
+    },
+    # Target RTP in percent; the tuner compares it against (win / bet) * 100.
+    "target_rtp": 85,
+    # Per-symbol value: multiplies the weight adjustment of that symbol in
+    # SugarRushAutoTuner.tune(), so larger values move further per round.
+    "values": {
+        "S": 0.5,
+        "A": 0.4,
+        "B": 0.6,
+        "C": 0.8,
+        "D": 1,
+        "E": 1.2,
+        "F": 1.4,
+        "G": 0.6,
+    },
+    # Starting symbol weights handed to the tuner by train_weights().
+    "weights": {
+        "A": 19.015957779792195,
+        "B": 21.291015318701493,
+        "C": 31.66660200727613,
+        "D": 35.193596023259865,
+        "E": 48.7122724047052,
+        "F": 64.49005324700025,
+        "G": 21.291015318701493,
+        "S": 2.6840958157151236,
+    },
+    # Tuning stages run in order by train_weights(); each entry supplies the
+    # round count ("iterations") and the batch size per round ("spins").
+    "iterators": [
+        {"name": "fast", "iterations": 30, "spins": 10000},
+    ],
+    # "normal" makes tune() simulate paid spins; any other value is passed to
+    # simulate_free_batch() as the buy_type of the purchased bonus.
+    "feature": "standard",
+}
+
+
+class SugarRushAutoTuner:
+    """Iteratively tune symbol weights until the simulated RTP nears a target.
+
+    Each round simulates a batch on ``SugarRush1000``, measures the RTP as a
+    percentage, and changes every symbol weight by
+    ``-(error) * learning_rate * value * weight``, so symbols with a larger
+    configured value move further per round.
+    """
+
+    def __init__(
+        self,
+        target_rtp: float = 96.5,
+        values=None,
+        weights=None,
+        scotter_count_weights=None,
+        feature: str = "normal",
+    ):
+        """Store the tuning target and the starting weights.
+
+        Args:
+            target_rtp: Desired RTP in percent.
+            values: Mapping of symbol -> value (the per-symbol multiplier used
+                by ``tune``). Required.
+            weights: Mapping of symbol -> starting weight. Copied, so the
+                caller's dict is not modified by ``tune``. When omitted, every
+                symbol in ``values`` starts at 20.0.
+            scotter_count_weights: Forwarded to ``SugarRush1000`` as
+                ``scotter_counts_weights``; when empty or omitted the keyword
+                is not passed at all.
+            feature: ``"normal"`` simulates paid spins; any other value is
+                used as the ``buy_type`` of a purchased bonus round.
+
+        Raises:
+            ValueError: If ``values`` is None.
+        """
+        if values is None:
+            raise ValueError("values (symbol -> value mapping) is required")
+
+        self.target_rtp = target_rtp
+        self.rows = 7
+        self.cols = 7
+        self.bet = 1.0
+        self.feature = feature
+        # Always define the attribute; previously it was left unset when the
+        # argument was falsy and the simulate methods raised AttributeError.
+        self.scotter_count_weights = (
+            scotter_count_weights if scotter_count_weights else None
+        )
+        # Symbol definitions and their values (higher value = larger weight
+        # adjustment per round).
+        self.symbols_config = values
+
+        # Starting weights.
+        self.symbol_keys = list(self.symbols_config.keys())
+        if weights is not None:
+            # Copy: tune() updates self.weights in place.
+            self.weights = dict(weights)
+        else:
+            self.weights = {k: 20.0 for k in self.symbol_keys}
+
+        # Learning rate (controls the step size: too small converges slowly,
+        # too large oscillates).
+        self.learning_rate = 0.01  # 0.002
+
+        print(f"初始化自动调优器，feature:{feature} 目标 RTP: {self.target_rtp}%")
+
+    def _new_game(self, balance):
+        """Create a SugarRush1000 game using the tuner's current weights."""
+        kwargs = {"balance": balance, "weights": self.weights}
+        if self.scotter_count_weights is not None:
+            kwargs["scotter_counts_weights"] = self.scotter_count_weights
+        return SugarRush1000(**kwargs)
+
+    @staticmethod
+    def _assert_balance_consistent(begin_balance, end_balance, total_bet, total_win):
+        """Check that the game's balance change equals bets minus wins."""
+        spent = begin_balance - end_balance
+        expected = total_bet - total_win
+        # Signed comparison with a one-cent tolerance. The previous check took
+        # abs() of each side separately, which also accepted a sign flip.
+        assert abs(spent - expected) < 0.01, (
+            f"balance mismatch: balance changed by {spent:.2f}, "
+            f"bets minus wins is {expected:.2f}"
+        )
+
+    @staticmethod
+    def _rtp_percent(total_win, total_bet):
+        """Return win / bet as a percentage, rejecting an empty batch."""
+        if total_bet <= 0:
+            raise RuntimeError(
+                "no bets were placed in this batch; RTP is undefined"
+            )
+        return (total_win / total_bet) * 100
+
+    def simulate_one_batch(self, spins: int = 20000):
+        """Simulate up to ``spins`` regular spins and return the RTP in percent.
+
+        The batch stops early when a spin result reports an error.
+
+        Raises:
+            RuntimeError: If no bet was placed (for example ``spins`` is 0).
+            AssertionError: If the game's balance does not match the tracked
+                bets and wins.
+        """
+        begin_balance = 1_0000_0000
+        game = self._new_game(begin_balance)
+
+        total_bet = 0.0
+        total_win = 0.0
+        total_scotter = 0  # spins whose result has is_scotter set
+        total_has_scotter = 0  # spins flagged is_scotter or showing an "S"
+        for _ in range(spins):
+            res = game.doSpin()
+            if res["error"]:
+                break
+
+            # Only positive costs count as wagered.
+            actual_cost = res["actual_bet"]
+            if actual_cost > 0:
+                total_bet += actual_cost
+
+            if res["is_scotter"]:
+                total_scotter += 1
+                total_has_scotter += 1
+            elif any(symbol == "S" for row in res["grid"] for symbol in row):
+                total_has_scotter += 1
+
+            total_win += res["win"]
+
+        # Verify that the balance is consistent with the tracked totals.
+        self._assert_balance_consistent(
+            begin_balance, game.balance, total_bet, total_win
+        )
+        print(
+            f"旋转{spins} 次，scotter {total_scotter}次, containe scotter {total_has_scotter}"
+        )
+        return self._rtp_percent(total_win, total_bet)
+
+    def simulate_free_batch(self, spins: int = 20000, buy_type: str = "standard"):
+        """Buy ``spins`` bonus rounds of ``buy_type`` and return the RTP in percent.
+
+        Each purchase is played until the game reports no free spins
+        remaining (or a spin reports an error). The purchase cost is the bet.
+
+        Raises:
+            RuntimeError: If no cost was charged (for example ``spins`` is 0).
+            AssertionError: If a round's accumulated win differs from the
+                game's ``spin_total_win``, or the balance is inconsistent.
+        """
+        begin_balance = 1_0000_0000
+        game = self._new_game(begin_balance)
+
+        total_bet = 0.0
+        total_win = 0.0
+        for _ in range(spins):
+            purchase = game.buy_free_spins(buy_type)
+            total_bet += purchase["cost"]
+
+            score = 0  # win accumulated over this purchased round
+            can_spins = 1  # always play at least one spin per purchase
+            while can_spins > 0:
+                can_spins -= 1
+                res = game.doSpin()
+                if res["error"]:
+                    break
+
+                score += res["win"]
+                total_win += res["win"]
+                if res["free_spins_remaining"] >= 0:
+                    can_spins = res["free_spins_remaining"]
+
+            # The round's accumulated win must match the game's own total.
+            # The diagnostic now reports `score`, the value being compared;
+            # it previously printed the batch-wide total_win.
+            if score != res["spin_total_win"]:
+                print("score != res[spin_total_win]", score, res["spin_total_win"])
+            assert score == res["spin_total_win"]
+
+        # Verify that the balance is consistent with the tracked totals.
+        self._assert_balance_consistent(
+            begin_balance, game.balance, total_bet, total_win
+        )
+        return self._rtp_percent(total_win, total_bet)
+
+    def tune(
+        self,
+        iterations: int = 50,
+        batch_spins: int = 20000,
+        tolerance: float = 3.0,
+    ):
+        """Adjust the weights round by round until the RTP error is small.
+
+        Args:
+            iterations: Maximum number of simulate-and-adjust rounds.
+            batch_spins: Batch size per round (spins when ``feature`` is
+                ``"normal"``, otherwise bonus purchases).
+            tolerance: Stop once ``abs(rtp - target_rtp)`` is below this many
+                percentage points. The default of 3 is the threshold that was
+                previously hard-coded.
+
+        Returns:
+            ``(weights, history)``: the tuner's weight dict (updated in place)
+            and the list of RTP values measured in each round.
+        """
+        max_error_step = 5.0  # clamp on the error so one round cannot overshoot
+        max_change_ratio = 0.4  # a weight moves by at most 40% per round
+        min_weight = 0
+        max_weight = 500
+        history = []
+
+        for i in range(iterations):
+            # 1. Simulate the current RTP.
+            if self.feature == "normal":
+                current_rtp = self.simulate_one_batch(batch_spins)
+            else:
+                current_rtp = self.simulate_free_batch(batch_spins, self.feature)
+
+            error = current_rtp - self.target_rtp
+            history.append(current_rtp)
+
+            print(
+                f"迭代 {i+1}/{iterations} | RTP: {current_rtp:.4f}% | 目标: {self.target_rtp}% | 误差: {error:+.4f}%"
+            )
+
+            # 2. Stop when converged.
+            if abs(error) < tolerance:
+                print(f"收敛成功！最终 RTP: {current_rtp:.4f}%")
+                break
+
+            # 3. Adjust the weights.
+            # RTP above target (error > 0) gives a negative delta; RTP below
+            # target gives a positive one. The delta is proportional to the
+            # symbol's value, so high-value symbols move further and the
+            # relative share of low-value symbols shifts the opposite way.
+            safe_error = max(min(error, max_error_step), -max_error_step)
+            adjustment_factor = safe_error * self.learning_rate
+
+            for sym in self.symbol_keys:
+                weight = self.weights[sym]
+                delta = -adjustment_factor * self.symbols_config[sym] * weight
+
+                # Cap the change per round; a plain clamp keeps the weights as
+                # built-in floats instead of numpy scalars.
+                cap = abs(weight) * max_change_ratio
+                delta = max(-cap, min(cap, delta))
+
+                # Keep the weight within [min_weight, max_weight].
+                self.weights[sym] = max(min_weight, min(max_weight, weight + delta))
+
+        return self.weights, history
+
+
+# --- Run the automatic tuning ---
+def train_weights():
+    """Tune the weights from ``config`` and print them before and after.
+
+    Runs every stage listed in ``config["iterators"]`` in order on a single
+    tuner, so each stage starts from the weights the previous one produced.
+    """
+    print("开始训练权重...")
+    # Draw a seed and print it so the run can be repeated with the same seed.
+    # NOTE(review): this seeds `random` and numpy only; whether SugarRush1000
+    # draws from those generators is not visible here - confirm.
+    seed = random.randint(1, 1000)
+    random.seed(seed)
+    np.random.seed(seed)
+
+    print(f"随机种子: {seed}")
+    print("符号价值: ")
+    print(json.dumps(config["values"], indent=4, ensure_ascii=False))
+
+    tuner = SugarRushAutoTuner(
+        target_rtp=config["target_rtp"],
+        values=config["values"],
+        weights=config["weights"],
+        feature=config["feature"],
+        scotter_count_weights=config["scotter_count_weights"],
+    )
+
+    for stage in config["iterators"]:
+        # Read each field by key; unpacking .values() depended on the key
+        # order and on the entry having exactly three keys.
+        name = stage["name"]
+        iterations = stage["iterations"]
+        spins = stage["spins"]
+
+        begin_weights = tuner.weights.copy()
+        print(f"# {name}调优")
+        final_weights, _ = tuner.tune(iterations=iterations, batch_spins=spins)
+
+        print("\n=== 调优前的符号权重 ===")
+        for sym, w in begin_weights.items():
+            print(f"{sym}: {w:.2f}")
+        begin_total = sum(begin_weights.values())
+        print("\n 符号出现概率 (%) ===")
+        for sym, w in begin_weights.items():
+            print(f"{sym}: {(w/begin_total)*100:.2f}%")
+
+        print("\n=== 最终调整后的符号权重 ===")
+        print(json.dumps(final_weights, indent=4, ensure_ascii=False))
+
+        # Convert the weights to probability percentages.
+        total_w = sum(final_weights.values())
+        print("\n=== 符号出现概率 (%) ===")
+        for sym, w in final_weights.items():
+            print(f"{sym}: {(w/total_w)*100:.2f}%")
+
+
+def verify():
+    """Measure the RTP of a fixed weight set ten times with the "super" feature.
+
+    Every round builds a new tuner and runs a single tuning iteration of
+    10000 batches, which prints the measured RTP and its error.
+    """
+    checked_weights = {
+        "A": 18.18442161117576,
+        "B": 19.994169181485578,
+        "C": 29.28572430711806,
+        "D": 32.143058029000244,
+        "E": 44.060855546850874,
+        "F": 57.93237542185442,
+        "G": 19.994169181485578,
+        "S": 2.5426663862665424,
+    }
+
+    print("开始进行权重校验:")
+    for round_no in range(1, 11):
+        # Hand every round its own dict so an adjustment made by tune()
+        # cannot carry over into the next round.
+        tuner = SugarRushAutoTuner(
+            target_rtp=config["target_rtp"],
+            values=config["values"],
+            weights=dict(checked_weights),
+            scotter_count_weights=config["scotter_count_weights"],
+            feature="super",
+        )
+        print(f"第 {round_no}/10 次校验:")
+        tuner.tune(iterations=1, batch_spins=10000)
+
+
+if __name__ == "__main__":
+    # Any command-line argument selects verification; with none, train.
+    entry_point = verify if len(sys.argv) > 1 else train_weights
+    entry_point()