import json
import random
import sys
import numpy as np
from typing import Dict, List

from SugarRush1000 import SugarRush1000

"""
符号的价值的含义是对RTP调整的影响程度。符号价值越高，符号概率的调整对RTP的影响越大。比如当前RTP过高，要降低RTP,算法优先降低高价值符号的概率，提升低价值符号的概率。结果就是高价值符号出现频率降低，直接拉动了RTP的降低，低价值符号出现频率提升，又提升了级联效果的流畅性。

所有符号的价值不需要考虑归一化，但是要考虑比例。
调整权重的公式：deltaWeight = -(Error) x (LearningRate) x (SymbolValue) x (CurrentWeight)
相对性原理：算法只关心“符号 A 的价值是符号 B 的多少倍”。如果 A 的价值是 2.0，B 是 1.0。当RTP偏高时，算法会让 A 的权重减少幅度是 B 的 2倍。

比例要合理，不能差距过大，否则调优过程非常不稳定。比如符号A Value=1000,符号B value=0.001, 会导致算法对符号A极其敏感，稍微一点误差就会让A的权重剧烈震荡，而符号B几乎完全不动。

每个符号的价值配置公式：
Value(symbol)约是BasePay(symbol)xVolatilityFactor
BasePay:是符号的期望赔率
VolatilityFactor:是符号的波动因子，人工设定的修正值。
    * 对于Scatter:它没有基础赔付，但是能触发免费旋转，Scatter的潜在价值远高于普通符号，所以它的Value应该设置得更高。
    * 对于高赔率符号：系数设为1.2~1.5。高赔率对RTP影响很大，调整它们可以很快让RTP收敛。
    * 对于低赔率符号：系数设为0.8~1.0。它们主要贡献消除次数和触发级联效果，但是本身赔率低，对RTP的边际贡献较低
"""

# Tuning-stage presets: how many tune() iterations to run and how many spins
# to simulate per iteration. None of the three is referenced in the visible
# code; config["iterators"] below carries its own inline entries.
fast = {"iterations": 30, "spins": 100}
middle = {"iterations": 15, "spins": 5_0000}  # 5_0000 == 50000
slow = {"iterations": 10, "spins": 10_0000}  # 10_0000 == 100000

# Settings read by train_weights() and verify().
config = {
    # Forwarded unchanged to SugarRush1000 as `scotter_counts_weights`.
    # NOTE(review): the keys look like scatter counts ("3".."7") mapped to
    # relative weights -- confirm against SugarRush1000.
    "scotter_count_weights": {
        "3": 7.26421894353717,
        "4": 4.1724734692682395,
        "5": 0.8119106579617028,
        "6": 0.20313929837878217,
        "7": 0.04857599989214818,
    },
    # Target RTP in percent (the simulate_* methods return win/bet * 100).
    "target_rtp": 85,
    # Per-symbol value coefficient: scales how strongly a symbol's weight is
    # moved on each tuning step (delta is proportional to value * weight).
    "values": {
        "S": 0.5,
        "A": 0.4,
        "B": 0.6,
        "C": 0.8,
        "D": 1,
        "E": 1.2,
        "F": 1.4,
        "G": 0.6,
    },
    # Starting symbol weights. They are relative, not normalised:
    # train_weights() divides by their sum to print probabilities.
    "weights": {
        "A": 19.015957779792195,
        "B": 21.291015318701493,
        "C": 31.66660200727613,
        "D": 35.193596023259865,
        "E": 48.7122724047052,
        "F": 64.49005324700025,
        "G": 21.291015318701493,
        "S": 2.6840958157151236,
    },
    # Tuning stages executed in order by train_weights(); each entry supplies
    # a display name, an iteration count and the spins per iteration.
    "iterators": [
        {"name": "fast", "iterations": 30, "spins": 10000},
    ],
    # "normal" selects simulate_one_batch(); any other value is passed to
    # simulate_free_batch() as the buy_type.
    "feature": "standard",
}


class SugarRushAutoTuner:
    """Nudge symbol weights until the simulated RTP is close to a target.

    Each tuning step simulates a batch of spins, measures the RTP (in
    percent) and then scales every symbol weight by a factor proportional to
    the RTP error and to that symbol's value coefficient: when the RTP is too
    high, high-value symbols lose the most weight, and vice versa.
    """

    # Largest fraction of its current weight a symbol may gain or lose in a
    # single tuning step.
    MAX_CHANGE_RATIO = 0.4
    # Hard bounds applied to every weight after an update.
    MIN_WEIGHT = 0
    MAX_WEIGHT = 500
    # The RTP error is clamped to +/- this many percentage points before it
    # is turned into a step, so a wildly wrong start cannot collapse weights.
    MAX_ERROR = 5.0

    def __init__(
        self,
        target_rtp: float = 96.5,
        values=None,
        weights=None,
        scotter_count_weights=None,
        feature: str = "normal",
    ):
        """Create a tuner.

        Args:
            target_rtp: Desired RTP in percent.
            values: Mapping of symbol -> value coefficient. Required; its
                keys define the set of symbols that get tuned.
            weights: Mapping of symbol -> initial weight. A copy is kept, so
                the caller's dict is never modified. Defaults to 20.0 for
                every symbol in ``values``.
            scotter_count_weights: Forwarded to ``SugarRush1000`` as
                ``scotter_counts_weights``. ``None`` is forwarded when it is
                omitted or empty.
                NOTE(review): confirm SugarRush1000 accepts None here.
            feature: ``"normal"`` tunes against regular spins; any other
                value is used as the ``buy_type`` of a bought feature.

        Raises:
            ValueError: If ``values`` is not provided.
        """
        if values is None:
            raise ValueError(
                "values is required: a mapping of symbol -> value coefficient"
            )

        self.target_rtp = target_rtp
        self.rows = 7
        self.cols = 7
        self.bet = 1.0
        self.feature = feature
        # Always define the attribute; the simulate_* methods read it
        # unconditionally.
        self.scotter_count_weights = (
            scotter_count_weights if scotter_count_weights else None
        )

        # Symbol -> value coefficient (the higher, the stronger the symbol's
        # weight reacts to an RTP error).
        self.symbols_config = values
        self.symbol_keys = list(self.symbols_config.keys())

        # Work on a private copy: tune() rewrites these weights in place.
        if weights is not None:
            self.weights = dict(weights)
        else:
            self.weights = {k: 20.0 for k in self.symbol_keys}

        # Step size for weight updates: too small converges slowly, too large
        # oscillates.
        self.learning_rate = 0.01

        print(f"初始化自动调优器，feature:{feature} 目标 RTP: {self.target_rtp}%")

    @staticmethod
    def _assert_balance_consistent(begin_balance, end_balance, total_bet, total_win):
        """Check that the game's balance change matches the tracked bet/win totals.

        Magnitudes are compared (as the original string comparison did), with
        a one-cent tolerance instead of comparing two separately rounded
        strings, which can disagree at a rounding boundary.
        """
        balance_change = abs(begin_balance - end_balance)
        net_stake = abs(total_bet - total_win)
        assert abs(balance_change - net_stake) <= 0.01, (
            f"balance changed by {balance_change:.2f} "
            f"but |bet - win| is {net_stake:.2f}"
        )

    @staticmethod
    def _rtp_percent(total_win, total_bet):
        """Return ``total_win / total_bet`` expressed in percent."""
        if total_bet == 0:
            raise ZeroDivisionError(
                "RTP is undefined: no bet was recorded during the simulation"
            )
        return (total_win / total_bet) * 100

    def simulate_one_batch(self, spins: int = 20000):
        """Simulate up to ``spins`` regular spins and return the RTP in percent.

        The loop stops early when a spin result reports an error.

        Raises:
            AssertionError: If the game's balance does not match the totals.
            ZeroDivisionError: If no bet was recorded.
        """
        begin_balance = 1_0000_0000
        game = SugarRush1000(
            balance=begin_balance,
            weights=self.weights,
            scotter_counts_weights=self.scotter_count_weights,
        )

        total_bet = 0.0
        total_win = 0.0
        total_scotter = 0  # spins flagged `is_scotter` by the game
        total_has_scotter = 0  # spins flagged, or whose grid contains an "S"
        for _ in range(spins):
            res = game.doSpin()
            if res["error"]:
                break

            # Only spins with a positive `actual_bet` add to the stake.
            actual_cost = res["actual_bet"]
            if actual_cost > 0:
                total_bet += actual_cost

            if res["is_scotter"]:
                total_scotter += 1
                total_has_scotter += 1
            elif any(symbol == "S" for row in res["grid"] for symbol in row):
                total_has_scotter += 1

            total_win += res["win"]

        self._assert_balance_consistent(
            begin_balance, game.balance, total_bet, total_win
        )
        print(
            f"旋转{spins} 次，scotter {total_scotter}次, containe scotter {total_has_scotter}"
        )
        return self._rtp_percent(total_win, total_bet)

    def simulate_free_batch(self, spins: int = 20000, buy_type: str = "standard"):
        """Buy the feature ``spins`` times, play each out, and return the RTP in percent.

        For every purchase the game is spun until the reported
        ``free_spins_remaining`` reaches zero (or a spin reports an error).

        Raises:
            AssertionError: If the wins summed for a purchase differ from the
                game's ``spin_total_win``, or the balance does not match.
            ZeroDivisionError: If no bet was recorded.
        """
        begin_balance = 1_0000_0000
        game = SugarRush1000(
            balance=begin_balance,
            weights=self.weights,
            scotter_counts_weights=self.scotter_count_weights,
        )

        total_bet = 0.0
        total_win = 0.0
        for _ in range(spins):
            purchase = game.buy_free_spins(buy_type)
            total_bet += purchase["cost"]

            score = 0  # wins accumulated for this purchase only
            can_spins = 1  # always spin at least once after buying
            while can_spins > 0:
                can_spins -= 1

                res = game.doSpin()
                if res["error"]:
                    break

                score += res["win"]
                total_win += res["win"]
                # A non-negative remaining count reported by the game
                # overrides the local counter.
                if res["free_spins_remaining"] >= 0:
                    can_spins = res["free_spins_remaining"]

            # Cross-check the locally summed wins against the game's total
            # for the round, reporting the value that is actually compared.
            if score != res["spin_total_win"]:
                print("score != res[spin_total_win]", score, res["spin_total_win"])
            assert score == res["spin_total_win"]

        self._assert_balance_consistent(
            begin_balance, game.balance, total_bet, total_win
        )
        return self._rtp_percent(total_win, total_bet)

    def tune(
        self,
        iterations: int = 50,
        batch_spins: int = 20000,
        tolerance: float = 3.0,
    ):
        """Iteratively adjust the weights towards ``target_rtp``.

        Args:
            iterations: Maximum number of simulate-and-adjust rounds.
            batch_spins: Spins (or feature purchases) simulated per round.
            tolerance: Stop as soon as the absolute RTP error, in percentage
                points, is below this value.

        Returns:
            ``(weights, history)``: the tuner's weight dict after tuning and
            the list of RTP values measured in each round.
        """
        history = []

        for i in range(iterations):
            # 1. Measure the RTP with the current weights.
            if self.feature == "normal":
                current_rtp = self.simulate_one_batch(batch_spins)
            else:
                current_rtp = self.simulate_free_batch(batch_spins, self.feature)

            error = current_rtp - self.target_rtp
            history.append(current_rtp)

            print(
                f"迭代 {i+1}/{iterations} | RTP: {current_rtp:.4f}% | 目标: {self.target_rtp}% | 误差: {error:+.4f}%"
            )

            # 2. Stop once the error is inside the tolerance band.
            if abs(error) < tolerance:
                print(f"收敛成功！最终 RTP: {current_rtp:.4f}%")
                break

            # 3. Turn the (clamped) error into a relative step.
            safe_error = max(min(error, self.MAX_ERROR), -self.MAX_ERROR)
            adjustment_factor = safe_error * self.learning_rate

            for sym in self.symbol_keys:
                weight = self.weights[sym]
                value = self.symbols_config[sym]

                # RTP above target -> positive factor -> negative delta, so
                # weights shrink in proportion to the symbol's value; RTP
                # below target flips the sign and weights grow.
                delta = -adjustment_factor * value * weight

                # Cap a single step at MAX_CHANGE_RATIO of the current weight.
                limit = weight * self.MAX_CHANGE_RATIO
                if delta > limit:
                    delta = limit
                elif delta < -limit:
                    delta = -limit

                # Keep the result inside [MIN_WEIGHT, MAX_WEIGHT].
                new_weight = weight + delta
                if new_weight < self.MIN_WEIGHT:
                    new_weight = self.MIN_WEIGHT
                if new_weight > self.MAX_WEIGHT:
                    new_weight = self.MAX_WEIGHT

                self.weights[sym] = new_weight

        return self.weights, history


# --- 运行自动调优 ---
def train_weights():
    """Run every tuning stage listed in ``config["iterators"]`` and print the result.

    A random seed in [1, 1000] is drawn, applied to both ``random`` and
    ``numpy.random`` and printed so a run can be reproduced. For each stage
    the weights before and after tuning are printed, both raw and as
    percentages of their sum.
    """
    print("开始训练权重...")
    seed = random.randint(1, 1000)
    random.seed(seed)
    np.random.seed(seed)

    print(f"随机种子: {seed}")
    print("符号价值: ")
    print(json.dumps(config["values"], indent=4, ensure_ascii=False))

    tuner = SugarRushAutoTuner(
        target_rtp=config["target_rtp"],
        values=config["values"],
        # Hand over a copy so tuning never rewrites the module-level config.
        weights=dict(config["weights"]),
        feature=config["feature"],
        scotter_count_weights=config["scotter_count_weights"],
    )

    for stage in config["iterators"]:
        # Look the fields up by key; unpacking `.values()` would silently
        # mix them up if an entry were written in a different key order.
        name = stage["name"]
        iterations = stage["iterations"]
        spins = stage["spins"]

        # Snapshot taken before tune() modifies the tuner's weights.
        begin_weights = tuner.weights.copy()
        print(f"# {name}调优")
        final_weights, rtp_history = tuner.tune(
            iterations=iterations, batch_spins=spins
        )

        print("\n=== 调优前的符号权重 ===")
        for sym, w in begin_weights.items():
            print(f"{sym}: {w:.2f}")
        print("\n 符号出现概率 (%) ===")
        begin_total = sum(begin_weights.values())
        for sym, w in begin_weights.items():
            print(f"{sym}: {(w/begin_total)*100:.2f}%")

        print("\n=== 最终调整后的符号权重 ===")
        print(json.dumps(final_weights, indent=4, ensure_ascii=False))

        # Express the tuned weights as probabilities in percent.
        total_w = sum(final_weights.values())
        print("\n=== 符号出现概率 (%) ===")
        for sym, w in final_weights.items():
            print(f"{sym}: {(w/total_w)*100:.2f}%")


def verify():
    """Measure the RTP of a fixed weight set ten times with the "super" feature.

    Every round builds a fresh tuner from its own copy of the weights and
    runs a single tune() iteration of 10000 purchases, which prints the
    measured RTP and its distance from ``config["target_rtp"]``.
    """
    print("开始进行权重校验:")

    rounds = 10
    candidate_weights = {
        "A": 18.18442161117576,
        "B": 19.994169181485578,
        "C": 29.28572430711806,
        "D": 32.143058029000244,
        "E": 44.060855546850874,
        "F": 57.93237542185442,
        "G": 19.994169181485578,
        "S": 2.5426663862665424,
    }

    for round_no in range(1, rounds + 1):
        checker = SugarRushAutoTuner(
            target_rtp=config["target_rtp"],
            values=config["values"],
            # A new dict per round, so one round cannot affect the next.
            weights=dict(candidate_weights),
            scotter_count_weights=config["scotter_count_weights"],
            feature="super",
        )
        print(f"第 {round_no}/10 次校验:")
        checker.tune(iterations=1, batch_spins=10000)


if __name__ == "__main__":
    # Any command-line argument selects verification of the fixed weights;
    # with no argument the weights are trained.
    entry_point = verify if len(sys.argv) > 1 else train_weights
    entry_point()