import json
import random
import sys
from typing import Dict, List

import numpy as np

from SugarRush1000 import SugarRush1000

# Module purpose: when training the buy-free-spins feature, estimate the
# average number of free spins obtained per purchase.
"""
训练购买免费旋转功能时,一次购买可以获得的平均免费旋转次数
"""


def init_symbol_value():
    """Return the value coefficient per scatter-symbol count.

    Keys are scatter counts ("3".."7"); higher values have a larger
    impact on RTP, so they receive larger weight adjustments in tune().
    """
    return {
        "3": 0.1,
        "4": 0.4,
        "5": 1,
        "6": 1.5,
        "7": 2,
    }


class SugarRushAutoTuner:
    """Iteratively adjusts symbol weights so simulated RTP converges on a target."""

    def __init__(
        self,
        target_rtp: float = 96.5,
        batch_spins=20000,
        weights=None,
        feature: str = "normal",
    ):
        """
        Args:
            target_rtp: target return-to-player percentage to converge on.
            batch_spins: number of spins simulated per tuning iteration.
            weights: optional starting weight table; keys are expected to be
                scatter counts ("3".."7") for tuning, but tune() tolerates
                other key sets (see verify()).
            feature: "normal" simulates paid spins; any other value simulates
                the buy-free-spins feature instead.
        """
        self.target_rtp = target_rtp
        self.rows = 7
        self.cols = 7
        self.bet = 1.0
        self.batch_spins = batch_spins
        self.feature = feature
        # Symbol value coefficients (higher value => larger RTP impact).
        self.symbols_config = init_symbol_value()
        self.symbol_keys = list(self.symbols_config.keys())
        # Initial weights: caller-supplied, or a flat 20.0 per symbol.
        if weights is not None:
            self.weights = weights
        else:
            self.weights = {k: 20.0 for k in self.symbol_keys}
        # Learning rate: too small converges slowly, too large oscillates.
        self.learning_rate = 0.002  # 0.01
        print(f"初始化自动调优器,feature:{feature} 目标 RTP: {self.target_rtp}%")

    def simulate_one_batch(self, spins: int = 20000):
        """Simulate `spins` paid spins and return the realized RTP in percent.

        Returns 0.0 if no bet was placed (e.g. every spin errored out),
        instead of dividing by zero.
        """
        begin_balance = 1_0000_0000
        game = SugarRush1000(balance=begin_balance, weights=self.weights)
        total_bet = 0.0
        total_win = 0.0
        for _ in range(spins):
            # Execute one spin; stop the batch on the first reported error.
            res = game.doSpin()
            if res["error"]:
                break
            actual_cost = res["actual_bet"]
            if actual_cost > 0:
                total_bet += actual_cost
                total_win += res["win"]
        # Sanity check: balance delta must equal bet/win delta (compared as
        # 2-decimal strings to sidestep float noise).
        # NOTE(review): `assert` is stripped under `python -O`; acceptable for
        # a training script, but raise explicitly if this ever guards prod.
        assert (
            f"{abs(begin_balance - game.balance):.2f}"
            == f"{abs(total_bet - total_win):.2f}"
        )
        # Guard: no completed bets -> RTP is undefined; report 0 rather than crash.
        if total_bet == 0:
            return 0.0
        return (total_win / total_bet) * 100

    def simulate_free_batch(self, spins: int = 20000, buy_type: str = "standard"):
        """Simulate `spins` feature purchases and return the mean free spins granted.

        Scatter counts are drawn from self.weights (keys = counts, values =
        draw weights).  `buy_type` is currently unused — kept for interface
        compatibility with tune()'s call site.
        """
        game = SugarRush1000()
        scatter_counts = list(self.weights.keys())
        scatter_count_weights = list(self.weights.values())
        total_free_spins = 0
        for _ in range(spins):
            scatters_count = random.choices(
                scatter_counts, weights=scatter_count_weights, k=1
            )[0]
            total_free_spins += game._add_free_spins(int(scatters_count))
        print(f"购买 {spins} 次免费旋转,实际免费旋转次数 {total_free_spins} 次")
        return total_free_spins / spins

    def tune(self, iterations: int = 50):
        """Iteratively adjust weights toward the target RTP.

        Returns:
            (weights, history): the final weight table and the per-iteration
            RTP readings.
        """
        history = []
        for i in range(iterations):
            # 1. Measure the current RTP with the current weights.
            current_rtp = 0
            if self.feature == "normal":
                current_rtp = self.simulate_one_batch(self.batch_spins)
            else:
                current_rtp = self.simulate_free_batch(self.batch_spins, self.feature)
            error = current_rtp - self.target_rtp
            history.append(current_rtp)
            print(
                f"迭代 {i+1}/{iterations} | RTP: {current_rtp:.4f}% | 目标: {self.target_rtp}% | 误差: {error:+.4f}%"
            )
            # 2. Convergence check.
            if abs(error) < 0.1:
                print(f"收敛成功!最终 RTP: {current_rtp:.4f}%")
                break
            # 3. Dynamic weight adjustment.
            # Strategy: if RTP is too high, lower the weights of high-value
            # symbols and (relatively) raise the low-value ones.
            # adjustment = error * learning_rate * symbol value coefficient.
            # Clamp the error so a huge initial miss cannot collapse the weights.
            safe_error = max(min(error, 5.0), -5.0)
            adjustment_factor = safe_error * self.learning_rate
            # Floor that keeps weights from going negative.
            min_weight = 0
            for sym in self.symbol_keys:
                # Skip symbols absent from the supplied weight table —
                # verify() passes a letter-keyed dict, which previously
                # raised KeyError here on any non-converged iteration.
                if sym not in self.weights:
                    continue
                value = self.symbols_config[sym]
                # Core update rule:
                # RTP > target (error > 0): lower RTP by shrinking the weight
                # of high-value symbols, so delta must be negative:
                #     delta = -adjustment_factor * value
                # RTP < target (error < 0): the same expression flips sign
                # (negative error * negative sign = positive delta), growing
                # high-value weights and raising RTP.
                delta = -adjustment_factor * value * self.weights[sym]
                # Cap any single step at 40% of the current weight to give
                # the algorithm room to breathe instead of overshooting.
                max_change_ratio = 0.4
                if abs(delta) > self.weights[sym] * max_change_ratio:
                    delta = np.sign(delta) * self.weights[sym] * max_change_ratio
                new_weight = self.weights[sym] + delta
                # Clamp to [min_weight, 500] so no symbol vanishes or dominates.
                if new_weight < min_weight:
                    new_weight = min_weight
                if new_weight > 500:
                    new_weight = 500
                self.weights[sym] = new_weight
        return self.weights, history


# --- Run the auto-tuner ---
def train_weights():
    """Entry point: tune weights toward the target RTP and print the results."""
    print("开始训练权重...")
    # Fix the random seed so runs are reproducible.
    seed = 42
    random.seed(seed)
    np.random.seed(seed)
    print(f"随机种子: {seed}")
    fast = {"iterations": 100, "spins": 50_0000}
    # middle = {"iterations": 15, "spins": 5_0000}
    # slow = {"iterations": 10, "spins": 10_0000}
    current = fast
    configs = [
        ("fast", fast),
        # ("middle", middle), ("slow", slow)
    ]
    tuner = SugarRushAutoTuner(
        target_rtp=11,
        batch_spins=current["spins"],
        weights={
            "3": 7.300320151890674,
            "4": 4.256044392311739,
            "5": 0.8531843171577663,
            "6": 0.21882648882051972,
            "7": 0.05364159605552199,
        },
        feature="standard",
    )
    for name, config in configs:
        begin_weights = tuner.weights.copy()
        print(f"# {name}调优")
        tuner.batch_spins = config["spins"]
        final_weights, rtp_history = tuner.tune(iterations=config["iterations"])
        print("\n=== 调优前的符号权重 ===")
        for sym, w in begin_weights.items():
            print(f"{sym}: {w:.2f}")
        print("\n 符号出现概率 (%) ===")
        # Hoisted: the total is loop-invariant; output is unchanged.
        begin_total = sum(begin_weights.values())
        for sym, w in begin_weights.items():
            print(f"{sym}: {(w/begin_total)*100:.2f}%")
        print("\n=== 最终调整后的符号权重 ===")
        print(json.dumps(final_weights, indent=4, ensure_ascii=False))
        # Convert weights to probability percentages.
        total_w = sum(final_weights.values())
        print("\n=== 符号出现概率 (%) ===")
        for sym, w in final_weights.items():
            print(f"{sym}: {(w/total_w)*100:.2f}%")


def verify():
    """Run 10 single-iteration checks of a fixed weight table against the target RTP.

    NOTE(review): these weights are keyed by game-symbol letters ("A".."S"),
    not scatter counts, so tune() leaves them untouched and only reports RTP.
    """
    print("开始进行权重校验:")
    for i in range(10):
        tuner = SugarRushAutoTuner(
            target_rtp=56,
            batch_spins=10_0000,
            weights={
                "A": 12.048116942034044,
                "B": 23.797750079057277,
                "C": 38.01412853023891,
                "D": 49.64407598502997,
                "E": 59.15353799355775,
                "F": 70.33559681987649,
                "G": 83.45720386881379,
                "S": 1.0979547602954776,
            },
        )
        print(f"第 {i+1}/10 次校验:")
        tuner.tune(iterations=1)


if __name__ == "__main__":
    # Any extra CLI argument switches from training to verification.
    if len(sys.argv) >= 2:
        verify()
    else:
        train_weights()