248 lines
7.7 KiB
Python
248 lines
7.7 KiB
Python
import json
|
||
import random
|
||
import sys
|
||
import numpy as np
|
||
from typing import Dict, List
|
||
|
||
from SugarRush1000 import SugarRush1000
|
||
|
||
"""
|
||
训练购买免费旋转功能时,一次购买可以获得的平均免费旋转次数
|
||
"""
|
||
|
||
|
||
def init_symbol_value():
    """Return a new dict mapping each symbol key to its relative value.

    The keys are the strings "3" through "7". A fresh dict is built on every
    call, so callers may modify the result freely.
    """
    symbol_keys = ("3", "4", "5", "6", "7")
    symbol_values = (0.1, 0.4, 1, 1.5, 2)
    return dict(zip(symbol_keys, symbol_values))
|
||
|
||
|
||
class SugarRushAutoTuner:
    """Iteratively adjust selection weights until a simulated metric hits a target.

    With ``feature == "normal"`` the metric is the return-to-player percentage
    measured by :meth:`simulate_one_batch`. With any other ``feature`` the
    metric is the average value returned by the game's ``_add_free_spins`` per
    simulated purchase (see :meth:`simulate_free_batch`), and the weight keys
    are converted with ``int()`` and used as scatter counts.

    Only keys that appear in both ``symbols_config`` and ``weights`` are
    adjusted by :meth:`tune`.
    """

    # |error| below this value counts as converged.
    CONVERGENCE_TOLERANCE = 0.1
    # The error fed into the update rule is clamped to +/- this value so that a
    # very large initial error cannot collapse the weights in one step.
    MAX_ERROR_STEP = 5.0
    # A single update may change a weight by at most this fraction of itself.
    MAX_CHANGE_RATIO = 0.4
    # Hard bounds applied to every weight after an update.
    MIN_WEIGHT = 0
    MAX_WEIGHT = 500
    # Largest accepted difference between the game's balance change and the
    # bet/win totals accumulated by simulate_one_batch.
    BALANCE_TOLERANCE = 0.01

    def __init__(
        self,
        target_rtp: float = 96.5,
        batch_spins: int = 20000,
        weights=None,
        feature: str = "normal",
    ):
        """Store the tuning parameters and the starting weights.

        Args:
            target_rtp: Target value for the simulated metric.
            batch_spins: Number of spins (or purchases) simulated per iteration.
            weights: Optional mapping of key -> weight. It is copied, so the
                caller's dict is never modified by :meth:`tune`. When omitted,
                every key of ``init_symbol_value()`` starts at 20.0.
            feature: ``"normal"`` selects :meth:`simulate_one_batch`; any other
                value selects :meth:`simulate_free_batch`.
        """
        self.target_rtp = target_rtp
        # rows / cols / bet are stored but not read by any method of this class.
        self.rows = 7
        self.cols = 7
        self.bet = 1.0
        self.batch_spins = batch_spins
        self.feature = feature

        # Per-key value: the larger the value, the larger the relative weight
        # change applied to that key on each update.
        self.symbols_config = init_symbol_value()
        self.symbol_keys = list(self.symbols_config.keys())

        if weights is not None:
            # Copy so that tuning does not mutate the caller's dict in place.
            self.weights = dict(weights)
        else:
            self.weights = {k: 20.0 for k in self.symbol_keys}

        # Step size: too small converges slowly, too large oscillates.
        self.learning_rate = 0.002

        print(f"初始化自动调优器,feature:{feature} 目标 RTP: {self.target_rtp}%")

    def simulate_one_batch(self, spins: int = 20000):
        """Simulate up to ``spins`` spins and return the RTP in percent.

        A fresh ``SugarRush1000`` game is created with the current weights.
        The loop stops early as soon as a spin result reports an error.

        Returns:
            ``total_win / total_bet * 100``, or ``0.0`` when nothing was
            wagered (for example ``spins == 0`` or an error on the first spin).

        Raises:
            AssertionError: If the game's balance change does not match the
                accumulated bets minus wins.
        """
        begin_balance = 1_0000_0000
        game = SugarRush1000(balance=begin_balance, weights=self.weights)

        total_bet = 0.0
        total_win = 0.0

        for _ in range(spins):
            res = game.doSpin()
            if res["error"]:
                break

            # Only positive costs are counted as wagered money.
            actual_cost = res["actual_bet"]
            if actual_cost > 0:
                total_bet += actual_cost

            total_win += res["win"]

        # Cross-check our bookkeeping against the game's own balance. This is a
        # numeric comparison with a tolerance (instead of comparing strings
        # rounded to two decimals) and an explicit raise (instead of `assert`),
        # so it is not affected by rounding boundaries, "-0.00" formatting, or
        # the -O flag.
        balance_drop = begin_balance - game.balance
        booked_loss = total_bet - total_win
        if abs(balance_drop - booked_loss) >= self.BALANCE_TOLERANCE:
            raise AssertionError(
                f"balance mismatch: balance changed by {balance_drop:.2f}, "
                f"but bets minus wins is {booked_loss:.2f}"
            )

        if total_bet == 0:
            # Nothing was wagered; avoid ZeroDivisionError.
            return 0.0
        return (total_win / total_bet) * 100

    def simulate_free_batch(self, spins: int = 20000, buy_type: str = "standard"):
        """Simulate ``spins`` feature purchases and return the mean award.

        For each purchase one key is drawn from ``self.weights`` (weighted by
        its value), converted with ``int()`` and passed to the game's
        ``_add_free_spins``; the returned values are summed.

        Args:
            spins: Number of purchases to simulate.
            buy_type: Accepted for interface compatibility; not used here.

        Returns:
            The summed award divided by ``spins``, or ``0.0`` when ``spins``
            is 0.
        """
        game = SugarRush1000()
        scatter_counts = list(self.weights.keys())
        scatter_weights = list(self.weights.values())

        total_free_spins = 0
        for _ in range(spins):
            # One draw per purchase keeps the RNG call order unchanged, so
            # seeded runs stay reproducible.
            scatters_count = random.choices(
                scatter_counts, weights=scatter_weights, k=1
            )[0]
            total_free_spins += game._add_free_spins(int(scatters_count))

        print(f"购买 {spins} 次免费旋转,实际免费旋转次数 {total_free_spins} 次")

        if spins == 0:
            # No purchases were simulated; avoid ZeroDivisionError.
            return 0.0
        return total_free_spins / spins

    def _adjust_weights(self, error: float) -> None:
        """Apply one proportional update to ``self.weights`` for ``error``.

        When the metric is above target (``error > 0``) every tuned weight is
        reduced, and reduced more for keys with a larger configured value;
        when it is below target the weights are increased the same way.
        """
        # Clamp the error so a huge initial miss cannot collapse the weights.
        safe_error = max(min(error, self.MAX_ERROR_STEP), -self.MAX_ERROR_STEP)
        adjustment_factor = safe_error * self.learning_rate

        for sym in self.symbol_keys:
            weight = self.weights.get(sym)
            if weight is None:
                # The supplied weights do not contain this configured key
                # (e.g. a weight table using different symbol names): there is
                # nothing to tune, and indexing would raise KeyError.
                continue

            value = self.symbols_config[sym]

            # error > 0 -> negative delta (lower the metric);
            # error < 0 -> positive delta (raise the metric).
            delta = -adjustment_factor * value * weight

            # Limit a single step to MAX_CHANGE_RATIO of the current weight.
            max_delta = weight * self.MAX_CHANGE_RATIO
            if abs(delta) > max_delta:
                delta = np.sign(delta) * max_delta

            new_weight = weight + delta

            # Keep the weight inside [MIN_WEIGHT, MAX_WEIGHT].
            if new_weight < self.MIN_WEIGHT:
                new_weight = self.MIN_WEIGHT
            if new_weight > self.MAX_WEIGHT:
                new_weight = self.MAX_WEIGHT

            self.weights[sym] = new_weight

    def tune(self, iterations: int = 50):
        """Run up to ``iterations`` simulate-and-adjust rounds.

        Each round simulates one batch with the current weights, records the
        measured value, stops if it is within ``CONVERGENCE_TOLERANCE`` of
        ``target_rtp``, and otherwise updates the weights. The progress lines
        label the measured value "RTP" regardless of ``feature``.

        Returns:
            A tuple ``(weights, history)``: the tuner's weight dict after the
            last update, and the list of measured values, one per round run.
        """
        history: List[float] = []

        for i in range(iterations):
            # 1. Measure the metric with the current weights.
            if self.feature == "normal":
                current_rtp = self.simulate_one_batch(self.batch_spins)
            else:
                current_rtp = self.simulate_free_batch(self.batch_spins, self.feature)

            error = current_rtp - self.target_rtp
            history.append(current_rtp)

            print(
                f"迭代 {i+1}/{iterations} | RTP: {current_rtp:.4f}% | 目标: {self.target_rtp}% | 误差: {error:+.4f}%"
            )

            # 2. Stop once the target is reached.
            if abs(error) < self.CONVERGENCE_TOLERANCE:
                print(f"收敛成功!最终 RTP: {current_rtp:.4f}%")
                break

            # 3. Otherwise move the weights against the error.
            self._adjust_weights(error)

        return self.weights, history
|
||
|
||
|
||
# --- 运行自动调优 ---
|
||
def train_weights():
    """Tune the weights from a fixed starting table and print a report.

    Both the ``random`` and NumPy generators are seeded with 42 first. Each
    configured stage runs the tuner in "standard" feature mode with a target of
    11, then prints the weights and their percentage shares before and after
    the stage.
    """
    print("开始训练权重...")

    # Fixed seed for both RNGs used in this file.
    seed = 42
    random.seed(seed)
    np.random.seed(seed)

    print(f"随机种子: {seed}")

    fast = {"iterations": 100, "spins": 50_0000}
    current = fast
    configs = [("fast", fast)]

    starting_weights = {
        "3": 7.300320151890674,
        "4": 4.256044392311739,
        "5": 0.8531843171577663,
        "6": 0.21882648882051972,
        "7": 0.05364159605552199,
    }
    tuner = SugarRushAutoTuner(
        target_rtp=11,
        batch_spins=current["spins"],
        weights=starting_weights,
        feature="standard",
    )

    def print_shares(table):
        # One "key: share%" line per entry, share taken relative to the total.
        total = sum(table.values())
        for key, weight in table.items():
            print(f"{key}: {(weight/total)*100:.2f}%")

    # NOTE(review): the original indentation was lost; the report below is
    # assumed to belong to the per-stage loop. With the single configured stage
    # the output is the same either way.
    for name, config in configs:
        begin_weights = tuner.weights.copy()
        print(f"# {name}调优")
        tuner.batch_spins = config["spins"]
        final_weights, rtp_history = tuner.tune(iterations=config["iterations"])

        print("\n=== 调优前的符号权重 ===")
        for key, weight in begin_weights.items():
            print(f"{key}: {weight:.2f}")
        print("\n 符号出现概率 (%) ===")
        print_shares(begin_weights)

        print("\n=== 最终调整后的符号权重 ===")
        print(json.dumps(final_weights, indent=4, ensure_ascii=False))

        # Express the tuned weights as percentages of their sum.
        print("\n=== 符号出现概率 (%) ===")
        print_shares(final_weights)
|
||
|
||
|
||
def verify():
    """Run ten independent single-iteration tuner passes on a fixed weight table.

    Every pass builds a new tuner in the default ("normal") feature mode with a
    target of 56 and 10_0000 spins per batch, then calls ``tune`` for exactly
    one iteration so that the measured value is printed.
    """
    print("开始进行权重校验:")

    reference_weights = {
        "A": 12.048116942034044,
        "B": 23.797750079057277,
        "C": 38.01412853023891,
        "D": 49.64407598502997,
        "E": 59.15353799355775,
        "F": 70.33559681987649,
        "G": 83.45720386881379,
        "S": 1.0979547602954776,
    }
    total_rounds = 10

    for round_no in range(1, total_rounds + 1):
        # Each pass gets its own dict, so one pass can never affect the next.
        tuner = SugarRushAutoTuner(
            target_rtp=56,
            batch_spins=10_0000,
            weights=dict(reference_weights),
        )
        print(f"第 {round_no}/{total_rounds} 次校验:")
        tuner.tune(iterations=1)
|
||
|
||
|
||
if __name__ == "__main__":
    # Any command-line argument selects verification; with none, train.
    entry_point = verify if len(sys.argv) >= 2 else train_weights
    entry_point()
|