Files
roll-room/pyground/sugar/SugarRushAutoRTPTuner.py
2026-04-23 16:58:11 +08:00

340 lines
12 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import json
import random
import sys
import numpy as np
from typing import Dict, List
from SugarRush1000 import SugarRush1000
"""
符号的价值的含义是对RTP调整的影响程度。符号价值越高符号概率的调整对RTP的影响越大。比如当前RTP过高要降低RTP,算法优先降低高价值符号的概率提升低价值符号的概率。结果就是高价值符号出现频率降低直接拉动了RTP的降低低价值符号出现频率提升又提升了级联效果的流畅性。
所有符号的价值不需要考虑归一化,但是要考虑比例。
调整权重的公式:deltaWeight = -(Error) x (LearningRate) x (SymbolValue) x (CurrentWeight)
相对性原理:算法只关心“符号 A 的价值是符号 B 的多少倍”。如果 A 的价值是 2.0B 是 1.0。当RTP偏高时算法会让 A 的权重减少幅度是 B 的 2倍。
比例要合理不能差距过大否则调优过程非常不稳定。比如符号A Value=1000,符号B value=0.001, 会导致算法对符号A极其敏感稍微一点误差就会让A的权重剧烈震荡而符号B几乎完全不动。
每个符号的价值配置公式:
Value(symbol)约是BasePay(symbol)xVolatilityFactor
BasePay:是符号的期望赔率
VolatilityFactor:是符号的波动因子,人工设定的修正值。
* 对于Scatter:它没有基础赔付,但是能触发免费旋转,Scatter的潜在价值远高于普通符号,所以它的Value应该设置得更高。
* 对于高赔率符号系数设为1.2~1.5。高赔率对RTP影响很大调整它们可以很快让RTP收敛。
* 对于低赔率符号系数设为0.8~1.0。它们主要贡献消除次数和触发级联效果但是本身赔率低对RTP的边际贡献较低
"""
# Simulation-size presets: tuning iterations per run and spins per batch.
# None of the three is referenced by the code shown here; the schedule that
# train_weights() actually runs is config["iterators"] below.
fast = {"iterations": 30, "spins": 100}
middle = {"iterations": 15, "spins": 5_0000}  # 50,000 spins
slow = {"iterations": 10, "spins": 10_0000}  # 100,000 spins
# Settings read by train_weights() and verify().
config = {
    # Forwarded unchanged to SugarRush1000 as `scotter_counts_weights`.
    # Presumably maps a scatter count to its relative weight — TODO confirm
    # against SugarRush1000.
    "scotter_count_weights": {
        "3": 7.26421894353717,
        "4": 4.1724734692682395,
        "5": 0.8119106579617028,
        "6": 0.20313929837878217,
        "7": 0.04857599989214818,
    },
    # Target RTP in percent; tune() compares it with (total_win / total_bet) * 100.
    "target_rtp": 85,
    # Per-symbol "value" multipliers: tune() scales each symbol's weight change
    # by this number, so a larger value means a stronger reaction to RTP error.
    "values": {
        "S": 0.5,
        "A": 0.4,
        "B": 0.6,
        "C": 0.8,
        "D": 1,
        "E": 1.2,
        "F": 1.4,
        "G": 0.6,
    },
    # Starting symbol weights handed to the tuner.
    "weights": {
        "A": 19.015957779792195,
        "B": 21.291015318701493,
        "C": 31.66660200727613,
        "D": 35.193596023259865,
        "E": 48.7122724047052,
        "F": 64.49005324700025,
        "G": 21.291015318701493,
        "S": 2.6840958157151236,
    },
    # Tuning phases run in order by train_weights(); each entry supplies the
    # phase name, the iteration budget and the spins simulated per iteration.
    "iterators": [
        {"name": "fast", "iterations": 30, "spins": 10000},
    ],
    # Tuner mode: "normal" selects simulate_one_batch(); any other value is
    # passed to simulate_free_batch() as the buy type.
    "feature": "standard",
}
class SugarRushAutoTuner:
    """Nudge symbol weights until the simulated RTP lands near a target.

    Each tuning iteration simulates a batch of spins, measures the RTP in
    percent, and multiplies every symbol weight by
    ``1 - error * learning_rate * value`` (with clamping), where ``error`` is
    ``measured_rtp - target_rtp`` and ``value`` is the symbol's entry in
    ``values``. A positive error therefore shrinks high-value symbols the most.
    """

    def __init__(
        self,
        target_rtp: float = 96.5,
        values=None,
        weights=None,
        scotter_count_weights=None,
        feature: str = "normal",
    ):
        """Store the tuning configuration.

        Args:
            target_rtp: Desired RTP in percent.
            values: Mapping ``symbol -> value`` controlling how strongly each
                symbol's weight reacts to RTP error. Required.
            weights: Optional mapping ``symbol -> initial weight``. It is
                copied, so tuning never mutates the caller's dict. When
                omitted every symbol starts at 20.0.
            scotter_count_weights: Optional mapping forwarded to the game as
                ``scotter_counts_weights``.
            feature: ``"normal"`` tunes on regular spins; any other value is
                used as the buy type for bought free-spin rounds.

        Raises:
            ValueError: If ``values`` is None.
        """
        if values is None:
            raise ValueError("values (symbol -> value mapping) is required")
        self.target_rtp = target_rtp
        self.rows = 7
        self.cols = 7
        self.bet = 1.0
        self.feature = feature
        # Always define the attribute so the simulate methods never hit an
        # AttributeError when no scatter-count weights were supplied.
        self.scotter_count_weights = (
            scotter_count_weights if scotter_count_weights else None
        )
        # Symbol values: the higher the value, the stronger the RTP impact.
        self.symbols_config = values
        self.symbol_keys = list(self.symbols_config.keys())
        # Initial weights. Copy so tune() does not modify the caller's dict.
        if weights is not None:
            self.weights = dict(weights)
        else:
            self.weights = {k: 20.0 for k in self.symbol_keys}
        # Learning rate: too small converges slowly, too large oscillates.
        self.learning_rate = 0.01  # 0.002
        print(f"初始化自动调优器feature:{feature} 目标 RTP: {self.target_rtp}%")

    def _new_game(self, balance):
        """Build a SugarRush1000 game using the current weights."""
        kwargs = {"balance": balance, "weights": self.weights}
        if self.scotter_count_weights:
            kwargs["scotter_counts_weights"] = self.scotter_count_weights
        # NOTE(review): when no scatter-count weights are configured the
        # argument is omitted, which assumes SugarRush1000 has a default for
        # it — confirm against SugarRush1000.
        return SugarRush1000(**kwargs)

    def simulate_one_batch(self, spins: int = 20000) -> float:
        """Simulate up to ``spins`` regular spins and return the RTP in percent.

        The loop stops early when a spin result reports an error. Returns 0.0
        when nothing was wagered.
        """
        begin_balance = 1_0000_0000
        game = self._new_game(begin_balance)
        total_bet = 0.0
        total_win = 0.0
        total_scotter = 0
        total_has_scotter = 0
        for _ in range(spins):
            res = game.doSpin()
            if res["error"]:
                break
            # Only spins that actually cost something count toward the bet.
            actual_cost = res["actual_bet"]
            if actual_cost > 0:
                total_bet += actual_cost
            if res["is_scotter"]:
                total_scotter += 1
            # A spin "contains" a scatter if it is flagged as one or if any
            # grid cell shows the "S" symbol; the grid is only scanned when
            # the flag is not set.
            has_scotter = res["is_scotter"] or any(
                symbol == "S" for row in res["grid"] for symbol in row
            )
            if has_scotter:
                total_has_scotter += 1
            total_win += res["win"]
        # Sanity check: the balance change must equal bets minus wins
        # (compared at two decimals).
        assert (
            f"{abs(begin_balance - game.balance):.2f}"
            == f"{abs(total_bet - total_win):.2f}"
        )
        print(
            f"旋转{spins}scotter {total_scotter}次, containe scotter {total_has_scotter}"
        )
        if total_bet == 0:
            # No bet was placed (zero spins or an immediate error).
            return 0.0
        return (total_win / total_bet) * 100

    def simulate_free_batch(
        self, spins: int = 20000, buy_type: str = "standard"
    ) -> float:
        """Buy ``spins`` free-spin rounds and return the RTP in percent.

        For every purchase the round is played until the game reports no free
        spins remaining (or an error). The summed wins of the round are then
        checked against the ``spin_total_win`` of the last spin result.
        Returns 0.0 when nothing was wagered.
        """
        begin_balance = 1_0000_0000
        game = self._new_game(begin_balance)
        total_bet = 0.0
        total_win = 0.0
        for _ in range(spins):
            purchase = game.buy_free_spins(buy_type)
            total_bet += purchase["cost"]
            score = 0
            can_spins = 1
            while can_spins > 0:
                can_spins -= 1
                res = game.doSpin()
                if res["error"]:
                    break
                score += res["win"]
                total_win += res["win"]
                if res["free_spins_remaining"] >= 0:
                    can_spins = res["free_spins_remaining"]
            if score != res["spin_total_win"]:
                # Report the per-round score, which is the value being compared.
                print(
                    "total_win != res[spin_total_win]", score, res["spin_total_win"]
                )
            assert score == res["spin_total_win"]
        # Sanity check: the balance change must equal bets minus wins
        # (compared at two decimals).
        assert (
            f"{abs(begin_balance - game.balance):.2f}"
            == f"{abs(total_bet - total_win):.2f}"
        )
        if total_bet == 0:
            return 0.0
        return (total_win / total_bet) * 100

    def tune(
        self,
        iterations: int = 50,
        batch_spins: int = 20000,
        tolerance: float = 3.0,
    ):
        """Iteratively adjust the weights toward the target RTP.

        Args:
            iterations: Maximum number of simulate-and-adjust rounds.
            batch_spins: Spins (or bought rounds) simulated per iteration.
            tolerance: Stop as soon as ``abs(rtp - target_rtp)`` is below this
                many percentage points.

        Returns:
            A tuple ``(weights, history)``: the tuner's live weight dict and
            the list of RTP values measured in each iteration.
        """
        max_change_ratio = 0.4  # a single step never moves a weight by more than 40%
        min_weight = 0
        max_weight = 500
        history = []
        for i in range(iterations):
            # 1. Measure the RTP produced by the current weights.
            if self.feature == "normal":
                current_rtp = self.simulate_one_batch(batch_spins)
            else:
                current_rtp = self.simulate_free_batch(batch_spins, self.feature)
            error = current_rtp - self.target_rtp
            history.append(current_rtp)
            print(
                f"迭代 {i+1}/{iterations} | RTP: {current_rtp:.4f}% | 目标: {self.target_rtp}% | 误差: {error:+.4f}%"
            )
            # 2. Converged?
            if abs(error) < tolerance:
                print(f"收敛成功!最终 RTP: {current_rtp:.4f}%")
                break
            # 3. Adjust the weights. The error is clamped to +/-5 so that a
            #    large initial error cannot collapse the weights.
            safe_error = max(min(error, 5.0), -5.0)
            adjustment_factor = safe_error * self.learning_rate
            for sym in self.symbol_keys:
                weight = self.weights[sym]
                # RTP too high (error > 0) -> negative delta, scaled by value;
                # RTP too low (error < 0) -> positive delta.
                delta = -adjustment_factor * self.symbols_config[sym] * weight
                cap = weight * max_change_ratio
                if abs(delta) > cap:
                    # float() keeps the stored weights as plain Python floats.
                    delta = float(np.sign(delta)) * cap
                # Keep the weight within [min_weight, max_weight].
                self.weights[sym] = min(max(weight + delta, min_weight), max_weight)
        return self.weights, history
# --- Run the automatic tuning ---
def train_weights():
    """Tune the weights from ``config`` and print a before/after report.

    Seeds both ``random`` and ``numpy.random`` with one freshly drawn seed
    (printed so a run can be reproduced), builds a tuner from ``config``, then
    runs every phase listed in ``config["iterators"]`` in order. After each
    phase the weights and symbol probabilities from before and after that
    phase are printed.
    """
    print("开始训练权重...")
    # Draw and report a seed so the run can be repeated.
    seed = random.randint(1, 1000)
    random.seed(seed)
    np.random.seed(seed)
    print(f"随机种子: {seed}")
    print("符号价值: ")
    print(json.dumps(config["values"], indent=4, ensure_ascii=False))
    tuner = SugarRushAutoTuner(
        target_rtp=config["target_rtp"],
        values=config["values"],
        weights=config["weights"],
        feature=config["feature"],
        scotter_count_weights=config["scotter_count_weights"],
    )
    for phase in config["iterators"]:
        # Read by key instead of unpacking .values(), which silently depends
        # on the order in which the keys were written.
        name = phase["name"]
        iterations = phase["iterations"]
        spins = phase["spins"]
        # Snapshot taken before tuning: the tuner updates its weights in place.
        begin_weights = tuner.weights.copy()
        print(f"# {name}调优")
        final_weights, _ = tuner.tune(iterations=iterations, batch_spins=spins)
        print("\n=== 调优前的符号权重 ===")
        for sym, w in begin_weights.items():
            print(f"{sym}: {w:.2f}")
        print("\n 符号出现概率 (%) ===")
        begin_total = sum(begin_weights.values())
        for sym, w in begin_weights.items():
            print(f"{sym}: {(w/begin_total)*100:.2f}%")
        print("\n=== 最终调整后的符号权重 ===")
        print(json.dumps(final_weights, indent=4, ensure_ascii=False))
        # Convert the weights to probability percentages.
        total_w = sum(final_weights.values())
        print("\n=== 符号出现概率 (%) ===")
        for sym, w in final_weights.items():
            print(f"{sym}: {(w/total_w)*100:.2f}%")
def verify():
    """Run ten independent single-iteration checks of a fixed weight set.

    Every round builds a fresh tuner with feature "super" and calls ``tune``
    for one iteration of 10000 batch spins, so each round prints the RTP it
    measured for the same hard-coded weights.
    """
    print("开始进行权重校验:")
    candidate_weights = {
        "A": 18.18442161117576,
        "B": 19.994169181485578,
        "C": 29.28572430711806,
        "D": 32.143058029000244,
        "E": 44.060855546850874,
        "F": 57.93237542185442,
        "G": 19.994169181485578,
        "S": 2.5426663862665424,
    }
    for attempt in range(1, 11):
        # Hand each round its own dict so every check starts from the same
        # weights, whatever the previous round's tune() call did.
        checker = SugarRushAutoTuner(
            target_rtp=config["target_rtp"],
            values=config["values"],
            weights=dict(candidate_weights),
            scotter_count_weights=config["scotter_count_weights"],
            feature="super",
        )
        print(f"{attempt}/10 次校验:")
        checker.tune(iterations=1, batch_spins=10000)
if __name__ == "__main__":
    # Any command-line argument selects verification; with none, train.
    entry_point = verify if len(sys.argv) > 1 else train_weights
    entry_point()