Initial commit
This commit is contained in:
339
pyground/sugar/SugarRushAutoRTPTuner.py
Normal file
339
pyground/sugar/SugarRushAutoRTPTuner.py
Normal file
@@ -0,0 +1,339 @@
|
||||
import json
|
||||
import random
|
||||
import sys
|
||||
import numpy as np
|
||||
from typing import Dict, List
|
||||
|
||||
from SugarRush1000 import SugarRush1000
|
||||
|
||||
"""
|
||||
符号的价值的含义是对RTP调整的影响程度。符号价值越高,符号概率的调整对RTP的影响越大。比如当前RTP过高,要降低RTP,算法优先降低高价值符号的概率,提升低价值符号的概率。结果就是高价值符号出现频率降低,直接拉动了RTP的降低,低价值符号出现频率提升,又提升了级联效果的流畅性。
|
||||
|
||||
所有符号的价值不需要考虑归一化,但是要考虑比例。
|
||||
调整权重的公式:deltaWeight = -(Error) × (LearningRate) × (SymbolValue) × (CurrentWeight)
|
||||
相对性原理:算法只关心“符号 A 的价值是符号 B 的多少倍”。如果 A 的价值是 2.0,B 是 1.0。当RTP偏高时,算法会让 A 的权重减少幅度是 B 的 2倍。
|
||||
|
||||
比例要合理,不能差距过大,否则调优过程非常不稳定。比如符号A Value=1000,符号B value=0.001, 会导致算法对符号A极其敏感,稍微一点误差就会让A的权重剧烈震荡,而符号B几乎完全不动。
|
||||
|
||||
每个符号的价值配置公式:
|
||||
Value(symbol)约是BasePay(symbol)xVolatilityFactor
|
||||
BasePay:是符号的期望赔率
|
||||
VolatilityFactor:是符号的波动因子,人工设定的修正值。
|
||||
* 对于Scatter:它没有基础赔付,但是能触发免费旋转,Scatter的潜在价值远高于普通符号,所以它的Value应该设置得更高。
|
||||
* 对于高赔率符号:系数设为1.2~1.5。高赔率对RTP影响很大,调整它们可以很快让RTP收敛。
|
||||
* 对于低赔率符号:系数设为0.8~1.0。它们主要贡献消除次数和触发级联效果,但是本身赔率低,对RTP的边际贡献较低
|
||||
"""
|
||||
|
||||
# Tuning-stage presets: number of tuning iterations and spins per batch.
fast = dict(iterations=30, spins=100)
middle = dict(iterations=15, spins=50_000)
slow = dict(iterations=10, spins=100_000)

# Settings consumed by train_weights() and verify().
config = {
    # Weights keyed by scatter count; handed to the game unchanged.
    "scotter_count_weights": {
        "3": 7.26421894353717,
        "4": 4.1724734692682395,
        "5": 0.8119106579617028,
        "6": 0.20313929837878217,
        "7": 0.04857599989214818,
    },
    # Target return-to-player, in percent.
    "target_rtp": 85,
    # Per-symbol tuning value: scales how strongly a symbol's weight reacts
    # to the RTP error.  Key order defines the order symbols are tuned in.
    "values": {
        "S": 0.5,
        "A": 0.4,
        "B": 0.6,
        "C": 0.8,
        "D": 1,
        "E": 1.2,
        "F": 1.4,
        "G": 0.6,
    },
    # Starting symbol weights for tuning.
    "weights": {
        "A": 19.015957779792195,
        "B": 21.291015318701493,
        "C": 31.66660200727613,
        "D": 35.193596023259865,
        "E": 48.7122724047052,
        "F": 64.49005324700025,
        "G": 21.291015318701493,
        "S": 2.6840958157151236,
    },
    # Tuning stages, run in list order (name, iterations, spins).
    "iterators": [
        {"name": "fast", "iterations": 30, "spins": 10000},
    ],
    # "normal" simulates plain spins; any other value is passed to the game
    # as the free-spin purchase type.
    "feature": "standard",
}
|
||||
|
||||
|
||||
class SugarRushAutoTuner:
    """Iteratively adjust symbol weights until the simulated RTP nears a target.

    Each tuning iteration simulates a batch of spins on a ``SugarRush1000``
    game built from the current weights, measures the return-to-player (RTP)
    percentage and then changes every symbol weight by

        delta = -(clamped error) * learning_rate * value * current_weight

    With positive values (as in the module-level config) every weight moves
    in the same direction; a symbol with a larger value moves proportionally
    more.
    """

    DEFAULT_WEIGHT = 20.0        # weight given to each symbol when none is supplied
    START_BALANCE = 100_000_000  # balance handed to every simulated game
    MAX_ERROR = 5.0              # RTP error is clamped to +/- this before scaling
    MAX_CHANGE_RATIO = 0.4       # a single step may move a weight by at most 40%
    MIN_WEIGHT = 0               # lower clamp for a tuned weight
    MAX_WEIGHT = 500             # upper clamp for a tuned weight
    BALANCE_TOLERANCE = 0.01     # allowed drift in the balance cross-check
    WIN_TOLERANCE = 1e-6         # allowed drift in the per-round win cross-check

    def __init__(
        self,
        target_rtp: float = 96.5,
        values=None,
        weights=None,
        scotter_count_weights=None,
        feature: str = "normal",
    ):
        """Store the tuning configuration.

        Args:
            target_rtp: Desired RTP in percent.
            values: Mapping of symbol -> tuning value (required).  Its key
                order defines the order in which symbols are tuned.
            weights: Optional mapping of symbol -> starting weight.  It is
                copied, so tuning never mutates the caller's dict.  When
                omitted every symbol starts at ``DEFAULT_WEIGHT``.
            scotter_count_weights: Optional scatter-count weights forwarded
                to the game.
            feature: ``"normal"`` for plain spins; any other value is passed
                to the game as the free-spin purchase type.

        Raises:
            ValueError: If ``values`` is missing or ``weights`` lacks a symbol
                that appears in ``values``.
        """
        if values is None:
            raise ValueError("values must map every symbol to its tuning value")

        self.target_rtp = target_rtp
        self.rows = 7
        self.cols = 7
        self.bet = 1.0
        self.feature = feature
        # Always define the attribute; previously it was left unset when the
        # argument was omitted and the simulate methods failed on access.
        self.scotter_count_weights = (
            scotter_count_weights if scotter_count_weights else None
        )

        # Symbol -> value (a higher value means a stronger reaction to the error).
        self.symbols_config = values
        self.symbol_keys = list(values.keys())

        if weights is not None:
            missing = [sym for sym in self.symbol_keys if sym not in weights]
            if missing:
                raise ValueError(f"weights is missing symbols: {missing}")
            self.weights = dict(weights)
        else:
            self.weights = {sym: self.DEFAULT_WEIGHT for sym in self.symbol_keys}

        # Step size: too small converges slowly, too large oscillates.
        self.learning_rate = 0.01  # 0.002

        print(f"初始化自动调优器,feature:{feature} 目标 RTP: {self.target_rtp}%")

    def _new_game(self):
        """Build a fresh game that uses the current weights."""
        kwargs = {"balance": self.START_BALANCE, "weights": self.weights}
        if self.scotter_count_weights is not None:
            # NOTE(review): when omitted we rely on SugarRush1000 having its
            # own default for this argument -- confirm against that class.
            kwargs["scotter_counts_weights"] = self.scotter_count_weights
        return SugarRush1000(**kwargs)

    def _check_balance(self, game, total_bet, total_win):
        """Verify the game's balance change equals recorded bets minus wins."""
        spent = self.START_BALANCE - game.balance
        expected = total_bet - total_win
        # Signed, tolerance-based comparison raised explicitly so the check
        # survives ``python -O`` and does not flap on rounding boundaries.
        if abs(spent - expected) >= self.BALANCE_TOLERANCE:
            raise AssertionError(
                f"balance mismatch: balance moved by {spent:.2f}, "
                f"bets - wins = {expected:.2f}"
            )

    @staticmethod
    def _rtp_percent(total_win, total_bet):
        """Return wins as a percentage of bets."""
        if total_bet == 0:
            raise ZeroDivisionError(
                "no bets were recorded, so the RTP is undefined "
                "(spins == 0, or the first spin reported an error)"
            )
        return (total_win / total_bet) * 100

    def simulate_one_batch(self, spins: int = 20000):
        """Simulate up to ``spins`` regular spins and return the RTP in percent.

        The batch stops early when a spin result reports an error.

        Raises:
            AssertionError: If the game's balance disagrees with the recorded
                bets and wins.
            ZeroDivisionError: If no bet was recorded.
        """
        game = self._new_game()

        total_bet = 0.0
        total_win = 0.0
        total_scotter = 0
        total_has_scotter = 0
        for _ in range(spins):
            res = game.doSpin()
            if res["error"]:
                print(f"spin reported an error, batch stopped early: {res['error']}")
                break

            # Only spins with a positive cost count towards the total bet.
            actual_cost = res["actual_bet"]
            if actual_cost > 0:
                total_bet += actual_cost
            # NOTE(review): the source's indentation was lost; this counter is
            # assumed to sit at loop level.  It only feeds the summary print.
            if res["is_scotter"]:
                total_scotter += 1

            # Count spins flagged as scatter or showing at least one "S".
            if res["is_scotter"] or any(
                symbol == "S" for row in res["grid"] for symbol in row
            ):
                total_has_scotter += 1

            total_win += res["win"]

        self._check_balance(game, total_bet, total_win)
        print(
            f"旋转{spins} 次,scotter {total_scotter}次, containe scotter {total_has_scotter}"
        )
        return self._rtp_percent(total_win, total_bet)

    def simulate_free_batch(self, spins: int = 20000, buy_type: str = "standard"):
        """Buy ``spins`` free-spin rounds, play each out, return the RTP in percent.

        Every purchase adds its ``cost`` to the total bet.  A round keeps
        spinning while the game reports free spins remaining.

        Raises:
            AssertionError: If a round's summed wins differ from the game's
                ``spin_total_win``, or the balance cross-check fails.
            ZeroDivisionError: If no cost was recorded.
        """
        game = self._new_game()

        total_bet = 0.0
        total_win = 0.0
        for _ in range(spins):
            purchase = game.buy_free_spins(buy_type)
            total_bet += purchase["cost"]

            round_win = 0
            remaining = 1  # always spin at least once after a purchase
            failed = False
            res = None
            while remaining > 0:
                remaining -= 1
                res = game.doSpin()
                if res["error"]:
                    print(f"spin reported an error, round stopped early: {res['error']}")
                    failed = True
                    break

                round_win += res["win"]
                total_win += res["win"]
                if res["free_spins_remaining"] >= 0:
                    remaining = res["free_spins_remaining"]

            # NOTE(review): the source's indentation was lost; the check is
            # assumed to run once per purchased round, against the last result.
            if not failed and res is not None:
                reported = res["spin_total_win"]
                if abs(round_win - reported) > self.WIN_TOLERANCE:
                    raise AssertionError(
                        f"round win {round_win} != spin_total_win {reported}"
                    )

        self._check_balance(game, total_bet, total_win)
        return self._rtp_percent(total_win, total_bet)

    def tune(self, iterations: int = 50, batch_spins: int = 20000, tolerance: float = 3.0):
        """Iteratively adjust the weights towards the target RTP.

        Args:
            iterations: Maximum number of simulate-and-adjust rounds.
            batch_spins: Spins (or purchased rounds) simulated per round.
            tolerance: Stop as soon as ``|rtp - target_rtp|`` is below this
                many percentage points.

        Returns:
            ``(weights, history)``: the tuner's weight dict and the list of
            RTP values measured in each round.
        """
        history = []

        for i in range(iterations):
            # 1. Measure the RTP with the current weights.
            if self.feature == "normal":
                current_rtp = self.simulate_one_batch(batch_spins)
            else:
                current_rtp = self.simulate_free_batch(batch_spins, self.feature)

            error = current_rtp - self.target_rtp
            history.append(current_rtp)

            print(
                f"迭代 {i+1}/{iterations} | RTP: {current_rtp:.4f}% | 目标: {self.target_rtp}% | 误差: {error:+.4f}%"
            )

            # 2. Stop once we are close enough.
            if abs(error) < tolerance:
                print(f"收敛成功!最终 RTP: {current_rtp:.4f}%")
                break

            # 3. Adjust the weights.  The error is clamped so that a large
            #    initial error cannot collapse the weights in a single step.
            safe_error = max(min(error, self.MAX_ERROR), -self.MAX_ERROR)
            adjustment_factor = safe_error * self.learning_rate

            for sym in self.symbol_keys:
                weight = self.weights[sym]
                # RTP above target (error > 0) gives a negative delta and
                # shrinks the weight; RTP below target grows it.
                delta = -adjustment_factor * self.symbols_config[sym] * weight

                # Never move a weight by more than MAX_CHANGE_RATIO per step.
                cap = abs(weight) * self.MAX_CHANGE_RATIO
                delta = max(-cap, min(cap, delta))

                self.weights[sym] = min(
                    max(weight + delta, self.MIN_WEIGHT), self.MAX_WEIGHT
                )

        return self.weights, history
|
||||
|
||||
|
||||
# --- 运行自动调优 ---
|
||||
def train_weights():
    """Run every configured tuning stage and print the weights before and after.

    A random seed is drawn, printed and applied to both ``random`` and
    ``numpy.random``.  One tuner is built from the module-level ``config``
    and reused across all stages listed in ``config["iterators"]``, so each
    stage starts from the weights the previous stage ended with.
    """
    print("开始训练权重...")
    # Draw, apply and print the seed.
    seed = random.randint(1, 1000)
    random.seed(seed)
    np.random.seed(seed)

    print(f"随机种子: {seed}")
    print("符号价值: ")
    print(json.dumps(config["values"], indent=4, ensure_ascii=False))

    tuner = SugarRushAutoTuner(
        target_rtp=config["target_rtp"],
        values=config["values"],
        weights=config["weights"],
        feature=config["feature"],
        scotter_count_weights=config["scotter_count_weights"],
    )

    for stage in config["iterators"]:
        # Read fields by key so the result does not depend on key order.
        name = stage["name"]
        iterations = stage["iterations"]
        spins = stage["spins"]

        # Snapshot first: tune() updates the tuner's weights in place.
        begin_weights = dict(tuner.weights)
        print(f"# {name}调优")
        final_weights, _ = tuner.tune(iterations=iterations, batch_spins=spins)

        # NOTE(review): the source's indentation was lost; the report is
        # emitted once per stage here, which also avoids unbound names when
        # no stage is configured.
        print("\n=== 调优前的符号权重 ===")
        for sym, w in begin_weights.items():
            print(f"{sym}: {w:.2f}")
        print("\n 符号出现概率 (%) ===")
        begin_total = sum(begin_weights.values())
        for sym, w in begin_weights.items():
            print(f"{sym}: {(w/begin_total)*100:.2f}%")

        print("\n=== 最终调整后的符号权重 ===")
        print(json.dumps(final_weights, indent=4, ensure_ascii=False))

        # Convert the weights into probability percentages.
        total_w = sum(final_weights.values())
        print("\n=== 符号出现概率 (%) ===")
        for sym, w in final_weights.items():
            print(f"{sym}: {(w/total_w)*100:.2f}%")
|
||||
|
||||
|
||||
def verify():
    """Run ten independent single-iteration checks of a fixed weight set.

    Each round builds a new tuner with the ``"super"`` feature and the
    module-level target RTP, values and scatter-count weights, then runs
    one tuning iteration of 10000 spins.
    """
    print("开始进行权重校验:")

    candidate_weights = {
        "A": 18.18442161117576,
        "B": 19.994169181485578,
        "C": 29.28572430711806,
        "D": 32.143058029000244,
        "E": 44.060855546850874,
        "F": 57.93237542185442,
        "G": 19.994169181485578,
        "S": 2.5426663862665424,
    }

    for round_no in range(1, 11):
        checker = SugarRushAutoTuner(
            target_rtp=config["target_rtp"],
            values=config["values"],
            # Fresh copy per round so every round starts from the same weights.
            weights=dict(candidate_weights),
            scotter_count_weights=config["scotter_count_weights"],
            feature="super",
        )
        print(f"第 {round_no}/10 次校验:")
        checker.tune(iterations=1, batch_spins=10000)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Any command-line argument selects verification; with none, train.
    entry_point = verify if len(sys.argv) >= 2 else train_weights
    entry_point()
|
||||
Reference in New Issue
Block a user