Initial commit

This commit is contained in:
2026-04-23 16:58:11 +08:00
commit 267eba1eca
2582 changed files with 273338 additions and 0 deletions

167
pyground/sugar/统计.py Normal file
View File

@@ -0,0 +1,167 @@
import pandas as pd
import matplotlib.pyplot as plt
import logging
import sys
from datetime import datetime
import pymysql
# ================= 配置部分 =================
# 日志配置
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(levelname)s - %(message)s",
handlers=[
logging.StreamHandler(sys.stdout),
logging.FileHandler(
f'game_stats_{datetime.now().strftime("%Y%m%d")}.log', encoding="utf-8"
),
],
)
# 数据库连接配置 (请根据实际情况修改)
DB_CONFIG = {
"host": "localhost",
"user": "root",
"password": "123456",
"database": "fire",
"charset": "utf8mb4",
}
# ================= 核心逻辑 =================
def get_db_connection():
"""获取数据库连接"""
try:
conn = pymysql.connect(**DB_CONFIG)
logging.info("数据库连接成功")
return conn
except Exception as e:
logging.error(f"数据库连接失败: {e}")
raise
def fetch_data(conn, type):
"""从数据库获取数据"""
query = f"SELECT gid, score, feature, extra_free, create_time FROM game_sugar_spin{type}"
try:
logging.info(f"正在执行查询: {query}")
df = pd.read_sql(query, conn)
logging.info(f"数据加载完成,共 {len(df)}")
return df
except Exception as e:
logging.error(f"数据查询失败: {e}")
raise
def analyze_data(df):
"""使用 Pandas 进行统计分析"""
logging.info("开始进行统计分析...")
# 1. 基础统计
basic_stats = df["score"].describe().to_frame().T
logging.info(f"\n基础统计信息:\n{basic_stats.to_string()}")
# 2. 统计得分为 0 的数量
zero_score_count = (df["score"] == 0).sum()
total_count = len(df)
zero_ratio = (
(zero_score_count / total_count * 100).round(2) if total_count > 0 else 0
)
logging.info(f"\n得分为 0 的统计: {zero_score_count} 次 (占比: {zero_ratio}%)")
# 2.1 统计免费旋转大于0的数量
extra_free_count = (df["extra_free"] > 0).sum()
total_count = len(df)
zero_ratio = (
(extra_free_count / total_count * 100).round(2) if total_count > 0 else 0
)
logging.info(
f"\n免费旋转次数大于 0 的统计: {extra_free_count} 次 (占比: {zero_ratio}%)"
)
# 3. 得分区间分布
bins = [0, 0.1, 0.5, 1, 10, 50, 100, 500, float("inf")]
labels = ["0-0.1", "0.1-0.5", "0.5-1", "1-10", "11-50", "51-100", "101-500", "500+"]
df["score_range"] = pd.cut(df["score"], bins=bins, labels=labels, right=False)
# 计算数量
range_dist = df["score_range"].value_counts().sort_index().to_frame(name="count")
# 计算比例 (百分比)
range_dist["percentage"] = (range_dist["count"] / len(df) * 100).round(2)
# 格式化日志输出,拼接百分比
# 使用 apply 将每一行转换为 "数量 (百分比%)" 的格式
log_output = range_dist.apply(
lambda row: f"{row['count']} ({row['percentage']}%)", axis=1
).to_string(name=False)
logging.info(f"\n得分区间分布 (数量 | 占比):\n{log_output}")
return basic_stats, range_dist
def visualize_data(range_dist):
"""生成可视化图表"""
logging.info("正在生成图表...")
plt.rcParams["font.sans-serif"] = ["SimHei"] # 用来正常显示中文标签
plt.rcParams["axes.unicode_minus"] = False # 用来正常显示负号
fig, axes = plt.subplots(1, 2, figsize=(16, 6))
# 图2: 得分区间分布 (饼图)
colors = [
"#ff9999",
"#ffb3e6",
"#66b3ff",
"#99ff99",
"#ffcc99",
"#c2c2f0",
"#ffb3e6",
]
range_dist["count"].plot(
kind="pie", ax=axes[1], autopct="%1.1f%%", startangle=90, colors=colors
)
axes[1].set_title("得分区间分布占比", fontsize=14)
axes[1].set_ylabel("") # 隐藏y轴标签
plt.tight_layout()
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
filename = f"game_score_stats_{timestamp}.png"
plt.savefig(filename)
logging.info(f"图表已保存为: {filename}")
# plt.show() # 如需弹窗显示可取消注释
def main():
start_time = datetime.now()
logging.info(f"任务开始执行: {start_time}")
conn = None
try:
conn = get_db_connection()
df = fetch_data(conn, "")
# 打印前5行数据预览
# logging.info("数据预览:\n" + df.head().to_string())
basic_stats, range_dist = analyze_data(df)
# visualize_data(range_dist)
basic_stats, range_dist = analyze_data(df.query("extra_free > 0"))
except Exception as e:
logging.error(f"任务执行出错: {e}")
finally:
if conn:
conn.close()
logging.info("数据库连接已关闭")
end_time = datetime.now()
duration = (end_time - start_time).total_seconds()
logging.info(f"任务执行完成,耗时: {duration:.2f}")
if __name__ == "__main__":
main()