Beijing_air_quality_prediction/air_quality_prediction.ipynb

1600 lines
1.5 MiB
Plaintext
Raw Normal View History

2025-03-24 09:57:14 +08:00
{
"cells": [
{
"metadata": {},
"cell_type": "markdown",
"source": [
"# 预测建模\n",
"北京市空气质量指数预测(推荐难度系数:10)\n",
"\n",
"这个数据集是北京市2022年11月1日至2023年10月31日期间空气质量相关数据。\n",
"根据这个数据集,回答以下问题"
],
"id": "b610f839dca4877"
},
{
"metadata": {
"collapsed": true,
"ExecuteTime": {
2025-03-26 16:59:57 +08:00
"end_time": "2025-03-26T08:40:24.864528Z",
"start_time": "2025-03-26T08:40:24.859852Z"
2025-03-24 09:57:14 +08:00
}
},
2025-03-24 15:19:11 +08:00
"cell_type": "code",
2025-03-24 09:57:14 +08:00
"source": [
2025-03-24 15:19:11 +08:00
"import os\n",
"import sys\n",
"\n",
2025-03-24 09:57:14 +08:00
"#导入基础包\n",
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
2025-03-24 17:06:38 +08:00
"\n",
2025-03-24 15:19:11 +08:00
"from statsmodels.graphics.tsaplots import plot_acf\n",
"import matplotlib.font_manager as fm\n",
2025-03-24 09:57:14 +08:00
"\n",
2025-03-24 15:19:11 +08:00
"# 导入主成分分析相关包\n",
2025-03-24 09:57:14 +08:00
"from factor_analyzer import Rotator\n",
"from factor_analyzer.factor_analyzer import calculate_bartlett_sphericity, calculate_kmo\n",
"\n",
2025-03-24 15:19:11 +08:00
"# 导入SARIMA相关包\n",
"from statsmodels.tsa.statespace.sarimax import SARIMAX\n",
2025-03-26 14:57:30 +08:00
"from sklearn.metrics import mean_absolute_error, mean_squared_error\n",
"from pmdarima import auto_arima\n",
"import pmdarima as pm\n",
2025-03-24 15:19:11 +08:00
"\n",
"# 导入XGBOOST相关包\n",
2025-03-24 09:57:14 +08:00
"from xgboost import XGBRegressor\n",
"from scipy.stats import randint, uniform\n",
"from sklearn.model_selection import RandomizedSearchCV\n",
"from matplotlib.dates import DateFormatter, HourLocator\n",
"\n",
2025-03-24 15:19:11 +08:00
"# 导入单独写的函数\n",
2025-03-24 09:57:14 +08:00
"from calculate import *\n",
"from heatmap import *\n",
"from sort_matrix import *"
],
2025-03-24 15:19:11 +08:00
"id": "initial_id",
2025-03-24 09:57:14 +08:00
"outputs": [],
2025-03-26 16:59:57 +08:00
"execution_count": 14
2025-03-24 09:57:14 +08:00
},
{
"metadata": {
"ExecuteTime": {
2025-03-26 16:59:57 +08:00
"end_time": "2025-03-26T08:40:25.424563Z",
"start_time": "2025-03-26T08:40:24.935756Z"
2025-03-24 09:57:14 +08:00
}
},
"cell_type": "code",
"source": [
2025-03-24 15:19:11 +08:00
"# Configure a CJK-capable font so the Chinese titles and labels render.\n",
"font_path = None  # stays None on Windows, where the font is selected by name\n",
"if sys.platform == 'darwin':  # macOS\n",
"    font_path = '/System/Library/Fonts/STHeiti Light.ttc'\n",
"elif sys.platform == 'win32':  # Windows\n",
"    plt.rcParams['font.sans-serif'] = ['SimHei']  # SimHei ships with Windows\n",
"else:  # Linux / other systems\n",
"    font_path = '/usr/share/fonts/truetype/wqy/wqy-zenhei.ttc'  # WenQuanYi Zen Hei\n",
"# Work around the minus-sign rendering problem on every platform, not only Windows.\n",
"plt.rcParams['axes.unicode_minus'] = False\n",
"\n",
"# Non-Windows platforms load the font from an explicit file path.\n",
"if font_path is not None:\n",
"    try:\n",
"        # Register the file first so the family name set below can be resolved.\n",
"        fm.fontManager.addfont(font_path)\n",
"        font_prop = fm.FontProperties(fname=font_path)\n",
"        plt.rcParams['font.family'] = font_prop.get_name()\n",
"    except Exception:\n",
"        # Best effort: warn and keep matplotlib's default font. Unlike a bare\n",
"        # except, this does not swallow KeyboardInterrupt / SystemExit.\n",
"        print(f\"警告:{font_path} 字体加载失败,请检查路径有效性\")\n",
"\n",
"try:\n",
" os.mkdir('./images')\n",
"except FileExistsError:\n",
2025-03-24 17:31:14 +08:00
" pass\n",
2025-03-26 14:57:30 +08:00
"try:\n",
" os.mkdir('./results')\n",
"except FileExistsError:\n",
" pass\n",
2025-03-24 17:31:14 +08:00
"#读取数据\n",
"data=pd.read_excel('北京市空气质量指数与气象数据.xlsx')\n",
"data.head()"
2025-03-24 09:57:14 +08:00
],
"id": "92ea7ba1218799cd",
2025-03-24 17:31:14 +08:00
"outputs": [
{
"data": {
"text/plain": [
" date hour AQI CO NO2 O3 PM10 \\\n",
"0 2022-11-01 2 18.371429 0.211429 23.771429 29.057143 13.257143 \n",
"1 2022-11-01 5 21.914286 0.180000 26.571429 20.142857 18.914286 \n",
"2 2022-11-01 8 28.628571 0.311429 30.028571 14.285714 27.942857 \n",
"3 2022-11-01 11 19.000000 0.237143 17.971429 40.529412 17.852941 \n",
"4 2022-11-01 14 21.742857 0.252941 15.588235 53.617647 20.941176 \n",
"\n",
" PM2.5 SO2 T ... P Pa U Ff Tn Tx VV Td \\\n",
"0 3.057143 2.628571 6.7 ... 770.5 0.1 36.0 1.0 5.3 17.3 30.0 -7.3 \n",
"1 3.771429 2.542857 2.0 ... 770.8 0.3 62.0 0.0 1.9 17.3 7.0 -4.5 \n",
"2 6.857143 2.400000 6.6 ... 771.7 0.9 56.0 0.0 0.9 17.3 10.0 -7.1 \n",
"3 5.914286 2.176471 13.5 ... 771.3 -0.4 19.0 2.0 0.9 17.3 30.0 -9.7 \n",
"4 6.742857 2.000000 15.7 ... 768.6 -2.7 19.0 2.0 0.9 17.3 30.0 -7.9 \n",
"\n",
" RRR tR \n",
"0 0.0 12 \n",
"1 0.0 12 \n",
"2 0.0 12 \n",
"3 0.0 12 \n",
"4 0.0 12 \n",
"\n",
"[5 rows x 21 columns]"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>hour</th>\n",
" <th>AQI</th>\n",
" <th>CO</th>\n",
" <th>NO2</th>\n",
" <th>O3</th>\n",
" <th>PM10</th>\n",
" <th>PM2.5</th>\n",
" <th>SO2</th>\n",
" <th>T</th>\n",
" <th>...</th>\n",
" <th>P</th>\n",
" <th>Pa</th>\n",
" <th>U</th>\n",
" <th>Ff</th>\n",
" <th>Tn</th>\n",
" <th>Tx</th>\n",
" <th>VV</th>\n",
" <th>Td</th>\n",
" <th>RRR</th>\n",
" <th>tR</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2022-11-01</td>\n",
" <td>2</td>\n",
" <td>18.371429</td>\n",
" <td>0.211429</td>\n",
" <td>23.771429</td>\n",
" <td>29.057143</td>\n",
" <td>13.257143</td>\n",
" <td>3.057143</td>\n",
" <td>2.628571</td>\n",
" <td>6.7</td>\n",
" <td>...</td>\n",
" <td>770.5</td>\n",
" <td>0.1</td>\n",
" <td>36.0</td>\n",
" <td>1.0</td>\n",
" <td>5.3</td>\n",
" <td>17.3</td>\n",
" <td>30.0</td>\n",
" <td>-7.3</td>\n",
" <td>0.0</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2022-11-01</td>\n",
" <td>5</td>\n",
" <td>21.914286</td>\n",
" <td>0.180000</td>\n",
" <td>26.571429</td>\n",
" <td>20.142857</td>\n",
" <td>18.914286</td>\n",
" <td>3.771429</td>\n",
" <td>2.542857</td>\n",
" <td>2.0</td>\n",
" <td>...</td>\n",
" <td>770.8</td>\n",
" <td>0.3</td>\n",
" <td>62.0</td>\n",
" <td>0.0</td>\n",
" <td>1.9</td>\n",
" <td>17.3</td>\n",
" <td>7.0</td>\n",
" <td>-4.5</td>\n",
" <td>0.0</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2022-11-01</td>\n",
" <td>8</td>\n",
" <td>28.628571</td>\n",
" <td>0.311429</td>\n",
" <td>30.028571</td>\n",
" <td>14.285714</td>\n",
" <td>27.942857</td>\n",
" <td>6.857143</td>\n",
" <td>2.400000</td>\n",
" <td>6.6</td>\n",
" <td>...</td>\n",
" <td>771.7</td>\n",
" <td>0.9</td>\n",
" <td>56.0</td>\n",
" <td>0.0</td>\n",
" <td>0.9</td>\n",
" <td>17.3</td>\n",
" <td>10.0</td>\n",
" <td>-7.1</td>\n",
" <td>0.0</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2022-11-01</td>\n",
" <td>11</td>\n",
" <td>19.000000</td>\n",
" <td>0.237143</td>\n",
" <td>17.971429</td>\n",
" <td>40.529412</td>\n",
" <td>17.852941</td>\n",
" <td>5.914286</td>\n",
" <td>2.176471</td>\n",
" <td>13.5</td>\n",
" <td>...</td>\n",
" <td>771.3</td>\n",
" <td>-0.4</td>\n",
" <td>19.0</td>\n",
" <td>2.0</td>\n",
" <td>0.9</td>\n",
" <td>17.3</td>\n",
" <td>30.0</td>\n",
" <td>-9.7</td>\n",
" <td>0.0</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2022-11-01</td>\n",
" <td>14</td>\n",
" <td>21.742857</td>\n",
" <td>0.252941</td>\n",
" <td>15.588235</td>\n",
" <td>53.617647</td>\n",
" <td>20.941176</td>\n",
" <td>6.742857</td>\n",
" <td>2.000000</td>\n",
" <td>15.7</td>\n",
" <td>...</td>\n",
" <td>768.6</td>\n",
" <td>-2.7</td>\n",
" <td>19.0</td>\n",
" <td>2.0</td>\n",
" <td>0.9</td>\n",
" <td>17.3</td>\n",
" <td>30.0</td>\n",
" <td>-7.9</td>\n",
" <td>0.0</td>\n",
" <td>12</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 21 columns</p>\n",
"</div>"
]
},
2025-03-26 16:59:57 +08:00
"execution_count": 15,
2025-03-24 17:31:14 +08:00
"metadata": {},
"output_type": "execute_result"
}
],
2025-03-26 16:59:57 +08:00
"execution_count": 15
2025-03-24 09:57:14 +08:00
},
{
"metadata": {},
"cell_type": "markdown",
"source": [
"## 题目1\n",
"研究单日内空气质量指数与各项指标的变化趋势,这种趋势是否具有周期性?"
],
"id": "bca65e544d8bef55"
},
{
"metadata": {
"ExecuteTime": {
2025-03-26 16:59:57 +08:00
"end_time": "2025-03-26T08:40:25.448744Z",
"start_time": "2025-03-26T08:40:25.433568Z"
2025-03-24 09:57:14 +08:00
}
},
"cell_type": "code",
"source": [
2025-03-24 15:19:11 +08:00
"# 数据预处理:将数据按小时分组,计算每个小时各指标的平均值\n",
"# 将日期与小时合并为完整时间戳,随后按小时分组求各指标均值\n",
"data['datetime'] = pd.to_datetime(data['date']) + pd.to_timedelta(data['hour'], unit='h')\n",
"valid_hours = sorted(data['hour'].unique())\n",
"hourly_data = data.groupby('hour').mean().loc[valid_hours]\n",
"plt.figure(figsize=(12, 8))\n",
"indicators = ['AQI', 'PM2.5', 'PM10', 'CO', 'NO2', 'O3','SO2']\n",
"colors = ['#2d87bb', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf', '#1f77b4', '#ffbb78', '#98df8a', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf', '#1f77b4', '#ffbb78', '#98df8a', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf', '#1f77b4', '#ffbb78', '#98df8a', '#d62728',]\n",
2025-03-24 09:57:14 +08:00
"\n",
2025-03-24 17:31:14 +08:00
"normalized = (hourly_data[indicators] - hourly_data[indicators].mean(axis=0)) / hourly_data[indicators].std(axis=0)"
2025-03-24 09:57:14 +08:00
],
2025-03-24 15:19:11 +08:00
"id": "118b1b48e798a7ba",
"outputs": [
{
"data": {
"text/plain": [
"<Figure size 1200x800 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
2025-03-26 16:59:57 +08:00
"execution_count": 16
2025-03-24 09:57:14 +08:00
},
{
"metadata": {
"ExecuteTime": {
2025-03-26 16:59:57 +08:00
"end_time": "2025-03-26T08:40:27.472610Z",
"start_time": "2025-03-26T08:40:25.476541Z"
2025-03-24 09:57:14 +08:00
}
},
"cell_type": "code",
2025-03-24 15:19:11 +08:00
"source": [
"# Line chart of the standardized hourly mean of every indicator.\n",
"# The figure is opened in this cell on purpose: the recorded outputs show that a\n",
"# figure created in an earlier cell stayed empty (1200x800, 0 axes) while this plot\n",
"# fell back to the default 640x480 size.\n",
"plt.figure(figsize=(12, 8))\n",
"for i, indicator in enumerate(indicators):\n",
"    plt.plot(normalized.index, normalized[indicator],\n",
"             marker='o', label=indicator, color=colors[i], linewidth=2)\n",
"\n",
"plt.title('各指标小时均值变化趋势(标准化后)', fontsize=14)\n",
"plt.xlabel('小时', fontsize=12)\n",
"plt.ylabel('标准化值', fontsize=12)\n",
"plt.xticks(range(0, 24))\n",
"plt.grid(alpha=0.3)\n",
"plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')\n",
"plt.tight_layout()\n",
"\n",
"# Save the combined figure before showing it.\n",
"plt.savefig('images/hourly_trends_combined.png', dpi=300, bbox_inches='tight')\n",
"plt.show()\n",
"\n",
"# Additionally save one figure per indicator; each is closed right after saving\n",
"# so it is neither displayed nor kept in memory.\n",
"for i, indicator in enumerate(indicators):\n",
"    plt.figure(figsize=(8, 5))\n",
"    plt.plot(normalized.index, normalized[indicator],\n",
"             marker='o', color=colors[i], linewidth=2)\n",
"    plt.title(f'{indicator}小时均值变化趋势(标准化后)')\n",
"    plt.xlabel('小时')\n",
"    plt.ylabel('标准化值')\n",
"    plt.xticks(range(0, 24))\n",
"    plt.grid(alpha=0.3)\n",
"    plt.tight_layout()\n",
"    plt.savefig(f'images/hourly_{indicator}.png', dpi=300)\n",
"    plt.close()"
],
"id": "57dedbd9b7bbe12d",
"outputs": [
{
"data": {
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
],
2025-03-26 16:59:57 +08:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAnIAAAHWCAYAAADzS2TwAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOydd3xT1RfAvy9pm+49gFJKW/Yoe28BcTBERHAxFEV/IioOhgscgIOhoiguQHCgiAwVZU9ZsvdsgZbuvZI2ub8/0qYtXelI08L9fj75tPe9++49ee8lOe+cc89RhBACiUQikUgkEkmtQ2VtASQSiUQikUgkFUMqchKJRCKRSCS1FKnISSQSiUQikdRSpCInkUgkEolEUkuRipxEIpFIJBJJLUUqchKJRCKRSCS1FKnISSQSiUQikdRSpCInkUgkEolEUkuRitxtzNatW9mzZ0+Fj//mm2945513Cm3bvHkz586dK/dYV65c4a677mLv3r0Vlqcsfv/9dx599FFu3LhhsTkOHjzIuHHjOHz4cIl9vvrqK7788ssi27Oysnj44Yf5448/yjVnVlYWiYmJxe6Li4sjKyurXOMVJCYmhhkzZvDLL79UeIwDBw6QlpZWaNvRo0fZsWNHucd64oknmDFjRrH7xo8fz8svv4y5Oc4PHz7MihUr0Ol0hbZfu3aNb775hkuXLpVbPnMJCwsjIiKi2H2W/ixkZ2eza9cuU/vq1avs27evXGNU5TU1h7i4ODZs2FDid0tsbCwXLlywyNwSSY1HSG5bunXrJjw9PUVsbGyFjh85cqRo0aJFoW0ajUa8+OKLRfpmZWWJ8PDwEsc6c+aMAMQff/xh2nb+/Hlx8OBBcerUKXHmzJlCr1OnTokjR46Iq1evmi3vnDlzBFCqHJVlzZo1AhAbNmwosU/Hjh1Fu3btimzfsmWLAMQnn3xSrjlXrlwpnJycxNdff11kX9OmTYWjo6NITk4u15h5aLVa4e/vLzp06FCh44UQonHjxsLb21vEx8ebtt13333C39+/3GN17dpVPPTQQ0W2X7lyRahUKvHSSy+ZPdbrr78unJychMFgKLT99OnTAhA///xzueUzh+PHjws/Pz/x7LPPFru/qj4LaWlpxY6/efNmAYhJkyYJIYQYNGiQcHFxEQcOHDD7PVTVNc3IyBDR0dHi4sWL4vDhw+KPP/4QX375pXjzzTfF+PHjRe/evUWdOnUEIADRq1cvodfri4zz6quvCi8vL3Ho0KFyzS+R3ArYWE2DlFiVkydP8u+//zJlyhTi4uKIi4srtp+vry+enp7F7nNwcMDW1rbQNjs7u0LbLl68yA8//MBnn32Gj48P+/btw9nZuchYGo0GAHt7e9O29957j2XLlpX6Pt555x1ef/31UvvcPMfNMpfE//73PxYvXlxk+7PPPsuiRYtKncPOzq7EcR0cHMjJySmyfdOmTbi6uvLEE0+YJV8e33//PSqVil69enH48GE0Gg1qtRoAnU5HYGAgkZGRREZGAqDX68nJyaFNmzYA3Lhxg19//RV7e3vUajUqVWFDfdOmTdm6dSsfffQR3t7epu1CCPR6PTqdjkaNGnHnnXcWke3vv//mwoULzJkzp9B95OTkhI2NeV8/iYmJGAwGNBoNtra2KIpCcnIyWVlZ+Pn5ATBv3jwMBgNnzpzh0UcfNR3bsmVLpk+fbmqfO3eOrKwsbG1tSU5OxtnZmdOnTyOEIDg4mKtXr5osthEREWzdupXs7GwMBgN33323WfKWxpUrVxgwYAB33303CxcuLLZPVXwWLly4QN++fZk1axYTJkwo1G/dunUATJw4EYCVK1fSvXt3Hn/8cY4dO1bk+t9MVVxTgH/++YdBgwYV2qZSqfD29sbX1xc/Pz8CAwPp2bMn/v7++Pv7U69ePbKzs03nKI/Zs2eTkpLCwIED2bt3L82aNTNbDomktiMVuduUF154AYD58+czf/78Yvu4ub
lx7NixEhU5RVGKbFOpVOzevZtRo0axb98+rl69SvPmzRkxYgS9evUiOzu71LEK/oh8+OGHfPjhh3h4eBT6gTAYDKSlpZGamoqjo6NZ7xfyFbji5C6t/5kzZ0zbmjdvXqqSlid/niJVUp/ifix/+eUXfH19efvtt4s9btSoUbRr167QtvPnz7Np0yamT59OZGQk/fr1K/bY5s2bF2r7+/tz/fp1wOhae+mll0yKXJ7sqamp6PV63N3d8fLy4s033yQzMxMvLy8AcnJyyM7OJjs7m0ceeaRYRW7OnDl4eHjw3HPPlXg+8khPT+fOO+/kgw8+oEePHqbtjz32WBF38w8//AAY3YRhYWEsWbKEhx9+mF69egGwbds2Vq1axfbt2wsd9+qrr7J582ZUKhXZ2dnodDq6du1KSEgIy5cv5+677zY9aHz99df4+Phw6tQpNBqN6XxVFJ1Ox/Dhw+nTpw/fffddifdhVXwWAgMDadu2LU899RRCCJ588kkAMjIy+P7772ndujU2NjacPXsWMCqKrq6unD9/Hr1eT1ZWFg0bNjRd64JUxTXNkx1gxYoV9OvXD2dnZ5ydnctUJItDrVazePFiMjMzue+++zhy5AgODg7lHkciqY1IRe425Ntvv2XLli3ccccdnD17lpMnT+Lh4WHa//PPP/PQQw/x5ZdfEhgYWOhYrVbL8ePHcXR0JDk52dQ2GAy0bdsWgKioKLp27cr8+fPp1asXvr6+JcqyePFi7OzsSE5OBoxP6ZcuXcLFxYUHH3yw2GNUKhWurq64urpW8kyUTt4P5s1P9zY2NmzevJmLFy9ia2trUn4UReHkyZOA0bp28eJF6tevzz333GNSGuzs7EwxXFqtloyMDNzc3Dh48CCXLl2ibdu27N69u9B8qampHD9+nHbt2hVR5GbMmIFer2fy5Ml4eXmZrE0qlYqcnBwcHByYMGFCIctidnY2er3e1O7SpYspTuzUqVO0bNkSgAceeICTJ0+afuxff/113nvvPZP1Nisrq5DV6GY2btzIjh07mDRpEk5OTmWe78mTJ7N3715Wr15d6Ef/u+++Q6/Xo9FouP/++6lXrx4ff/wxWVlZqNVqnn32WVxdXVm8eDGurq7k5OSwYMECRo0aRZ8+fQrNsXbtWtP/7777Ll9//TVhYWEIIVAUhYiICP777z86duzITz/9ROvWrbnjjjtISUkpU/6ymDdvHsnJySUqcVX5WbCzs+PXX39l0KBBTJw4EUVRmDBhAsuXLycxMRGtVkvPnj0B4/2QkpKCq6srarUarVaLTqdjxYoVjBo1qtC4VXVNAdPnIM/SVhV88cUXtG3blvfee4933323SsaUSGo81vTrSqqfAwcOCHt7e/HMM8+IjIwM0bhxYzF8+HDT/u3btwuNRiOef/75Yo+/cuWKKV6l4CswMFAIIYSbm5uYOnVqifO/9tpr4tSpU6a2Wq0W3t7ewtnZWQDC09NTeHh4iE6dOlXqfWZlZYkRI0aIX3/91bTt008/FYC4ceOGEMIYq3TvvfeKpKSkYseYOnWquPkjAohp06aJcePGCTs7O+Ho6Cjc3NyEh4eH8PT0FK6urgIQzs7Owt7e3nRuf/nll2LPGyCuXLkiRo8eLerVqyd0Ol0ROXbu3CkA8fvvvxfavnv3btMY2dnZQgghjh07VuI8ea/PPvus2Pd78eJFYW9vL0aOHCmEEGLEiBGiadOmpv2vvfaa6XxMmTJFtG7d2nQubyYjI0MEBQUJQLz22mtF9j/yyCOme0YIId59910BiCeeeKJIzFpB+vfvL8aOHWtqr1ixQgBi8eLFpm2ff/650Gg0IiwsrMRx0tPTxf/+9z/h4uIi7rzzTjFixAiRlZUlhg4dKiZPniwAERcXJ4QQolOnTuLee+8tcSxz0Gq1wsvLS3z//fcl9rHEZyE+Pl40atRIrFixQmi1WtGwYUMBiDlz5pj6bNq0SQBi27ZtpY5V1dd07dq1AhALFiwQ27
ZtM+u1ZcsW8ccff4gjR46UKOdvv/0mnJ2dS4wRlEhuNaRF7jYiPDycQYMG0bZtW+bPn4+9vT0//vgjPXv25OWXX6Z
2025-03-24 15:19:11 +08:00
},
"metadata": {},
"output_type": "display_data"
}
],
2025-03-26 16:59:57 +08:00
"execution_count": 17
2025-03-24 15:19:11 +08:00
},
{
"metadata": {
"ExecuteTime": {
2025-03-26 16:59:57 +08:00
"end_time": "2025-03-26T08:40:32.636873Z",
"start_time": "2025-03-26T08:40:27.504821Z"
2025-03-24 15:19:11 +08:00
}
},
"cell_type": "code",
"source": [
"# ACF检验周期性\n",
"# 创建完整时间序列(每小时一个样本,缺失值用线性插值)\n",
"full_idx = pd.date_range(start=data['datetime'].min(),\n",
" end=data['datetime'].max(),\n",
" freq='h')\n",
"full_series = data.set_index('datetime').reindex(full_idx)\n",
"interpolated = full_series[indicators].interpolate(method='time')\n",
"\n",
"# 绘制ACF图检验3天周期24*3\n",
"plt.figure(figsize=(60, 20)) # 调整整体画布尺寸\n",
"for i, indicator in enumerate(indicators):\n",
" ax = plt.subplot(2, 4, i+1) # 创建2行4列的子图布局\n",
" plot_acf(interpolated[indicator].dropna(),\n",
" lags=72,\n",
" alpha=0.05,\n",
" title=f'{indicator}',\n",
" color=colors[i],\n",
" ax=ax)\n",
" plt.xticks(np.arange(0, 73, 12))\n",
"plt.tight_layout()\n",
"plt.savefig('./images/all_acf_subplots.png', dpi=200, bbox_inches='tight')\n",
"plt.show()\n",
"\n",
"# Save one ACF figure per indicator.\n",
"# plot_acf draws on a brand-new figure unless an Axes is passed in, which is why\n",
"# the earlier version left seven empty 1200x600 figures behind. Creating the\n",
"# figure/axes here and passing ax makes figsize apply and lets us close the\n",
"# exact figure we saved.\n",
"for i, indicator in enumerate(indicators):\n",
"    fig, ax = plt.subplots(figsize=(12, 6))\n",
"    plot_acf(interpolated[indicator].dropna(),\n",
"             lags=72,\n",
"             alpha=0.05,\n",
"             title=f'{indicator} ACF',\n",
"             color=colors[i],\n",
"             ax=ax)\n",
"    ax.set_xticks(np.arange(0, 73, 12))  # one tick every 12 lags\n",
"    fig.savefig(f'./images/acf_{indicator}.png', dpi=200, bbox_inches='tight')\n",
"    plt.close(fig)\n"
],
"id": "5f8e89a8d1561e4f",
"outputs": [
{
"data": {
"text/plain": [
"<Figure size 6000x2000 with 7 Axes>"
],
2025-03-26 16:59:57 +08:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAF2YAAAfFCAYAAADkPWjkAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdfZSeBX0m/ut+kgwwhjBJCjQzMBOScMBQB8ObNC4utO4JQQ2TBBFNEbtnd2sRquhqy24imTrbVraIuulPVxfDLrKeWpKJoTW+1Ze1gFRsJCupp2m2M3Fn2IoTJpGGvM3cvz8mxkZmIAkz88zL53POnPs893eee64nSv74zp3rLsqyLAMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAMIFVqh0AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGCkKWYHAAAAAAAAAAAAAAAAAAAAAAAAAAAAACY8xewAAAAAAAAAAAAAAAAAAAAAAAAAAAAAwISnmB0AAAAAAAAAAAAAAAAAAAAAAAAAAAAAmPAUswMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAE55idgAAAAAAAAAAAAAAAAAAAAAAAAAAAABgwlPMDgAAAAAAAAAAAAAAAAAAAAAAAAAAAABMeIrZAQAAAAAAAAAAAAAAAAAAAAAAAAAAAIAJTzE7AAAAAAAAAAAAAAAAAAAAAAAAAAAAADDhKWYHAAAAAAAAAGDC6+joSFEUR79OP/30/It/8S/y6KOPJsnR8z09PUmSnp6eo+d+5sknn8xVV12VU089Na94xSvym7/5m9m/f/8J5aivrz8mR1EU+eQnPzl8HxQAAAAAAEbYcOzcf2b16tV5xzveMejPeeCBB7JgwYLMmjUr73znO094Jw8AAAAAAOPRF7/4xTQ3N+fUU0/N5Zdfnr/+678+OvuLv/iLo7OFCxemvb29ikkBAGD8UswOAAAT2PLly9PY2PiC8/98yX7RRRflC1/4wqDvnzt3bt7znveMcEoAAAAAABg9/+k//ad897vfzZ/92Z/lrLPOyq//+q/nH/7hH47Ot23blmSghP2f+/GPf5wlS5bk4MGD+fjHP57bb789//2///esXr36uH/2P/7jP+bpp5/On//5n+e73/3u0a+VK1cOz4cDAAAAAIBRdLI795/5/Oc/nz/4gz8YdNbe3p63v/3tufbaa/O5z30u3//+93PHHXcM/4cAAAAAAIAx5K/+6q9y/fXX54orrsiXvvSlvPKVr8ySJUvy7LPP5vHHH09LS0uampryhS98Ia985Svz5je/OY899li1YwMAwLgztdoBAACAkdHX15dvfOMb2bNnT374wx/mwgsvTJKjS/Zrr702//k//+d86lOfyg033JC/+qu/ymte85oqpwYAAAAAgJE1b968XHbZZUmSX/u1X8s555yT++677+h827Ztueaaa15QEvPxj388DQ0N+V//63/llFNOSZJ0dXXlz/7sz/LHf/zHx/Wzt27dmtNPPz3XXXddiqIYpk8EAAAAAADVcbI79yT51Kc+lfe9731ZuHDhoNf+j//xP2bp0qVZt25dkmTBggW58MILs3bt2px99tkj8GkAAAAAAKD67rzzzlx00UX59Kc/naIosnjx4vzyL/9yPvnJT+aJJ57I7Nmz89BDD+WUU07JNddck8bGxvzxH/9xNmzYUO3oAAAwrlSqHQAAABgZjz/+ePbs2ZNTTjklX/3qV4+ev/vuu48u2ZcsWZLPfe5zmT179nGXxgAAAAAAwERRU1OTefPm5R/+4R+SJL/6q796tBxm27ZtxzzQ9J3vfGfa29uPlrInyezZs3Pw4MHj/nl/8zd/k8suu0wpOwAAAAAAE86J7NyT5Dvf+U7+8i//8mix+z/X3d2dv/3bv82qVauOnps/f34WLlyYr3/96yP4KQAAAAAAoHp+8pOf5JFHHsnKlSuP3nNeU1OTV73qVfnBD36Qb33rW7nqqquO3t
NeU1OT173udfnWt75VzdgAADAuKWYHAIAJ6qtf/WrOO++8vOlNb8pXvvKVo+eHWrJ/85vfrFJSAAAAAAConv/3//5f5syZkyR51atelW3btiVJnnzyybzqVa86+n3nnHNOGhsbj74uyzJf/vKX89rXvva4f9b3vve9dHZ2Zu7cuTnttNNyxRVX5Mtf/vIwfRIAAAAAAKiu4925J8l/+2//LVdcccWg1+nu7k6SNDc3H3O+sbExO3bsGO7YAAAAAAAwJvzgBz9IWZa54IILjjl/77335rd+67fS09NzzD3tycB97j09PfnpT386mlEBAGDcU8wOAAAT1Fe/+tVcffXVufrqq/PNb34zhw4dyt69e4dcsv/kJz/Jc889V6W0AAAAAAAwunp6etLW1pZdu3blzW9+c5KBkpinnnoqBw4cyN/+7d++oCTmn3vwwQfzwx/+MO973/uO+2f+9V//derq6vLhD384mzZtytlnn503velN+eEPf/iyPw8AAAAAAFTLyezcK5Wh/3nr888/nySZOXPmMedPO+20PPPMM8OcHgAAAAAAxoYf//jHSZJZs2Ydc/6SSy7J/PnzkySveMUrjpn97LVidgAAODFTqx0AAAAYfnv37s3jjz+ef/tv/20uvfTSPPfcc3nsscdecsm+d+/eTJ8+fdTzAgAAAADAaHnrW9+at771rUmSGTNm5NOf/nRe85rXJEkuuOCC9Pf35+GHH05fX18uvPDCQa/xj//4j7njjjty00035Vd/9VeP+2d/4QtfyPnnn5/TTz89SfLrv/7rOf/88/PpT38699xzz8v8ZAAAAAAAMLqGY+c+mFNOOSVJMmXKlGPO19TUHC1tBwAAAACAiebAgQNJXvzhpmVZDvq6KIqRCwYAABOQYnYAAJiAvvGNb+Tw4cO5+uqr09jYmDPPPDNf+cpX8tu//dtJXrhk/9ly/RfPAwAAAADARPNHf/RHWbJkSU4//fScd955x9y0Pm3atFxwwQV54IEHcsEFF6SmpuYF7y/LMu94xztyyimn5E/+5E9O6Gdfcsklx7yeOnVqXvOa1+T73//+SX0WAAAAAACoppe7cx/KWWedlSTp7u7OnDlzjp7fvXt3zj///OH7AAAAAAAAMIZMnz49SfJP//RPx5z/3d/93Rw6dGjQ2c9ez5gxYxQSAgDAxDH045AAAIBx66tf/WqSZO7cualUKnnmmWfyla98JWeccUaSny/Vn3322dx7771HX59++unVCQwAAAAAAKOkqakpr371qzN//vxjCmJ+5lWvelU2b96cV73qVYO+//d///fzta99LZ/73Ocya9as4/65//RP/5Svf/3rLzi/e/fu7N+///g/AAAAAAAAjBEvd+c+lMbGxsyZMyePPPLI0XNlWeZv/uZvUl9f/7JzAwAAAADAWPSzh5P+n//zf445/8gjj6SrqytnnXVWOjs7j5n96Ec/yllnnZVXvOIVo5YTAAAmAsXsAAAwAX3lK1/JTTfdlK1bt2br1q35/d///Xzve9/LwYMHj1myb9q0KWvWrMn3vve9zJw509NPAQAAAACY9H5WDtPc3PyC2f/8n/8zra2tufvuu3PVVVed0HX/4R/+Ia9//euzY8eOo+c6Ojry7W9/O1deeeXLCw0AAAAAAGPQi+3cX0ylUsmKFSvyJ3/yJ3nuueeSJH/6p3+af/zHf8zrX//6Yc8JAAAAAABjwUUXXZRzzjknmzdvPnruueeey/e///1ceumlueaaa/Ltb387+/fvT5IcOHAg3/72t3PNNddUKzIAAIxbitkBAGCC6ezszI4dO/LGN74xr371q/PqV786N954Y/r7+/OXf/mXxyzZ3/KWt+S0007Lt771rVx99dXVjg4AAAAAAFX3s5KYnx1/ZseOHfk3/+bf5DWveU2uuuqqPPHEE0e/Dh48mGSgaH379u2DXvdXfuVX8rrXvS7XX399/ut//a/5L//lv+Tqq6/OK17xirznPe8Z0c
8EAAAAAADVMNTO/Xh84AMfSE9PTy677LLcfPPNueWWW3L99dfn0ksvHe6YAAAAAAAwJhRFkT/6oz/K17/+9bzzne/
2025-03-24 15:19:11 +08:00
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<Figure size 1200x600 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<Figure size 1200x600 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<Figure size 1200x600 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<Figure size 1200x600 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<Figure size 1200x600 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<Figure size 1200x600 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<Figure size 1200x600 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
2025-03-26 16:59:57 +08:00
"execution_count": 18
2025-03-24 09:57:14 +08:00
},
{
"metadata": {},
"cell_type": "markdown",
"source": [
2025-03-24 15:19:11 +08:00
"\n",
"\n",
2025-03-24 09:57:14 +08:00
"## 题目2\n",
"简述各项指标间的相互关系。"
],
"id": "59e20f3463e819a6"
},
{
"metadata": {
"ExecuteTime": {
2025-03-26 16:59:57 +08:00
"end_time": "2025-03-26T08:40:33.686442Z",
"start_time": "2025-03-26T08:40:32.644902Z"
2025-03-24 09:57:14 +08:00
}
},
"cell_type": "code",
"source": [
"#计算相关系数矩阵\n",
"correlation_matrix = data.iloc[:, 1:].corr()\n",
"#绘制热力图\n",
2025-03-24 15:19:11 +08:00
"plot_heatmap(correlation_matrix,20,16,title=\"Correlation Matrix Heatmap\",save_path=\"./images/correlation_heatmap.png\")"
2025-03-24 09:57:14 +08:00
],
"id": "c917d14115569bcd",
"outputs": [
{
"data": {
"text/plain": [
"<Figure size 2000x1600 with 2 Axes>"
],
2025-03-26 16:59:57 +08:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAABxgAAAY1CAYAAAARrgupAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd1QUVxsG8Gd36VWKImChFxXsvcd8MdWY2ECNvfduTGLXqLFrNPZo1JgYu9EoiqJGk6ixgZ3eFAWlLmVh+f5AF5eyOyjLsub5nbPnOHPnLu9cZ+7emXeKKD8/Px9ERERERERERERERERERAKItR0AEREREREREREREREREekOJhiJiIiIiIiIiIiIiIiISDAmGImIiIiIiIiIiIiIiIhIMCYYiYiIiIiIiIiIiIiIiEgwJhiJiIiIiIiIiIiIiIiISDAmGImIiIiIiIiIiIiIiIhIMCYYiYiIiIiIiIiIiIiIiEgwJhiJiIiIiIiIiIiIiIiISDAmGImIiIiIiIiIiIiIiIhIMCYYiYiIiOg/5/jx4/D19YWRkRGaNm2Ky5cvazukUnXo0AFz5szR+b9RkqCgIIhEIvj6+irm7du3DyKRCB06dKjweABAJBIhKCio3L93wIAB8PLyUpq3fft2iEQiJCcnl/vfIyIiIiIiItIkJhiJiIiI6D/lzz//xKeffopmzZrhxIkT8Pb2RufOnfH8+XNth6YR27dvV5sw27hxI4YNG1YxAZXg3r17yMnJAQDcunXrtb7jxo0bWLVq1RvHcuXKFTRu3PiNv0ebDh06hEOHDmk7DCIiIiIiInqLMcFIRERERP8pM2bMQN26dbF582Z06NABW7ZsgUgkwoYNG7QdmkYISTB6enrCwcGhYgIqgUwmw927dwEAN2/efK3vKK8EY5MmTWBubv7G36NNTDASERERERGRpjHBSERERET/GYmJibh48SK6desGkUgEADAwMICPjw9CQkK0HN1/V8uWLRWJxVu3bqF58+ZajoiIiIiIiIiIVGGCkYiIiIj+M0JCQpCfnw9PT0+l+StXrsTEiRMV0ydPnkSDBg1gaGiIunXr4uDBg0rLz5kzBx06dEBKSgpGjRoFe3t7nDlzRlEeGRkJkUiEyMhIbN++Hb6+vhg8eLDSd5w5cwbNmjWDkZERvL29sWfPnjKtS15eHr766ivUqFEDZmZmaN26Na5du6YoF4lEEIlEOHfuHObOnauYjoyMLPZdqt7BuGLFCtSuXRtGRkbo1KmTUiJ2wIABGDBgAI4ePQpvb2+Ymprigw8+QGJiYpnWxcfHB7du3UJqaioiIyPh4+OjVH7y5Ek0btwYJiYmcHJyUrpTccCAARCJRBg4cCCioqIU61l0fV6+W/H3339H8+bN8d5775UYS0nvYHz8+DGsrKwwd+5cxbxu3bqhXr16kMlkZVpXVaRSKcaNG4dq1arBysoKvXr1wtOnT5WWWblyJVxcXGBiYoIGDRrg9OnTijInJyeIRCLs2LEDO3bsULTFy/Xp0KEDRo4ciVatWsHCwgK//vorunbtClNTU3zzzTeK79m9ezfq1KkDExMTeHl5KW2bc+bMQb169TB9+nRUqVIFNWvWxMKFCyGXy8utHYiIiIiIiKjyY4KRiIiIiP4znjx5AgCwtrZWmt+oUSM0adIEABAUFISPPvoITZo0wfHjx/Huu++iW7duOHr0qFKdnJwcdOrUCdHR0fj666/h4eFR7O8tX74c8+bNg5+fH/z8/BTz79+/j/fffx9NmjRBQEAAevXqhd69eyMwMFDwuixZsgRLly7FggULcPz4cTg4OKBHjx6K8itXruDKlSto1KgRhg4dqpguy6NQ58+fjxkzZmD06NE4cuQIxGIx2rVrh+joaMUyV69exciRI/H111/jxx9/xKVLl7B48WLBfwMoSDDevHkTt27dgrW1NRwdHRVlERER+PTTT1GnTh0EBATgyy+/xOTJk3Hhwg
UABQmvK1euYPbs2bC3t1esZ0nvlPztt98wZMgQdO7cGSNGjBAcX/Xq1RXtHR8fj/Pnz+PQoUPYtGkT9PX1BX/P/fv3FUm/l0nRV40YMQKHDh3C+vXr8fPPPyMkJASff/65ovznn3/GpEmTMGrUKAQEBKBNmzbo3r070tLSAABHjx7FlStX8PHHH+Pjjz9WtMWr75TctWsXJkyYgObNm8PPzw9NmjTBmDFjsHLlSgDAxYsX8cUXX+DDDz9EQEAA+vTpg379+iE8PFzxHXfu3MG5c+ewd+9ejB8/HnPmzMG6desEtwMRERERERHpPj1tB0BEREREVFGys7MBAGJx6dfZzZkzBy1atMCWLVsAAJ06dUJoaChmzZqFTz75RLHcX3/9hTFjxmDt2rWlftfx48fxzz//oGrVqkrzFy1ahLp162L9+vUAgHbt2uHo0aP46aef0KlTJ0Hr4uvri/3796NLly4AgGfPnmHfvn148uQJqlWrpkiYmpubw8HBQTEtlFQqxeLFizFlyhRMmzYNANCqVSu4urpi5cqVioTUnTt3cPnyZcX3nzt3rszvUfTx8cH8+fNx8+bNYncv5ubmYtWqVejTpw/Mzc3RoEEDLFy4EH/99Rfatm0LJycnODk5ISQkBAYGBirX85dffsE///wDNze3MsUHAEOHDsVPP/2EGTNmICQkBMOHD0erVq3K9B1OTk5Kd8MeOXIEs2fPBlCQSN21axcOHDiArl27Kta9S5cuiIiIgLOzMxwdHbFr1y706dMHAGBmZoZ169bh7t27aNasmaLtbGxsAKDEtvD390fPnj1x+/ZtREVF4ZtvvsHZs2fx3XffAQCMjY2xceNGDBo0CBKJBB4eHpg/fz6uXr0KFxcXAAV3ef7yyy9wcnLCe++9h+DgYKxZswZjx44tU3sQERERERGR7uIdjERERET0n2FmZgYAyMjIUJo/ffp0TJo0CUDBnX9Fk3ydOnXCzZs3lR6HaWtri0WLFqn8e99++22x5CIABAcH48aNG0p3s127dg0PHz4UvC4ffvghsrKyMHDgQHh5eaFbt24AChKD5eH27duQSqV49913FfPMzMzQvHlzXLlyRTGvRYsWSomsqlWrlvmxoT4+Pnjy5AkCAgKKJRjd3d3RunVrLFq0CG3btkW1atUQFxf3Wus5bdq010ouAgVJtU2bNmHPnj149OhRme/SBABDQ0M0aNBA8alVq5ai7OXjez/77DPFNvEyefxyu2jfvj1sbGwwcuRI1K9fH02bNgVQtv/zl3ewikQipX+/1KhRI9SpUwdTpkxBs2bNULNmTeTm5ir9DUdHRzg5OSmmmzZtisjISOTm5paxRYiIiIiIiEhXMcFIRERERP8Z7u7uAKD0uEeg4LGQcXFxAID8/HylhAtQcMdjfn4+8vPzFfPq1q2rSFiWpnnz5qWWde3aFdevX1f6bN++XfC6+Pv7Y/To0XB0dMSiRYtw+fJlwXWFeLmupbXFS66urm/8t6ytreHg4IAjR44USzAePXoUjRo1QmRkJAYMGIArV66gXbt2r/V3VP1/CJGQkIC8vDykp6cjJSXljb6rNCdOnCi2XbRo0QJAQSK8e/fuMDExwVdffYWoqKhy//vr169Hx44dFe+DvHv3rlIiFIDS/z8AyOVyRVKUiIiIiIiI/huYYCQiIiKi/4y6deuiRo0aOHLkiGJeeno6bty4oXhPXdOmTXHmzBmleoGBgahfvz4MDAzKJY569eohOjpa6W62ixcvYs+ePYLqp6SkYO/evViyZAkWLFiAzz77DE+fPi1xWSMjI2RmZpY5xrp168LExETpvZAZGRn4+++/FXfOAYBEIinzd5fkZWLR19dXaf62bdvQsmVL/Pzzzxg8eDBcXV1LTKy97noKJZVKMXToUEyYMAFNmzYt8R2Pb6Ju3boACh7j+3KbsLOzw7JlyxTru2nTJkyaNAnLly9Hr169kJqaWuJ3vUlbbN68GX5+fti4cSP69u
0Lc3NzPHv2TGmZ+Ph4pST95cuX4eLiUm7bAhEREREREVV+TDASERER0X+GSCTC4sWLcebMGYwYMQKnTp1Cz549YWB
2025-03-24 09:57:14 +08:00
},
"metadata": {},
"output_type": "display_data"
}
],
2025-03-26 16:59:57 +08:00
"execution_count": 19
2025-03-24 09:57:14 +08:00
},
{
"metadata": {
"ExecuteTime": {
2025-03-26 16:59:57 +08:00
"end_time": "2025-03-26T08:40:34.734432Z",
"start_time": "2025-03-26T08:40:33.750658Z"
2025-03-24 09:57:14 +08:00
}
},
"cell_type": "code",
"source": [
"#主成分分析(PCA)\n",
2025-03-24 15:19:11 +08:00
"data=pd.read_excel('北京市空气质量指数与气象数据.xlsx')\n",
2025-03-24 09:57:14 +08:00
"PCA_data=data.iloc[:,2:]#去除日期列和小时列\n",
"\n",
"# 计算KMO值\n",
"kmo_all, kmo_model = calculate_kmo(PCA_data)\n",
"print(f\"KMO值: {kmo_model.round(3)}\")\n",
"# 进行巴赫利特检验\n",
"chi_square_value, p_value = calculate_bartlett_sphericity(PCA_data)\n",
"print(f\"巴赫利特检验卡方值: {chi_square_value.round(3)}, p值: {p_value}\")\n",
"#判断\n",
"if kmo_model>0.7 and p_value<0.05:\n",
" print(\"数据适合进行主成分分析\",'\\n')\n",
"else:\n",
" print(\"数据不适合进行主成分分析\",'\\n')\n",
"\n",
"# 数据标准化\n",
"scaled_data = (PCA_data - PCA_data.mean()) / PCA_data.std()\n",
"scaled_data = scaled_data.dropna()#去除空值\n",
"\n",
"# 计算协方差矩阵\n",
"cov_matrix = np.cov(scaled_data, rowvar=False)\n",
"\n",
"# 计算特征值和特征向量\n",
"eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)\n",
"sorted_indices = np.argsort(eigenvalues)[::-1]\n",
"sorted_eigenvalues = eigenvalues[sorted_indices]\n",
"sorted_eigenvectors = eigenvectors[:, sorted_indices]\n",
"\n",
"# 绘制累计方差解释比例图\n",
"explained_variance_ratio = sorted_eigenvalues / np.sum(sorted_eigenvalues)\n",
"cumulative_explained_variance = np.cumsum(explained_variance_ratio)\n",
"print(\"累计方差解释比例:\", [f\"{cum * 100:.2f}%\" for cum in cumulative_explained_variance])\n",
"\n",
"plt.plot(range(1, len(cumulative_explained_variance) + 1), cumulative_explained_variance, marker='o')\n",
"plt.xlabel('主成分数量')\n",
"plt.ylabel('累计方差解释比例')\n",
"plt.title('PCA 累计方差解释比例')\n",
2025-03-24 15:19:11 +08:00
"plt.savefig('./images/PCA_cumulative_explained_variance.png', dpi=200, bbox_inches='tight')\n",
2025-03-24 09:57:14 +08:00
"plt.show()\n",
"\n",
"# 选择特征值大于1的作为主成分\n",
"mask = sorted_eigenvalues > 1\n",
"selected_eigenvectors = sorted_eigenvectors[:, mask]\n",
"\n",
"# 计算因子载荷矩阵\n",
"loadings = selected_eigenvectors * np.sqrt(sorted_eigenvalues[mask])\n",
"\n",
"# 使用Varimax旋转载荷矩阵\n",
"rotator = Rotator(method='varimax')\n",
"rotated_loadings = rotator.fit_transform(loadings)\n",
"\n",
"# 输出旋转后的成分矩阵\n",
"rotated_components_df = pd.DataFrame(rotated_loadings,\n",
" index=PCA_data.columns,\n",
" columns=[f'Factor{i+1}' for i in range(rotated_loadings.shape[1])])\n",
"rotated_components_df = rotated_components_df.round(3)\n",
"\n",
"# 输出排序后的载荷矩阵\n",
"rotated_components_df=sort_matrix_by_diag(rotated_components_df)\n",
"print(\"旋转后的载荷矩阵(排序后):\\n\", rotated_components_df)\n",
2025-03-24 15:19:11 +08:00
"plot_heatmap(rotated_components_df, 4, 8,save_path=\"./images/components_heatmap.png\")"
2025-03-24 09:57:14 +08:00
],
"id": "509d783a82bbdcb2",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"KMO值: 0.762\n",
"巴赫利特检验卡方值: 90424.712, p值: 0.0\n",
"数据适合进行主成分分析 \n",
"\n",
"累计方差解释比例: ['31.41%', '54.60%', '66.53%', '73.02%', '78.89%', '84.04%', '88.27%', '91.46%', '93.59%', '95.70%', '97.14%', '98.29%', '98.91%', '99.26%', '99.55%', '99.79%', '99.96%', '100.00%', '100.00%']\n"
]
},
{
"data": {
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
],
2025-03-26 16:59:57 +08:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjUAAAHECAYAAADBM9u5AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAABni0lEQVR4nO3dd3hT1f8H8HdG90hbWii0Bcreo6UspUwBBw5AGfJDcQEqiigKLkCRLSIq8GXJUhREULYoAiICLUtKUSy00AmlI+nMPL8/aiOhK2mTpmnfr+fJU+7NOfd+btI0H849QyKEECAiIiJycFJ7B0BERERkDUxqiIiIqFZgUkNERES1ApMaIiIiqhWY1BAREVGtwKSGiIiIagUmNURERFQrMKkhIiKiWoFJDRFZnVarLfO506dP48SJExYdLz8/H1999VWlYrl+/Tp+++23EvuXL1+O7OzscuvOmjULn376qck+g8EAS+YsVSqVOHbsGGbMmIHPPvsMarUahYWFFh2jtGPGxMTAYDCYVf7UqVOIjY2tsJxGo8HJkyehVCorHRuRPTGpISIjg8EAqVQKhUIBf39/48PJyQkeHh4m+9zd3dG7d+8Sx/joo48waNAg5OXllXqO//3vf1i2bJlFce3fvx/jxo3DoUOHLL6m1atXY/jw4bh586ZxX1RUFF599VXs37+/zHo3b97EJ598Ap1Oh+zsbGRnZ6OgoADbtm2DVCqFi4sLXF1dTR7fffedMd727dvD398fPj4+GDJkCKKiopCdnY358+fDzc0NUqkUEonE5HHs2DGTGAYOHIj58+eXiG3fvn3o2LEjCgoKzHoNFi9ejGHDhuHEiRP4/vvvsWvXLuzatQvbtm3D+fPnjeUKCwvRq1cvsxIgopqISQ2RnSUkJJh8sXl5eeHee+8t0ZqRlpaGkSNHwtPTE40aNcLChQtLHMtgMMDf3x/jx4+vVCzFX9bffvstbt++bXzcc889mDlzpsm+N998E66uriWOMX78eCQkJGDYsGGlfunK5XI4OztbFNfw4cMRHh6ON954w6zycXFxSEhIQFJSEp5++mkEBwcjNjYWSUlJSEpKwoIFC9C/f3/06dMHSUlJiI+PN0l6hBCYPHkyFAoFtm/fjs6dOyM4OBhxcXEYPnw4CgoK8Nxzz2HEiBEoLCxEYWEhpFIpfHx8AACRkZEYO3YsNmzYgPHjx2PUqFHYt28fxo4di3fffRczZswwvj4FBQV47bXX0K1bN0RGRppcR0ZGBvLz80tcX/HrZ87rmJubi0OHDuHVV19FVFQU5s+fj8ceewzLly/H559/jlOnThnLenh4mPwkcjRyewdAREU++ugjDB48GLdv38bq1asxcOBAxMbGIjQ0FBqNBoMHD4ZGo8G2bduQnJyMV155BQ0aNMDTTz9tPMbZs2eRkZFRqRaNYlKpFK+99hpmz55t3BcbG4urV69iz549xn1JSUlo06ZNifohISE4ePAgunfvjunTp+Pzzz83eV4ikZR57sLCQsTGxsLFxQUymczkuddffx3e3t7466+/TPYLIaDVahEUFIR69eoBAPr27YusrCyTL/0HH3wQhYWF8Pb2Nu7r0KEDhBDQ6XSYOHEili5dCgCYPn06zp49i+joaLi5uSE8PBwrVqxAx44dARTdXvvxxx+NrSi5ubkoKChAgwYNAABubm64ceMGhg8fjoCAANy+fRvHjh3D8OHDkZSUhJdeegktW7bEH3/8AYVCgdWrV5u8tsVkMlmJ1wH4L5lxcnIy7svKysLChQsxd+5cyOX//WnfuHEjCgoKMG7cOPj5+aFVq1YYOXIkDh8+bCxzzz33oGvXrsb3Sirl/3fJMTGpIaohmjVrhm7dugEABgwYgODgYKxbtw5z587F5s2bcfHiRZw5cwZhYWEAgPPnz2P+/PkmSc1PP/0EFxcXpKWl4eLFi8YvYUsYDAYsWrQI9913n3Hf4MGDMWDAAMyYMc
O4b+7cuWX2jWnTpg1+/PFHdOnSxaJzJyQkIDw83OKYAWDz5s0YN24cACA5ObnE83PnzsWOHTtw7ty5co9z8uRJrF+/Hv7+/ggKCoIQAgaDAc888wyeeeYZfPnllzh9+jQ0Gg2GDx+OzZs3Y/PmzXB1dUWzZs0AFCUFFy9exPbt243H3b9/Px544AEoFAooFAp8/vnneOSRR+Dt7Y2pU6eiX79+JWIpKwG8e/+xY8fw1FNPISUlBZGRkXjggQcAADqdDsuWLUOjRo3g5+cHoKiPUWhoqEl9b29vKBSKcl8XIkfApIaoBnJ2dkazZs0QHx8PANi1axeaNWtmTGgAoFu3bli5ciXUajVcXFwAAIcOHcLYsWOxd+9e/PTTTxYnNQaDAYWFhXjxxRdNbkHcuHEDly9fxjfffGPcd/v2bbRq1cq4rdFocO7cOWMsjRo1gre3N1JSUuDq6mr8379Go4FWqzV20i0+Z6NGjdCqVSvk5eWZtNRIpVLs2rULDz/8cKkxCyGgVqtLtC5cuXIFbm5uxgTg7NmzaNKkCZKSkkzK6XQ6SKVSNG7cGADQs2dPXL16FaNHj8YLL7yAwMBArF27FkeOHEGLFi1gMBhw5swZrFu3Du7u7vD19UWDBg2wa9cu42um0+kwZcoUeHt749dffwUAvPrqq8Z+RlqtFnl5edBoNFAqlbh69SpOnDiBnj17lriO7OxsJCQkwGAwQKPRmNzyu3LlCmbNmoXt27dj7Nix+OCDD9C0aVPj86tWrUJcXByaNGli3Hf8+HGkpqZi6NChAIAZM2bA3d3dpNWHyGEJIrKr+Ph4AUBs3brVZH+TJk3E66+/bvz3/fffb/J8Wlqa2L9/vygsLBRCCJGXlyecnZ3Fxo0bxciRI8XgwYOrFFdKSooYMWKEaN26tfjll1+EEELodDrxv//9TzRq1Ejs27fPpHxSUpLw9vYW/v7+wsPDQ/Ts2VOkpqYKAGY9SqPVagWAEucyh0wmE56enkKhUAiFQiEACA8PD+O2QqEQ3t7ewsPDQ0RGRpaof99994lXX31VfPDBByIsLExERUWJ4OBg8fPPP4vs7Gzxzz//iOTkZBEeHi4yMjJM6m7fvr3Ua3ziiSfE22+/LYKDg0WjRo3EgQMHREpKihg5cqSQSCRCoVCIAwcOGI8THh4uZDKZcHFxEU5OTgKAGDJkiNi9e7cAINzc3MT48ePFX3/9VSL+f/75R3h7e4vOnTuLJk2aCCGEKCgoEL6+vuL1118XO3fuFD4+PuKXX34RI0aMELNmzRJCCAFAXLx40eLXm6gmYEsNUQ2TkZGBlStX4saNG3j88ccBALdu3cK9995rUq5BgwbG/20DwJEjR6DRaNC3b1+oVCq8+eabJq045jp27BjWrFmD7777Du7u7hg1ahS+/fZbrF+/HlFRUcjNzcWUKVPQtWtXk3pBQUHGocCzZ8/GkSNHUL9+faSnp8PNzc3Y8jJlyhTk5eVh/fr1AIpaWgoLC0uNJTc3F0DRSKS7+9IUq1evHgICAozbSqUSQgiT4doHDx7E+PHjcenSJWO/mzsJIZCZmQkfHx9jS4kQAj/++COcnJyQlpaGqVOn4vbt25DJZPjpp5/w/PPP448//sCZM2eMLTRCCEgkEgwdOhRXr17FpUuX8PDDD+OJJ57AokWL4OLigiNHjmDSpEmYOnUqfvvtN6xduxbbt29HYmIifvjhBwwePNgktnfffdfYv0kIAb1ejwMHDgAArl69ioYNG5b6uiQnJ6Np06Z46623MHPmTADAZ599Bq1Wi3fffReurq5QKpUIDg4utT6RI2JSQ1RDjBkzBmPGjAFQ1MdhzZo16NGjBwCUenvlbocOHULTpk3RpEkT9OvXDwUFBfjtt98waNAgi+LIysrC9evXMWbMGISEhCA+Ph7btm1Do0aN8NZbb2HgwIH4+OOPTTqjlkYul0MqlcLf399kv0wmg1QqNbmN4ubmVuoxbt++DQCYMGFCme
dZsGAB3nrrLeP2q6++io0bN5Za9s5bM6VJTU1FYGAggKLbYq+//joUCoXJ7ScA2LJlC1QqlTGxUygUMBgMeOeddzB
2025-03-24 09:57:14 +08:00
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"旋转后的载荷矩阵(排序后):\n",
" Factor1 Factor3 Factor2 Factor4 Factor5\n",
2025-03-26 16:59:57 +08:00
"Tn -0.963 -0.035 -0.071 -0.079 0.035\n",
"T -0.958 0.138 -0.033 0.074 -0.028\n",
"Tx -0.954 0.014 -0.045 -0.052 -0.063\n",
"P 0.924 0.029 -0.071 -0.032 -0.000\n",
"Po 0.921 0.029 -0.073 -0.033 -0.000\n",
"Td -0.898 -0.366 -0.043 -0.013 0.076\n",
"O3 -0.637 0.529 0.030 0.239 -0.084\n",
"U -0.322 -0.824 0.008 -0.156 0.229\n",
"Ff -0.045 0.772 0.126 0.024 0.172\n",
"NO2 0.300 -0.728 0.290 0.110 -0.202\n",
"CO -0.101 -0.695 0.449 0.298 -0.007\n",
"VV 0.153 0.667 -0.531 -0.093 -0.175\n",
"AQI -0.017 -0.038 0.967 0.025 -0.029\n",
"PM10 0.037 0.060 0.933 -0.092 0.003\n",
"PM2.5 0.049 -0.359 0.879 0.149 -0.007\n",
"Pa 0.006 -0.055 0.147 -0.747 -0.130\n",
"SO2 -0.035 -0.099 0.208 0.694 -0.065\n",
"RRR -0.139 -0.094 -0.103 -0.077 0.819\n",
"tR 0.163 0.120 0.087 0.131 0.512\n"
2025-03-24 09:57:14 +08:00
]
},
{
"data": {
"text/plain": [
"<Figure size 400x800 with 2 Axes>"
],
2025-03-26 16:59:57 +08:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAMWCAYAAADrsBE8AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3hT5dvA8W+S7r0odFAonUBb9pApouICkT1E9pChoPJTVLYyZAsCgshQVEBAEGRTlqKASCkyuwsthRZI26QjbfL+UQiEpE0KSaG+z+e6znVxxp3ed07Ic57nnJwj0Wg0GgRBEAThIdInnYAgCILwdBINhCAIgmCQaCAEQRAEg0QDIQiCIBgkGghBEATBINFACIIgCAaJBkIQBEEwSDQQgiAIgkGigRAEQRAMEg2EoHXo0CEkEgl//vmnznKJRMLChQufTFKCIDwxooEQKlxSUhJTpkx50mkIgmCEaCCECpeUlMTUqVOfdBqCIBghGghBEATBINFACOWyceNGIiIisLe3p0GDBhw4cEBn/alTp2jTpg1OTk74+fnx0Ucfce+GwVOmTEEikdCuXTug5NyGRCJhwIABAKxZs4aaNWvyySef4OjoSN++ffnmm29wc3OjUaNGyOVyAOLi4njllVdwdXWlatWqDBs2jLy8PKCkdyKRSJgzZw4hISG4uLjQqVMn0tLSKugdEoT/EI0g3BUdHa0BDE4LFizQHDhwQCORSDSTJ0/WHD58WDNixAiNtbW15sKFCxqNRqPJycnReHh4aF588UXN4cOHNevXr9c4OTlp1q1bp9FoNJpr165pTp48qVm+fLkG0Jw8eVJz8uRJTWJiokaj0WhWr16tATSjRo3SzJ8/XwNo2rRpo9m1a5fGyspKs3HjRo1ardaEhYVpGjRooNm/f7/ml19+0fj6+mqmTZum0Wg0msTERA2gcXZ21qxcuVLz66+/asLCwjSNGjXSqNXqJ/K+CkJlJRoIQeteA7Fu3TrNP//8o53uNRBt27bVvP7669rti4uLNV5eXppJkyZpNBqNJj09XbNs2TLNtWvXNBqNRlNQUKBp2rSp5u233zb4dx62evVqjaOjo6awsFCTkJCgATS///67RqPRaAICAjSrV6/W5ObmapYtW6a5ePGiNocuXbpoXn75ZY1Gc7+BuNdgaDQazcGDBzWA5tixY+Z7swTh/wGriu+zCE+7kJAQ6tevr7c8NjaWW7duIZFIdJZfuXIFgGrVqtGhQwe++eYbjhw5wsmTJ8nJyaF27dom/20vLy+sra21f8PX1xdAO+/o6EiXLl1Yu3Ythw4d4q+//uLWrVu0adNG53Vatmyp/XeTJk2AkqGpB5cLglA20UAI5TJy5EiGDh2qs8zNzQ2A06dP06pVK9q1a0fXrl2ZNWsWixcvNuvfT01NpUGDBoSHh9O9e3cmTJjAnj17OHr0qM52mgcelKhWqwGQSsUpN0EoD9FACCaLiIggPT1dp3cxZcoUqlSpwqhRo/juu++oUqUKO3fuBEq+pK9cuUJoaKjO69jZ2QGQl5eHvb19uXLYsmUL2dnZHDhwAFtbWwCWLFmit93Ro0dp3749ACdOnABKekaCIJhONBCCySZNmsQLL7zAJ598QocOHfj999+ZNm0amzZtAkqGhzIyMti4cSNOTk4sXryY48ePU6tWLZ3XqVOnDs7OznzxxRe0a9eO2NhYunXrZlIOXl5eqFQqVq9eTUhICKtXr2bDhg16Q0cLFizAz8+PatWq8cEHH9C0aVOaN29unjdCEP6/eNInQYSnx72Tx8ePH9dZzt2T1BqNRrNhwwZN3bp1Nba2tprw8HDN6tWrtdvl5uZqevXqpXF2dtb4+/trhg0bphkxYoQmODhYo1KpdF7z119/1YSEhGisrKw0tWrV0qSlpWlWr16tqVGjhkajuX+y+d4VTjVq1NCsXr1aU1RUpBk9erTGw8ND4+3trenZs6dm4sSJGldXV8
2dO3e0cYsXL9aEh4drbG1tNS+88IImKSnJUm+bIPxnSTSaBwZrBaGSS0pKIjAwkJMnT9K4ceMnnY4gVGrirJ0gCIJgkOhBCIIgCAaJHoQgCIJgkGggBEEQKlhWVhaBgYEkJSWZtP3hw4epXbs2Xl5ezJ8/3+R1j0s0EIIgCBUoMzOT1157zeTG4ebNm3Tq1InevXtz/Phx1q9fT3R0tNF15iAaCEEQhArUq1cvevXqZfL269evx8fHh4kTJxISEsKkSZNYtWqV0XXmIBoIQRCEx1RQUEB2drbOVFBQYHDbFStW8O6775r82jExMTz33HPa+5E1bdqU06dPG11nDuKX1IIgVEo7rcOedApaJz/prfeUxMmTJxt8tO7DdxYwJjs7mzp16mjnXVxcuHbtmtF15vBUNBDPdjv+pFMwi0M/P8Ol+NQnnYZZhAVV51zc9SedhllEBFcjNi7jSadhFpHBVUm5cuFJp2EWASGm3+X3aTdhwgTee+89nWX37hX2uKysrHRey87ODqVSaXSdWf622V5JEATh/ylbW1uzNQgP8/Dw4ObNm9r5nJwcbGxsjK4zB9FACIJQKUmsJcY3+g9o0qQJP/74o3b+zJkz+Pn5GV1nDuIktSAIwlMgOzsblUqlt7xTp04cO3aM6OhoioqKmDt3Lh06dDC6zhxED0IQBOEpEBUVxcKFC+ncubPOci8vL+bNm0eHDh1wdXXF0dFReylrWevMQTQQgiBUSlKryj3E9PBt8Mr64dzIkSN58cUXuXDhAm3btsXFxcWkdY9LNBCCIAiVQHBwMMHBweVe9zjEOQhBEATBINGDEAShUpJYi+NbSxPvsCAIgmCQ6EEIglApVfaT1JWB6EEIgiAIBlXqBiKwuj3LZ0Xy65omjOhXw+S4Xq/78v3i+mz7tjHvDgnEzlb/bZg0LoR3BtU0Y7ZlS05K5L13R9K7R2dWr/pa7xK40pyLjWHk8EH07dWFX7b8bHCboqIixrw9hNizZ8yYcelSkhL439hhvNXjVdauWmZyLf/GnuGd4f0Y0LsT27du0C7fsH41XV9tqzedO/uPpUrQSklK4MOxw+jf4xXWrVparlreHf4mA3t35NcHagHYtvlHBvd9nX7dXmLO55+Sky23ROpaiUnJjBr3AW/07MuKb9eYVENM7DkGjRhN1z79+HnrNoPbFBUVMXTUO8ScjTV3ysJT4pEbiIKCAlJTU0lJSdGZKoq1lYQZH4VzOSGX4R+epYa/PS+1q2I07tX23nR9pRqfLYpj9KfnqB3sxHvDdO+u2KSeKw0iXPn2p4q58Z5KVcj0qRMJDg5l/qKlpKYkc2DfHqNxcvkdPps6kTZt2zFn3pccPnSAszFn9Lbb8vMGkpOTzJ+4ASpVITOnTSAoOIwvFq3gakoS0ft3GY2Ty+8wa9rHtGrbnhlzl3I0ej+xMSW3LX6jex/WbdihneYt+RYXVzcCg0IsXsusaR9RKziU2YtWlquW2dMm0Krt83dr2ce5u7WcP3eGQwd2M232Yr748htUhYWs/eYri9VQqFIxafrnhAQF8dXCuSSnpLJn/8EyY+7I5UyaPoN2bVuzaM5sDh46zBkDjcCGzVtJSq64//MPk1hLnprpv+qRGojFixfj5uZGzZo1dabAwEBz51eqZg3ccHSw4qs1yaRlFPDNDym8+py30bgX21bhp21pXIzLJTUtn9UbUmnZxF273sZGytihtVixPplcZbElS9D6++RJlAoFg4eOwMfHl379B7Nvr/EvokPRB3D38KRn7zfx9fOnZ+839eLSrl1l65ZNeFetZqn0dZw+9RdKhYIBQ0ZRzcePvv2HcmDvb0bjjkbvw83Dk+69++Pr50/33v05eDfOxsYWRydn7bTr1y107NwdR0cni9byj7aW0VTz8aNP/2Ec3LvThFr24ubhSbfe/fHxq0633v05cDfuyqULNGzcHD//AHx8/WnV9nnSrlnuQOTkqb9RKJSMGDIIXx8fBr31Jr
v37S8z5uChw3h6uPNmrx74+/nyZu+e7N6rG3P1Who/b/mFalWN/58TKq9HaiAmT57MF198QX5+Pmq1WjsVF1fMFyp
2025-03-24 09:57:14 +08:00
},
"metadata": {},
"output_type": "display_data"
}
],
2025-03-26 16:59:57 +08:00
"execution_count": 20
2025-03-24 09:57:14 +08:00
},
{
"metadata": {},
"cell_type": "markdown",
"source": [
"## 题目3\n",
"令2022年11月1日至2023年9月30日的空气质量数据为训练集,剩余数据为测试集。基于训练集,尝试使用两种不同的方法构建空气质量指数预测模型,并在测试集上测试。比较所选模型的预测效果。"
],
"id": "3f89fa62a897a3e3"
},
{
"metadata": {
"ExecuteTime": {
2025-03-26 16:59:57 +08:00
"end_time": "2025-03-26T08:40:35.162182Z",
"start_time": "2025-03-26T08:40:34.796022Z"
2025-03-24 09:57:14 +08:00
}
},
"cell_type": "code",
"source": [
"# Reload the raw workbook so this section starts from a fresh frame.\n",
"raw_records = pd.read_excel('北京市空气质量指数与气象数据.xlsx')\n",
"\n",
"# One timestamp per observation: the calendar date plus the hour offset.\n",
"observed_at = pd.to_datetime(raw_records['date']) + pd.to_timedelta(raw_records['hour'], unit='h')\n",
"\n",
"# Index by timestamp and put the rows on a regular 3-hour grid; timestamps\n",
"# that are absent from the workbook show up as all-NaN rows.\n",
"data = (\n",
"    raw_records.assign(date_hour=observed_at)[['date_hour', 'date', 'hour', 'AQI']]\n",
"    .set_index('date_hour')\n",
"    .asfreq('3h')\n",
")\n",
"\n",
"# Only AQI is forward-filled; 'date' and 'hour' stay NaN on inserted rows.\n",
"data['AQI'] = data['AQI'].ffill()"
2025-03-24 09:57:14 +08:00
],
2025-03-24 17:06:38 +08:00
"id": "d1bdac1e4e1562f2",
"outputs": [],
2025-03-26 16:59:57 +08:00
"execution_count": 21
2025-03-24 09:57:14 +08:00
},
{
"metadata": {},
"cell_type": "markdown",
2025-03-24 15:19:11 +08:00
"source": "### (1)SARIMA模型\n",
"id": "1fc53937767d55fd"
2025-03-24 09:57:14 +08:00
},
{
"metadata": {
"ExecuteTime": {
2025-03-26 16:59:57 +08:00
"end_time": "2025-03-26T08:41:34.717925Z",
"start_time": "2025-03-26T08:40:35.213498Z"
2025-03-24 09:57:14 +08:00
}
},
"cell_type": "code",
"source": [
"\"\"\"\n",
"This model assumes the other test-set indicators are unknown and predicts\n",
"future AQI from the AQI history alone.\n",
"The whole cell takes roughly one minute to run.\n",
"\"\"\"\n",
"# Train/test boundary timestamps.\n",
"# NOTE(review): presumably the observations fall on 02:00, 05:00, ..., 23:00, so\n",
"# no row lies between the two boundaries -- confirm against the workbook.\n",
"train_end = pd.Timestamp('2023-09-30 23:00:00')\n",
"test_start = pd.Timestamp('2023-10-01 02:00:00')\n",
"aqi_series = data['AQI']\n",
"train = aqi_series.loc[:train_end]\n",
"test = aqi_series.loc[test_start:]\n",
"\n",
"# Stepwise order search. m=8 is the seasonal period (8 three-hourly steps per\n",
"# day); both differencing orders are fixed at 1 and p, q are capped at 1.\n",
"print(\"开始自动参数搜索(请耐心等待)...\")\n",
"arima_search_options = dict(\n",
"    start_p=0, start_q=0,\n",
"    max_p=1, max_q=1,\n",
"    seasonal=True,\n",
"    m=8,\n",
"    d=1,\n",
"    D=1,\n",
"    trace=False,\n",
"    error_action='ignore',\n",
"    suppress_warnings=True,\n",
"    stepwise=True,\n",
")\n",
"model = auto_arima(train, **arima_search_options)\n",
"print(f\"最优参数组合Order{model.order} Seasonal{model.seasonal_order}\")\n",
"\n",
"# Fit a statsmodels SARIMAX on the training series with the selected orders.\n",
"current_model = SARIMAX(train, order=model.order, seasonal_order=model.seasonal_order)\n",
"current_results = current_model.fit(disp=False)\n",
"\n",
"# Accumulators for the rolling forecast that follows in this cell.\n",
"predictions, lower_bounds, upper_bounds = [], [], []\n",
2025-03-26 16:59:57 +08:00
"# Announce the rolling one-step-ahead forecast over the test period.\n",
"print(\"开始单步滚动预测(请耐心等待)...\")\n",
2025-03-24 09:57:14 +08:00
"\n",
2025-03-26 14:57:30 +08:00
"# Walk the test timestamps in order: forecast one step, record it, then feed\n",
"# the observed value back in so the next forecast conditions on real data.\n",
"for t in test.index:\n",
" forecast = current_results.get_forecast(steps=1)\n",
" pred_mean = forecast.predicted_mean.iloc[0]\n",
" # conf_int() is called with its default level; the plot later in this cell\n",
" # labels the resulting band as a 95% interval.\n",
" pred_ci = forecast.conf_int().iloc[0]\n",
2025-03-24 15:19:11 +08:00
"\n",
2025-03-26 14:57:30 +08:00
" # The first interval entry is stored as the lower bound, the second as the upper.\n",
" predictions.append(pred_mean)\n",
" lower_bounds.append(pred_ci.iloc[0])\n",
" upper_bounds.append(pred_ci.iloc[1])\n",
2025-03-24 09:57:14 +08:00
"\n",
2025-03-26 14:57:30 +08:00
" # refit=False extends the results with the new observation without\n",
" # re-estimating the model parameters.\n",
" # NOTE(review): each append presumably re-filters the whole history; a single\n",
" # append of `test` followed by get_prediction() may give the same one-step\n",
" # forecasts much faster -- verify before changing.\n",
" current_results = current_results.append(test.loc[[t]], refit=False)\n",
2025-03-24 15:19:11 +08:00
"\n",
2025-03-26 14:57:30 +08:00
"# Collect the forecasts and interval bounds, aligned to the test timestamps.\n",
"forecast_df = pd.DataFrame({\n",
" 'predicted': predictions,\n",
" 'lower': lower_bounds,\n",
" 'upper': upper_bounds\n",
"}, index=test.index)\n",
2025-03-24 15:19:11 +08:00
"\n",
2025-03-26 14:57:30 +08:00
"# Result alignment: keep only the timestamps that actually received a forecast.\n",
"valid_mask = forecast_df['predicted'].notna()\n",
"y_actual_valid = test[valid_mask]\n",
"y_pred_valid = forecast_df.loc[valid_mask, 'predicted']\n",
"\n",
"# Visualisation: tail of the training series, the observed test series, the\n",
"# one-step forecasts and their interval band.\n",
"plt.figure(figsize=(15, 6))\n",
"# The last 24 three-hourly points of the training series, i.e. three days.\n",
"train_last_3days = train.loc[train.index[-24]:]\n",
"train_last_3days.plot(label='训练集最后3天', alpha=0.7)\n",
"test.plot(label='实际值', color='green', alpha=0.7)\n",
"forecast_df['predicted'].plot(style='--', marker='o', markersize=5, label='单步预测值', color='red')\n",
"plt.fill_between(forecast_df.index,\n",
"                 forecast_df['lower'],\n",
"                 forecast_df['upper'],\n",
"                 color='pink', alpha=0.3, label='95%置信区间')\n",
"# Vertical marker at the first test timestamp.\n",
"plt.axvline(test_start, color='gray', linestyle='--', alpha=0.6)\n",
"plt.title('AQI单步滚动预测结果 (SARIMA模型)')\n",
"plt.xlabel('时间')\n",
"plt.ylabel('AQI')\n",
"plt.legend()\n",
"plt.grid(alpha=0.3)\n",
"plt.tight_layout()\n",
"# savefig() does not create missing directories, so make sure the output\n",
"# folder exists before writing (a fresh checkout may not have ./images).\n",
"os.makedirs('./images', exist_ok=True)\n",
"plt.savefig('./images/AQI-SARIMA.png', dpi=200, bbox_inches='tight')\n",
"plt.show()\n",
2025-03-24 09:57:14 +08:00
"\n",
2025-03-26 14:57:30 +08:00
"# Score the one-step forecasts with the project's cal_metrics helper.\n",
"metrics = cal_metrics(y_actual_valid, y_pred_valid)\n",
"n_scored, n_test = len(y_actual_valid), len(test)\n",
"print(\"\\n单步预测评估结果\")\n",
"print(f\"有效预测点数: {n_scored}/{n_test}\")\n",
"# One line per metric, formatted to three decimals.\n",
"for metric_name, metric_value in metrics.items():\n",
"    print(f\"{metric_name}: {metric_value:.3f}\")"
2025-03-24 09:57:14 +08:00
],
2025-03-26 14:57:30 +08:00
"id": "5fb8d128fcab57d1",
2025-03-24 09:57:14 +08:00
"outputs": [
2025-03-26 14:57:30 +08:00
{
"name": "stdout",
"output_type": "stream",
"text": [
"开始自动参数搜索(请耐心等待)...\n",
2025-03-26 16:59:57 +08:00
"最优参数组合Order(0, 1, 1) Seasonal(2, 1, 0, 8)\n",
"开始单步滚动预测(请耐心等待)...\n"
2025-03-26 14:57:30 +08:00
]
},
2025-03-24 09:57:14 +08:00
{
"data": {
"text/plain": [
2025-03-24 15:19:11 +08:00
"<Figure size 1500x600 with 1 Axes>"
],
2025-03-26 16:59:57 +08:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAABdEAAAJOCAYAAABYwk4SAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdeXwcdf3H8dfMbs4mbY7ebdITCuW+oUgph/IDKYeoQIECCigilwqKioDKofJDFPiJIDcUVFBOkZajFSxHW45SWq70vmib+9pz5vfHd3eTTXbT3WQ3SdP38/HII83M7Mx3s0c3n/nM+2u5rusiIiIiIiIiIiIiIiKd2H09ABERERERERERERGR/kpFdBERERERERERERGRJFREFxERERERERERERFJQkV0EREREREREREREZEkVEQXEREREREREREREUlCRXQRERERERERERERkSRURBcRERERERERERERSUJFdBERERERERERERGRJFREFxEREZFe47puXw9BREREREQkLSqii4iIiEjGrV69msMPP5wtW7bELb/llluYOXMmDQ0Nae9z/vz5rF69ervbNTY2Jl23dOlS9txzz07jSld/PBlQX19Pc3Nzp+Uvv/wygUAg4W2eeuopfv3rX2d7aGlbtmxZXw8ha+69917mzp0bt6y2tpbrr7+e1tZWWltbuf3229m6dWuPj/XKK69w1113xX4eyL9XERERkWxSEV1EREREMq6yspJwOMyVV14ZW+Y4Dvfccw8ej4fBgwenvc9zzz2Xf/3rX11u09TUxC677ML//u//AhAOhwmFQrH1e+65JzU1Nfzzn/+Mu10wGExrLO+88w6DBw/mgw8+SOt2tbW1NDY24vP54r5aWlqoqamJjfXDDz/k0EMPZenSpQCsXLmSZcuWxX11PKHw1a9+tVNBvLq6muOPP56rrroq4Xg++OADXnzxxS7H/Ktf/QrLsvB6vbEv27axbTtumWVZ/PnPf07r95HISy+9xLHHHkttbW2P99UfPfjgg7z22msA+Hw+QqEQgUCAG264Adu2CQaDXHnllWzbto1QKERra2vstq+//jq//OUvufnmm7nlllvivq6//npWrVpFa2srLS0tgHmePv7444B57h177LGdCvgiIiIisn0qoouIiIjsRE499VQqKysTrnvhhRfYe++9yc/PZ4899uCZZ55JuN348eO54oorujyObdv89re/pby8nHA4DMDcuXPZuHFjrMC9PSeccAI33nhj7OecnBz8fj/btm1j27ZtbNmyhbVr1+L3+2PbFBUV8cADD3DDDTdwyimn8NBDD1FYWEhpaSkjR45k9OjR+Hw+fvaznzFy5EhGjhxJWVkZhYWFNDU1AVBTU8P777/PihUr+Pjjj1mxYgXvv/8+n3/+eew4BQUFNDY2MnTo0E7j/vTTTykuLmbo0KGxY4wcOZK//vWvHHjggYwYMYKRI0dSWFhIWVlZbP2IESNiRfnRo0dTUlLCoYceyksvvcQ555zDjBkzOPHEEznxxBP50pe+xGWXXRZ33Ly8PPLy8uKW/eUvf6GsrIzrr78eMB30fr8/1kkfLYBHBYPBTt3s+fn5jBs3jlAoFPs66aST+MEPfhC3bNy4cZ2On66NGzcye/Zs/vGPf1BaWgqYEyPf//73GTlyJIMHD+aMM86gvr6+023/8Ic/YFkWK1eujFt+3nnnYVkWlmXh8XiYNGkSv/jFL/D5fJ32cd5557Hvvvt2Wh69fXV1NWBOTkSXtec4DkOHDmX27NlJ72N+fn7s93TdddcxatQo9tprLwDGjRvHrrvuCsD06dMZOXJk3Gtt69atvPvuuzzwwAPce++9vP/++zzzzDP86le/4sMPP6SpqYmrrrqKb33rW0D8c6K0tJSnnnqKc845h02bNiUdn4iIiIh0piK6iIiIyE4iHA7z2muvsW7dOj7++OO4dW+//TannHIK48aN45lnnmG33X
bj61//Om+//XbaxxkxYgSWZXHkkUdyxx13xLqUjz/+eAKBAJMnT8ayLMaPH9/lfpqbm+O6yG3b5uqrr2bs2LGxr3HjxvHhhx/G3e74449nwYIFnHDCCXzrW98iEAjw9NNPc+mll7J582ZqamrYtm0bJ554IldddRXbtm0jGAxSVFQEwLx58zj44IM55JBDOPTQQznkkEM45JBDuPrqq+PGAlBYWNhp3BMmTODDDz9k6dKlLFu2jOnTp1NZWckxxxxDVVUVLS0t1NXVUVZWxpw5c6irq6OhoYFgMMgBBxwAQHl5Of/617+46aabOOyww8jPz+fWW29l9erVrF69miuuuILc3FxWrFiBZVls27atU0G3vr6e//3f/+Xmm2+mtLSUZ599lk8//ZT8/Hxs28ayLK677jr+85//xArCubm5TJkyJW4/Ho9nO49497ZN5Ec/+hEXXXQR06ZNiy278MILee2117j//vu55557ePnll+OucIiaN28eQMJO63HjxrFo0SLmz5/Pt771LX77299yySWXpD2+6JUBya5AePfdd6muro6Npb3oyaTo4+S6LjfddBNbt27l008/ZcqUKWzYsIE1a9YwYsQIFi1axLZt2+K6+7/2ta/x9NNP8+Uvf5n/+Z//4YknnuCSSy5h2LBhPPXUU+y1115xhfPo4xx1+OGHc+GFF/LjH/847fsuIiIisjPzbn8TERERERkI3n77berr68nLy2PevHnstttusXXRrvEnn3ySvLw8jjrqKCorK7n11lv5+9//ntZx8vPzuf322zn99NMBEylx8sknx3W/PvDAA9uN/oh2Drf/+U9/+hMXXHBBbJnjOHFFQp/PR15eHvvttx/77bcfAAsXLuTUU0/lggsu4LHHHsN1Xc466yxmz57Nd7/7XZ5++mkeeughJk6cCMDpp58eG3tHl19+OXl5edTU1ADwy1/+Mlaw3GWXXfj2t79NTk4O5eXlfOc732HXXXfl3Xff5e2336a8vDy2n9raWqqrqxN2PYMpsNq2HetCjt7HP/zhD9TU1MR+zs/PB0jYAX755ZczZswYzjvvPD788ENOPfVUrr76ajZt2kR+fj5er5ff/va3vPbaa7FIl0AgEHfiAkwhds2aNbFjgelYtyyLO++8M7bM7/d3KuSnY/369bz00kusX78+tqyhoYEnnniCp556ihNOOCG23U9/+lP+/Oc/k5OTExvPggULYs/t7373u3H7zs/P58ADDwTgiCOOoKamhrvuuos//elP5ObmpjzGpUuXctRRRyUtos+dO5e8vDw2b97Mhx9+GOswBzjwwAP58MMPcRyHV199lRtuuIG///3vrFy5knfffZd9992Xs846C4AZM2bwk5/8hIKCAh544IFOxwkEAhQXFyccQ6IO+fZ++tOfUllZyaZNmxg1alTK911ERERkZ6ZOdBEREZGdxLx585gwYQIzZ87s1K27YMECjjjiiFgxNjc3l+nTpzN//vy0j2PbNkOGDImLSwHiok2SFQCbm5tjHbsdJSoM2rZNOByORXOcd955HHXUUaxZswaA3/zmN3zlK1/hd7/7Hd/97ne5+OKLY93be+yxB0uWLGHMmDEcdNBBbNu2jXA4HBd3EuU4Dq2trey///4cdNBBsRMQBx54YKw42j6vvaioiF122YUbbriBa665JlZAf/zxx7Esi1GjRpGXl8duu+1Gfn4+Ho+Hiy66KHb7c845h6uvvrrT72LDhg1xVwdEfycdfzdz5szhoYceoqCggPPOO48zzjiDvffem+uvv57S0lIKCwspKioiNzcXj8dDUVERhYWF5OfnM3z48Lh9hcNhxo0bF5fhPnPmTK644oq4ZePGjcNxnISPXSr+8Y9/cPLJJ1NQUBBbtm3bNoC4STbPPfdc5s6dG/cYLVy4kKamJi6//HJeffXVpM+hqAMPPDAWDZSqww47LFY8X7p0KYccckinbebNm8esWbMYPnx4p9fYW2+9hd/vp7
y8nGuvvRafz8epp57K4sWLGTlyJD/60Y/ivs444wyef/752O3fffddzjzzTM4991wWLFjAyy+/zAUXXMB9991HdXU
2025-03-24 09:57:14 +08:00
},
"metadata": {},
2025-03-24 15:19:11 +08:00
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
2025-03-26 14:57:30 +08:00
"\n",
"单步预测评估结果:\n",
"有效预测点数: 248/248\n",
"RMSE: 11.893\n",
"R-squared: 0.932\n",
"MAE: 7.744\n"
2025-03-24 15:19:11 +08:00
]
2025-03-24 09:57:14 +08:00
}
],
2025-03-26 16:59:57 +08:00
"execution_count": 22
2025-03-24 09:57:14 +08:00
},
{
"metadata": {},
"cell_type": "markdown",
"source": "### (2)XGBOOST模型",
2025-03-26 14:57:30 +08:00
"id": "345e80b2d1b4204b"
2025-03-24 09:57:14 +08:00
},
{
"metadata": {
"ExecuteTime": {
2025-03-26 16:59:57 +08:00
"end_time": "2025-03-26T08:41:34.837638Z",
"start_time": "2025-03-26T08:41:34.808815Z"
2025-03-24 09:57:14 +08:00
}
},
"cell_type": "code",
"source": [
"\"\"\"\n",
"This model ignores the other test-set indicators and makes <one-step> AQI\n",
"forecasts from AQI data alone, i.e. every prediction is based on the true AQI\n",
"values observed at earlier timestamps.\n",
"Overall run time is about 25 s.\n",
"\"\"\"\n",
"# --- Feature engineering ---\n",
"# Keep only the target column, then work on a copy.\n",
"data = data[['AQI']]\n",
"data_processed = data.copy()\n",
"\n",
"# Calendar features read off the DatetimeIndex.\n",
"timestamps = data_processed.index\n",
"data_processed['hour'] = timestamps.hour\n",
"data_processed['day_of_week'] = timestamps.dayofweek\n",
"data_processed['month'] = timestamps.month\n",
"\n",
"# Cyclical sin/cos encoding of hour-of-day (period 24) and day-of-week (period 7).\n",
"for prefix, source_col, period in (('hour', 'hour', 24), ('week', 'day_of_week', 7)):\n",
"    angle = 2 * np.pi * data_processed[source_col] / period\n",
"    data_processed[f'{prefix}_sin'] = np.sin(angle)\n",
"    data_processed[f'{prefix}_cos'] = np.cos(angle)\n",
"\n",
"# Lag features: the previous 1..56 observations, i.e. up to 7 days back at\n",
"# 8 three-hourly observations per day.\n",
"lags = list(range(1, 7 * 8 + 1))\n",
"for lag in lags:\n",
"    data_processed[f'AQI_lag_{lag}'] = data_processed['AQI'].shift(lag)\n",
"\n",
"# Chronological split by calendar date.\n",
"train_data = data_processed.loc['2022-11-01':'2023-09-30']\n",
"test_data = data_processed.loc['2023-10-01':]\n",
"\n",
"# Every column except the target is a model input.\n",
"features = [col for col in train_data.columns if col != 'AQI']\n",
"X_train = train_data[features]\n",
"y_train = train_data['AQI']\n",
"X_test = test_data[features]\n",
"y_test = test_data['AQI']"
],
2025-03-26 14:57:30 +08:00
"id": "8920b904e0069cf7",
2025-03-24 09:57:14 +08:00
"outputs": [],
2025-03-26 16:59:57 +08:00
"execution_count": 23
2025-03-24 09:57:14 +08:00
},
{
"metadata": {
"ExecuteTime": {
2025-03-26 16:59:57 +08:00
"end_time": "2025-03-26T08:41:58.039278Z",
"start_time": "2025-03-26T08:41:34.889618Z"
2025-03-24 09:57:14 +08:00
}
},
"cell_type": "code",
"source": [
"# Hyper-parameter tuning by randomised search (this is slow; please be patient).\n",
"# scipy's uniform(loc, scale) samples from [loc, loc + scale] and\n",
"# randint(low, high) samples the integers low .. high - 1.\n",
"param_dist = dict(\n",
"    n_estimators=[100, 200, 300],\n",
"    max_depth=randint(5, 10),            # integers 5 .. 9\n",
"    learning_rate=uniform(0.01, 0.2),    # 0.01 .. 0.21\n",
"    subsample=uniform(0.7, 0.3),         # 0.7 .. 1.0\n",
"    colsample_bytree=uniform(0.7, 0.3),  # 0.7 .. 1.0\n",
"    gamma=uniform(0, 0.3),               # 0 .. 0.3\n",
")\n",
"\n",
"# Base estimator: all cores, with XGBoost's own seed pinned.\n",
"base_regressor = XGBRegressor(n_jobs=-1, random_state=42)\n",
"\n",
"# 10 sampled candidates x 3 folds = 30 fits, ranked by negated MAE.\n",
"# NOTE(review): an integer cv presumably means contiguous, unshuffled K-fold\n",
"# here, so some folds train on later data than they validate on -- confirm this\n",
"# is acceptable for the lagged-AQI features or consider a time-ordered splitter.\n",
"search = RandomizedSearchCV(\n",
"    estimator=base_regressor,\n",
"    param_distributions=param_dist,\n",
"    n_iter=10,\n",
"    cv=3,\n",
"    scoring='neg_mean_absolute_error',\n",
"    verbose=1,\n",
"    random_state=42,\n",
")\n",
"search.fit(X_train, y_train)"
],
"id": "199aa487e826c1ac",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Fitting 3 folds for each of 10 candidates, totalling 30 fits\n"
]
},
{
"data": {
"text/plain": [
"RandomizedSearchCV(cv=3,\n",
" estimator=XGBRegressor(base_score=None, booster=None,\n",
" callbacks=None,\n",
" colsample_bylevel=None,\n",
" colsample_bynode=None,\n",
" colsample_bytree=None, device=None,\n",
" early_stopping_rounds=None,\n",
" enable_categorical=False,\n",
" eval_metric=None, feature_types=None,\n",
2025-03-26 16:59:57 +08:00
" gamma=None, grow_policy=None,\n",
2025-03-24 09:57:14 +08:00
" importance_type=None,\n",
2025-03-26 16:59:57 +08:00
" interaction_constraints=None,\n",
" learning_rate=...\n",
" 'learning_rate': <scipy.stats._distn_infrastructure.rv_continuous_frozen object at 0x000001F4345D2330>,\n",
" 'max_depth': <scipy.stats._distn_infrastructure.rv_discrete_frozen object at 0x000001F433D34290>,\n",
2025-03-24 09:57:14 +08:00
" 'n_estimators': [100, 200, 300],\n",
2025-03-26 16:59:57 +08:00
" 'subsample': <scipy.stats._distn_infrastructure.rv_continuous_frozen object at 0x000001F433D355E0>},\n",
2025-03-24 17:06:38 +08:00
" random_state=42, scoring='neg_mean_absolute_error',\n",
" verbose=1)"
2025-03-24 09:57:14 +08:00
],
"text/html": [
2025-03-26 16:59:57 +08:00
"<style>#sk-container-id-2 {\n",
2025-03-24 09:57:14 +08:00
" /* Definition of color scheme common for light and dark mode */\n",
" --sklearn-color-text: black;\n",
" --sklearn-color-line: gray;\n",
" /* Definition of color scheme for unfitted estimators */\n",
" --sklearn-color-unfitted-level-0: #fff5e6;\n",
" --sklearn-color-unfitted-level-1: #f6e4d2;\n",
" --sklearn-color-unfitted-level-2: #ffe0b3;\n",
" --sklearn-color-unfitted-level-3: chocolate;\n",
" /* Definition of color scheme for fitted estimators */\n",
" --sklearn-color-fitted-level-0: #f0f8ff;\n",
" --sklearn-color-fitted-level-1: #d4ebff;\n",
" --sklearn-color-fitted-level-2: #b3dbfd;\n",
" --sklearn-color-fitted-level-3: cornflowerblue;\n",
"\n",
" /* Specific color for light theme */\n",
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
" --sklearn-color-icon: #696969;\n",
"\n",
" @media (prefers-color-scheme: dark) {\n",
" /* Redefinition of color scheme for dark theme */\n",
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
" --sklearn-color-icon: #878787;\n",
" }\n",
"}\n",
"\n",
2025-03-26 16:59:57 +08:00
"#sk-container-id-2 {\n",
2025-03-24 09:57:14 +08:00
" color: var(--sklearn-color-text);\n",
"}\n",
"\n",
2025-03-26 16:59:57 +08:00
"#sk-container-id-2 pre {\n",
2025-03-24 09:57:14 +08:00
" padding: 0;\n",
"}\n",
"\n",
2025-03-26 16:59:57 +08:00
"#sk-container-id-2 input.sk-hidden--visually {\n",
2025-03-24 09:57:14 +08:00
" border: 0;\n",
" clip: rect(1px 1px 1px 1px);\n",
" clip: rect(1px, 1px, 1px, 1px);\n",
" height: 1px;\n",
" margin: -1px;\n",
" overflow: hidden;\n",
" padding: 0;\n",
" position: absolute;\n",
" width: 1px;\n",
"}\n",
"\n",
2025-03-26 16:59:57 +08:00
"#sk-container-id-2 div.sk-dashed-wrapped {\n",
2025-03-24 09:57:14 +08:00
" border: 1px dashed var(--sklearn-color-line);\n",
" margin: 0 0.4em 0.5em 0.4em;\n",
" box-sizing: border-box;\n",
" padding-bottom: 0.4em;\n",
" background-color: var(--sklearn-color-background);\n",
"}\n",
"\n",
2025-03-26 16:59:57 +08:00
"#sk-container-id-2 div.sk-container {\n",
2025-03-24 09:57:14 +08:00
" /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
" but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
" so we also need the `!important` here to be able to override the\n",
" default hidden behavior on the sphinx rendered scikit-learn.org.\n",
" See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
" display: inline-block !important;\n",
" position: relative;\n",
"}\n",
"\n",
2025-03-26 16:59:57 +08:00
"#sk-container-id-2 div.sk-text-repr-fallback {\n",
2025-03-24 09:57:14 +08:00
" display: none;\n",
"}\n",
"\n",
"div.sk-parallel-item,\n",
"div.sk-serial,\n",
"div.sk-item {\n",
" /* draw centered vertical line to link estimators */\n",
" background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
" background-size: 2px 100%;\n",
" background-repeat: no-repeat;\n",
" background-position: center center;\n",
"}\n",
"\n",
"/* Parallel-specific style estimator block */\n",
"\n",
2025-03-26 16:59:57 +08:00
"#sk-container-id-2 div.sk-parallel-item::after {\n",
2025-03-24 09:57:14 +08:00
" content: \"\";\n",
" width: 100%;\n",
" border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
" flex-grow: 1;\n",
"}\n",
"\n",
2025-03-26 16:59:57 +08:00
"#sk-container-id-2 div.sk-parallel {\n",
2025-03-24 09:57:14 +08:00
" display: flex;\n",
" align-items: stretch;\n",
" justify-content: center;\n",
" background-color: var(--sklearn-color-background);\n",
" position: relative;\n",
"}\n",
"\n",
2025-03-26 16:59:57 +08:00
"#sk-container-id-2 div.sk-parallel-item {\n",
2025-03-24 09:57:14 +08:00
" display: flex;\n",
" flex-direction: column;\n",
"}\n",
"\n",
2025-03-26 16:59:57 +08:00
"#sk-container-id-2 div.sk-parallel-item:first-child::after {\n",
2025-03-24 09:57:14 +08:00
" align-self: flex-end;\n",
" width: 50%;\n",
"}\n",
"\n",
2025-03-26 16:59:57 +08:00
"#sk-container-id-2 div.sk-parallel-item:last-child::after {\n",
2025-03-24 09:57:14 +08:00
" align-self: flex-start;\n",
" width: 50%;\n",
"}\n",
"\n",
2025-03-26 16:59:57 +08:00
"#sk-container-id-2 div.sk-parallel-item:only-child::after {\n",
2025-03-24 09:57:14 +08:00
" width: 0;\n",
"}\n",
"\n",
"/* Serial-specific style estimator block */\n",
"\n",
2025-03-26 16:59:57 +08:00
"#sk-container-id-2 div.sk-serial {\n",
2025-03-24 09:57:14 +08:00
" display: flex;\n",
" flex-direction: column;\n",
" align-items: center;\n",
" background-color: var(--sklearn-color-background);\n",
" padding-right: 1em;\n",
" padding-left: 1em;\n",
"}\n",
"\n",
"\n",
"/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
"clickable and can be expanded/collapsed.\n",
"- Pipeline and ColumnTransformer use this feature and define the default style\n",
"- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
"*/\n",
"\n",
"/* Pipeline and ColumnTransformer style (default) */\n",
"\n",
2025-03-26 16:59:57 +08:00
"#sk-container-id-2 div.sk-toggleable {\n",
2025-03-24 09:57:14 +08:00
" /* Default theme specific background. It is overwritten whether we have a\n",
" specific estimator or a Pipeline/ColumnTransformer */\n",
" background-color: var(--sklearn-color-background);\n",
"}\n",
"\n",
"/* Toggleable label */\n",
2025-03-26 16:59:57 +08:00
"#sk-container-id-2 label.sk-toggleable__label {\n",
2025-03-24 09:57:14 +08:00
" cursor: pointer;\n",
" display: block;\n",
" width: 100%;\n",
" margin-bottom: 0;\n",
" padding: 0.5em;\n",
" box-sizing: border-box;\n",
" text-align: center;\n",
"}\n",
"\n",
2025-03-26 16:59:57 +08:00
"#sk-container-id-2 label.sk-toggleable__label-arrow:before {\n",
2025-03-24 09:57:14 +08:00
" /* Arrow on the left of the label */\n",
" content: \"▸\";\n",
" float: left;\n",
" margin-right: 0.25em;\n",
" color: var(--sklearn-color-icon);\n",
"}\n",
"\n",
2025-03-26 16:59:57 +08:00
"#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {\n",
2025-03-24 09:57:14 +08:00
" color: var(--sklearn-color-text);\n",
"}\n",
"\n",
"/* Toggleable content - dropdown */\n",
"\n",
2025-03-26 16:59:57 +08:00
"#sk-container-id-2 div.sk-toggleable__content {\n",
2025-03-24 09:57:14 +08:00
" max-height: 0;\n",
" max-width: 0;\n",
" overflow: hidden;\n",
" text-align: left;\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-0);\n",
"}\n",
"\n",
2025-03-26 16:59:57 +08:00
"#sk-container-id-2 div.sk-toggleable__content.fitted {\n",
2025-03-24 09:57:14 +08:00
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-0);\n",
"}\n",
"\n",
2025-03-26 16:59:57 +08:00
"#sk-container-id-2 div.sk-toggleable__content pre {\n",
2025-03-24 09:57:14 +08:00
" margin: 0.2em;\n",
" border-radius: 0.25em;\n",
" color: var(--sklearn-color-text);\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-0);\n",
"}\n",
"\n",
2025-03-26 16:59:57 +08:00
"#sk-container-id-2 div.sk-toggleable__content.fitted pre {\n",
2025-03-24 09:57:14 +08:00
" /* unfitted */\n",
" background-color: var(--sklearn-color-fitted-level-0);\n",
"}\n",
"\n",
2025-03-26 16:59:57 +08:00
"#sk-container-id-2 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
2025-03-24 09:57:14 +08:00
" /* Expand drop-down */\n",
" max-height: 200px;\n",
" max-width: 100%;\n",
" overflow: auto;\n",
"}\n",
"\n",
2025-03-26 16:59:57 +08:00
"#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
2025-03-24 09:57:14 +08:00
" content: \"▾\";\n",
"}\n",
"\n",
"/* Pipeline/ColumnTransformer-specific style */\n",
"\n",
2025-03-26 16:59:57 +08:00
"#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
2025-03-24 09:57:14 +08:00
" color: var(--sklearn-color-text);\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
2025-03-26 16:59:57 +08:00
"#sk-container-id-2 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
2025-03-24 09:57:14 +08:00
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
"/* Estimator-specific style */\n",
"\n",
"/* Colorize estimator box */\n",
2025-03-26 16:59:57 +08:00
"#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
2025-03-24 09:57:14 +08:00
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
2025-03-26 16:59:57 +08:00
"#sk-container-id-2 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
2025-03-24 09:57:14 +08:00
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
2025-03-26 16:59:57 +08:00
"#sk-container-id-2 div.sk-label label.sk-toggleable__label,\n",
"#sk-container-id-2 div.sk-label label {\n",
2025-03-24 09:57:14 +08:00
" /* The background is the default theme color */\n",
" color: var(--sklearn-color-text-on-default-background);\n",
"}\n",
"\n",
"/* On hover, darken the color of the background */\n",
2025-03-26 16:59:57 +08:00
"#sk-container-id-2 div.sk-label:hover label.sk-toggleable__label {\n",
2025-03-24 09:57:14 +08:00
" color: var(--sklearn-color-text);\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
"/* Label box, darken color on hover, fitted */\n",
2025-03-26 16:59:57 +08:00
"#sk-container-id-2 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
2025-03-24 09:57:14 +08:00
" color: var(--sklearn-color-text);\n",
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
"/* Estimator label */\n",
"\n",
2025-03-26 16:59:57 +08:00
"#sk-container-id-2 div.sk-label label {\n",
2025-03-24 09:57:14 +08:00
" font-family: monospace;\n",
" font-weight: bold;\n",
" display: inline-block;\n",
" line-height: 1.2em;\n",
"}\n",
"\n",
2025-03-26 16:59:57 +08:00
"#sk-container-id-2 div.sk-label-container {\n",
2025-03-24 09:57:14 +08:00
" text-align: center;\n",
"}\n",
"\n",
"/* Estimator-specific */\n",
2025-03-26 16:59:57 +08:00
"#sk-container-id-2 div.sk-estimator {\n",
2025-03-24 09:57:14 +08:00
" font-family: monospace;\n",
" border: 1px dotted var(--sklearn-color-border-box);\n",
" border-radius: 0.25em;\n",
" box-sizing: border-box;\n",
" margin-bottom: 0.5em;\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-0);\n",
"}\n",
"\n",
2025-03-26 16:59:57 +08:00
"#sk-container-id-2 div.sk-estimator.fitted {\n",
2025-03-24 09:57:14 +08:00
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-0);\n",
"}\n",
"\n",
"/* on hover */\n",
2025-03-26 16:59:57 +08:00
"#sk-container-id-2 div.sk-estimator:hover {\n",
2025-03-24 09:57:14 +08:00
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
2025-03-26 16:59:57 +08:00
"#sk-container-id-2 div.sk-estimator.fitted:hover {\n",
2025-03-24 09:57:14 +08:00
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
"/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
"\n",
"/* Common style for \"i\" and \"?\" */\n",
"\n",
".sk-estimator-doc-link,\n",
"a:link.sk-estimator-doc-link,\n",
"a:visited.sk-estimator-doc-link {\n",
" float: right;\n",
" font-size: smaller;\n",
" line-height: 1em;\n",
" font-family: monospace;\n",
" background-color: var(--sklearn-color-background);\n",
" border-radius: 1em;\n",
" height: 1em;\n",
" width: 1em;\n",
" text-decoration: none !important;\n",
" margin-left: 1ex;\n",
" /* unfitted */\n",
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
" color: var(--sklearn-color-unfitted-level-1);\n",
"}\n",
"\n",
".sk-estimator-doc-link.fitted,\n",
"a:link.sk-estimator-doc-link.fitted,\n",
"a:visited.sk-estimator-doc-link.fitted {\n",
" /* fitted */\n",
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
" color: var(--sklearn-color-fitted-level-1);\n",
"}\n",
"\n",
"/* On hover */\n",
"div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
".sk-estimator-doc-link:hover,\n",
"div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
".sk-estimator-doc-link:hover {\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-3);\n",
" color: var(--sklearn-color-background);\n",
" text-decoration: none;\n",
"}\n",
"\n",
"div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
".sk-estimator-doc-link.fitted:hover,\n",
"div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
".sk-estimator-doc-link.fitted:hover {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-3);\n",
" color: var(--sklearn-color-background);\n",
" text-decoration: none;\n",
"}\n",
"\n",
"/* Span, style for the box shown on hovering the info icon */\n",
".sk-estimator-doc-link span {\n",
" display: none;\n",
" z-index: 9999;\n",
" position: relative;\n",
" font-weight: normal;\n",
" right: .2ex;\n",
" padding: .5ex;\n",
" margin: .5ex;\n",
" width: min-content;\n",
" min-width: 20ex;\n",
" max-width: 50ex;\n",
" color: var(--sklearn-color-text);\n",
" box-shadow: 2pt 2pt 4pt #999;\n",
" /* unfitted */\n",
" background: var(--sklearn-color-unfitted-level-0);\n",
" border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
"}\n",
"\n",
".sk-estimator-doc-link.fitted span {\n",
" /* fitted */\n",
" background: var(--sklearn-color-fitted-level-0);\n",
" border: var(--sklearn-color-fitted-level-3);\n",
"}\n",
"\n",
".sk-estimator-doc-link:hover span {\n",
" display: block;\n",
"}\n",
"\n",
"/* \"?\"-specific style due to the `<a>` HTML tag */\n",
"\n",
2025-03-26 16:59:57 +08:00
"#sk-container-id-2 a.estimator_doc_link {\n",
2025-03-24 09:57:14 +08:00
" float: right;\n",
" font-size: 1rem;\n",
" line-height: 1em;\n",
" font-family: monospace;\n",
" background-color: var(--sklearn-color-background);\n",
" border-radius: 1rem;\n",
" height: 1rem;\n",
" width: 1rem;\n",
" text-decoration: none;\n",
" /* unfitted */\n",
" color: var(--sklearn-color-unfitted-level-1);\n",
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
"}\n",
"\n",
2025-03-26 16:59:57 +08:00
"#sk-container-id-2 a.estimator_doc_link.fitted {\n",
2025-03-24 09:57:14 +08:00
" /* fitted */\n",
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
" color: var(--sklearn-color-fitted-level-1);\n",
"}\n",
"\n",
"/* On hover */\n",
2025-03-26 16:59:57 +08:00
"#sk-container-id-2 a.estimator_doc_link:hover {\n",
2025-03-24 09:57:14 +08:00
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-3);\n",
" color: var(--sklearn-color-background);\n",
" text-decoration: none;\n",
"}\n",
"\n",
2025-03-26 16:59:57 +08:00
"#sk-container-id-2 a.estimator_doc_link.fitted:hover {\n",
2025-03-24 09:57:14 +08:00
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-3);\n",
"}\n",
2025-03-26 16:59:57 +08:00
"</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>RandomizedSearchCV(cv=3,\n",
2025-03-24 09:57:14 +08:00
" estimator=XGBRegressor(base_score=None, booster=None,\n",
" callbacks=None,\n",
" colsample_bylevel=None,\n",
" colsample_bynode=None,\n",
" colsample_bytree=None, device=None,\n",
" early_stopping_rounds=None,\n",
" enable_categorical=False,\n",
" eval_metric=None, feature_types=None,\n",
2025-03-26 16:59:57 +08:00
" gamma=None, grow_policy=None,\n",
2025-03-24 09:57:14 +08:00
" importance_type=None,\n",
2025-03-26 16:59:57 +08:00
" interaction_constraints=None,\n",
" learning_rate=...\n",
" &#x27;learning_rate&#x27;: &lt;scipy.stats._distn_infrastructure.rv_continuous_frozen object at 0x000001F4345D2330&gt;,\n",
" &#x27;max_depth&#x27;: &lt;scipy.stats._distn_infrastructure.rv_discrete_frozen object at 0x000001F433D34290&gt;,\n",
2025-03-24 09:57:14 +08:00
" &#x27;n_estimators&#x27;: [100, 200, 300],\n",
2025-03-26 16:59:57 +08:00
" &#x27;subsample&#x27;: &lt;scipy.stats._distn_infrastructure.rv_continuous_frozen object at 0x000001F433D355E0&gt;},\n",
2025-03-24 17:06:38 +08:00
" random_state=42, scoring=&#x27;neg_mean_absolute_error&#x27;,\n",
2025-03-26 16:59:57 +08:00
" verbose=1)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-4\" type=\"checkbox\" ><label for=\"sk-estimator-id-4\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;&nbsp;RandomizedSearchCV<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.5/modules/generated/sklearn.model_selection.RandomizedSearchCV.html\">?<span>Documentation for RandomizedSearchCV</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>RandomizedSearchCV(cv=3,\n",
2025-03-24 09:57:14 +08:00
" estimator=XGBRegressor(base_score=None, booster=None,\n",
" callbacks=None,\n",
" colsample_bylevel=None,\n",
" colsample_bynode=None,\n",
" colsample_bytree=None, device=None,\n",
" early_stopping_rounds=None,\n",
" enable_categorical=False,\n",
" eval_metric=None, feature_types=None,\n",
2025-03-26 16:59:57 +08:00
" gamma=None, grow_policy=None,\n",
2025-03-24 09:57:14 +08:00
" importance_type=None,\n",
2025-03-26 16:59:57 +08:00
" interaction_constraints=None,\n",
" learning_rate=...\n",
" &#x27;learning_rate&#x27;: &lt;scipy.stats._distn_infrastructure.rv_continuous_frozen object at 0x000001F4345D2330&gt;,\n",
" &#x27;max_depth&#x27;: &lt;scipy.stats._distn_infrastructure.rv_discrete_frozen object at 0x000001F433D34290&gt;,\n",
2025-03-24 09:57:14 +08:00
" &#x27;n_estimators&#x27;: [100, 200, 300],\n",
2025-03-26 16:59:57 +08:00
" &#x27;subsample&#x27;: &lt;scipy.stats._distn_infrastructure.rv_continuous_frozen object at 0x000001F433D355E0&gt;},\n",
2025-03-24 17:06:38 +08:00
" random_state=42, scoring=&#x27;neg_mean_absolute_error&#x27;,\n",
2025-03-26 16:59:57 +08:00
" verbose=1)</pre></div> </div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-5\" type=\"checkbox\" ><label for=\"sk-estimator-id-5\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">best_estimator_: XGBRegressor</label><div class=\"sk-toggleable__content fitted\"><pre>XGBRegressor(base_score=None, booster=None, callbacks=None,\n",
2025-03-24 09:57:14 +08:00
" colsample_bylevel=None, colsample_bynode=None,\n",
2025-03-26 16:59:57 +08:00
" colsample_bytree=0.9826605267054558, device=None,\n",
" early_stopping_rounds=None, enable_categorical=False,\n",
" eval_metric=None, feature_types=None, gamma=0.16898646535366177,\n",
" grow_policy=None, importance_type=None,\n",
" interaction_constraints=None, learning_rate=0.08708330050798323,\n",
" max_bin=None, max_cat_threshold=None, max_cat_to_onehot=None,\n",
" max_delta_step=None, max_depth=6, max_leaves=None,\n",
" min_child_weight=None, missing=nan, monotone_constraints=None,\n",
" multi_strategy=None, n_estimators=100, n_jobs=-1,\n",
" num_parallel_tree=None, random_state=42, ...)</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-6\" type=\"checkbox\" ><label for=\"sk-estimator-id-6\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">XGBRegressor</label><div class=\"sk-toggleable__content fitted\"><pre>XGBRegressor(base_score=None, booster=None, callbacks=None,\n",
2025-03-24 09:57:14 +08:00
" colsample_bylevel=None, colsample_bynode=None,\n",
2025-03-26 16:59:57 +08:00
" colsample_bytree=0.9826605267054558, device=None,\n",
" early_stopping_rounds=None, enable_categorical=False,\n",
" eval_metric=None, feature_types=None, gamma=0.16898646535366177,\n",
" grow_policy=None, importance_type=None,\n",
" interaction_constraints=None, learning_rate=0.08708330050798323,\n",
" max_bin=None, max_cat_threshold=None, max_cat_to_onehot=None,\n",
" max_delta_step=None, max_depth=6, max_leaves=None,\n",
" min_child_weight=None, missing=nan, monotone_constraints=None,\n",
" multi_strategy=None, n_estimators=100, n_jobs=-1,\n",
" num_parallel_tree=None, random_state=42, ...)</pre></div> </div></div></div></div></div></div></div></div></div>"
2025-03-24 09:57:14 +08:00
]
},
2025-03-26 16:59:57 +08:00
"execution_count": 24,
2025-03-24 09:57:14 +08:00
"metadata": {},
"output_type": "execute_result"
}
],
2025-03-26 16:59:57 +08:00
"execution_count": 24
2025-03-24 09:57:14 +08:00
},
{
"metadata": {
"ExecuteTime": {
2025-03-26 16:59:57 +08:00
"end_time": "2025-03-26T08:41:58.151681Z",
"start_time": "2025-03-26T08:41:58.138759Z"
2025-03-24 09:57:14 +08:00
}
},
"cell_type": "code",
"source": [
"#模型预测\n",
"best_model = search.best_estimator_\n",
"y_pred = best_model.predict(X_test)\n",
"#评估指标\n",
"metrics=cal_metrics(y_pred, y_test)\n",
"#输出结果\n",
"print(\"最佳参数组合:\", search.best_params_)\n",
"print(\"评估指标:\")\n",
"for k, v in metrics.items():\n",
2025-03-24 17:06:38 +08:00
" print(f\"{k}: {v:.3f}\")"
2025-03-24 09:57:14 +08:00
],
"id": "fe076794bae89ccb",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2025-03-24 17:06:38 +08:00
"最佳参数组合: {'colsample_bytree': 0.9826605267054558, 'gamma': 0.16898646535366177, 'learning_rate': 0.08708330050798323, 'max_depth': 6, 'n_estimators': 100, 'subsample': 0.7692681476866446}\n",
2025-03-24 09:57:14 +08:00
"评估指标:\n",
2025-03-26 16:59:57 +08:00
"RMSE: 11.815\n",
"R-squared: 0.929\n",
"MAE: 7.722\n"
2025-03-24 09:57:14 +08:00
]
}
],
2025-03-26 16:59:57 +08:00
"execution_count": 25
2025-03-24 09:57:14 +08:00
},
{
"metadata": {
"ExecuteTime": {
2025-03-26 16:59:57 +08:00
"end_time": "2025-03-26T08:42:00.132933Z",
"start_time": "2025-03-26T08:41:58.312337Z"
2025-03-24 09:57:14 +08:00
}
},
"cell_type": "code",
"source": [
"#预测结果可视化\n",
"def plot_results(y_true, y_pred, timestamps):\n",
" plt.figure(figsize=(18, 8))\n",
" ax = plt.gca()\n",
"\n",
" # 绘制预测曲线\n",
" ax.plot(timestamps, y_true, label='真实值',\n",
" marker='o', markersize=4, linewidth=1, alpha=0.8)\n",
" ax.plot(timestamps, y_pred, label='预测值',\n",
" linestyle='--', marker='x', markersize=5, alpha=0.9)\n",
"\n",
" # 设置时间轴格式\n",
" ax.xaxis.set_major_locator(HourLocator(interval=12))\n",
" ax.xaxis.set_minor_locator(HourLocator(interval=3))\n",
" ax.xaxis.set_major_formatter(DateFormatter(\"%m-%d %H:%M\"))\n",
"\n",
" # 增强可视化元素\n",
" plt.title(f'AQI预测效果对比MAE={metrics[\"MAE\"]:.2f}, R-squared={metrics[\"R-squared\"]:.2f}',\n",
" fontsize=14, pad=20)\n",
" plt.xlabel('时间', fontsize=12)\n",
" plt.ylabel('AQI', fontsize=12)\n",
" plt.grid(True, which='both', linestyle='--', alpha=0.5)\n",
" plt.legend()\n",
"\n",
" # 自动调整标签\n",
" plt.xticks(rotation=45, ha='right')\n",
" plt.tight_layout()\n",
2025-03-24 15:19:11 +08:00
" plt.savefig('./images/xg_by_step.png', dpi=200, bbox_inches='tight')\n",
2025-03-24 09:57:14 +08:00
" plt.show()\n",
"\n",
"plot_results(y_test, y_pred, test_data.index)\n",
"\n",
"#特征重要性可视化\n",
"def plot_importance(model, features, top_n=20):\n",
" importance = pd.Series(model.feature_importances_, index=features)\n",
" top_features = importance.sort_values(ascending=False)[:top_n]\n",
"\n",
" plt.figure(figsize=(12, 8))\n",
" ax = top_features.sort_values().plot.barh()\n",
"\n",
" # 添加数据标签\n",
" for i in ax.patches:\n",
" ax.text(i.get_width() + 0.02, i.get_y() + 0.2,\n",
" f'{i.get_width():.2f}',\n",
" fontsize=10, color='dimgrey')\n",
"\n",
" plt.title('Top {} 重要特征'.format(top_n), fontsize=14)\n",
" plt.xlabel('特征重要性', fontsize=12)\n",
" plt.tight_layout()\n",
2025-03-24 15:19:11 +08:00
" plt.savefig('./images/xg_feature_importance.png', dpi=200, bbox_inches='tight')\n",
2025-03-24 09:57:14 +08:00
" plt.show()\n",
"\n",
2025-03-24 17:06:38 +08:00
"np.random.seed(42)\n",
2025-03-24 09:57:14 +08:00
"plot_importance(best_model, features)"
],
"id": "2551eec52baeb4cb",
"outputs": [
{
"data": {
"text/plain": [
"<Figure size 1800x800 with 1 Axes>"
],
2025-03-26 16:59:57 +08:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAABv0AAAMVCAYAAABUfzjNAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3xT5f4H8M/J7kxLC5RNmQoie4riQBzXAcrFgQv1KuIAx1W8DkC9gqJXRbmIv6uiqLhwI4gDF1MUFBCwpWW3QFfSptnn+f0RcmiaJk1Ckqbh8369+rr2m5zn+8lzypNLH86JJIQQICIiIiIiIiIiIiIiIqJmS9XUAYiIiIiIiIiIiIiIiIjo+HDTj4iIiIiIiIiIiIiIiKiZ46YfERERERERERERERERUTPHTT8iIiIiIiIiIiIiIiKiZo6bfkRERERERERERERERETNHDf9iIiIiIiIiIiIiIiIiJo5bvoRERERERERERERERERNXPc9CMiIiIiIiIiIiIiIiJq5rjpR0RERERERERERERERNTMcdOPiIiIiIiIiIiIiIiIqJnjph8RERER0QnCZDKhoqIi7OPsdjteeuklbNiwocHHP/jgA1x11VUBj585cyYuueQS/Pbbb4322rNnDwoLC0P+2rdvX9ivpyGbNm3CzJkz8eeff/o9JssyqqqqYLPZGjz2hx9+wMyZM3HkyJGoZAlk9erVMR2fiGKDf3aJiIiIKF40TR2AiIiIiIgaZrVa4Xa7odfrodVqfR4zm83YsWMHDAYDdDqd37EulwtWqxW9e/dGamoqAODmm2/G6tWr8cYbb+Dcc8/F+vXrUVBQ4HdsdnY2/va3v/nU7rzzTjz11FMYMmSI3/MrKirw7rvv4rrrrsMFF1zg9/j333+PLVu2ID8/v9HXfO655zaYKZABAwbg119/Dfn5gRQWFmLWrFkYMmQIevXq5fNYaWkp2rVrh3/961/497//7Xfs6tWrMWvWLEycOBEtW7Y87iwNmTt3Lh588EGsXr0aQ4cOjUkPIoq+LVu24Mwzz8RDDz2EmTNnNnUcIiIiIkpy3PQjIiIiIkpQTz31FGbNmuVXT0tLw7JlyzB69GgYDAbo9XqYTCYAgNFoBAA4nU5YrVZs2rQJvXv3BgA88sgjGD9+PM4//3wsWrQI69evx2uvvYaOHTsqY5eUlKBXr14+m37eTcWGNhcB4LrrrsPs2bNRXFzs91hVVRXWrFmD6dOnIzs7u9HXnJKSgvPOOw8rVqxQal988QUuvvhiFBQUoFu3bkp97NixUbu6rkWLFgDgt7nqzVT3f+vzzoter49Klvrmz5+PRx99FB9//DE3/IgSTHV1NUpKStCyZcsG17g+ffrgyy+/xNixY5Gamor777+/CVISERER0YmCt/ckIiIiIgrDvn37IEkSxo4d2+Djsizj2WefRY8ePaDX65Gfn48nnngCLperweffcMMNkCQJNTU1fo9NnToVu3fvxsGDB3HkyBG88MIL0Ol0eO655zBq1Cg4nU5UV1ejrKwMp556Kq688kqUlZWhrKwMJpMJDodD2fADgFNPPRUbN27ELbfcggsvvBB6vR5DhgzBjh07lK9x48Ypm1iDBg3CFVdcAUmSfHI5nU6MGTMGl156KcaPH49rr70W/fv3x3fffYfx48fj4osvxt133w0A+Oyzz+B0OnHmmWeisLAQBQUF2LFjB7Zt29bgrTTVanVI58FLo/H/d4zfffcdBgwYgBEjRmDkyJE+X3Pnzm1wHG/f+q+17mMN9QICb4qazWYsXbo09BfTgNWrV+Oee+7Be++9h4svvtjnsTPPPBOSJGHcuHE+9ezsbEiS1OBVRWeffTbUajXKy8v9HuvcuTMkSWrw66WXXgortzdboK9FixaFPNaiRYuCjnXmmWcqz921axfGjh2LrKws6PV6dO/eHW+//XZY2cMxc+ZMnywpKSk4+eSTMXPmTJjN5pj1TV
aBfm4bI8synnrqKXTt2lX5hwNFRUURZXA4HHjiiSdw8sknQ6/Xo1u3bpgzZ47fGl5bW4tJkyYhJycHPXv2RMuWLTFhwgRUVVX5jXnuuefik08+wSOPPIJvvvkmolxERERERKHglX5ERERERGFYtmwZAOCbb76B3W73u7rrnnvuwQsvvIBRo0bhzjvvxBdffIFHHnkE+/fvx8svvxxWr+zsbJjNZkydOhVjxozB9OnT8f777+PSSy/1eZ4QAgUFBZg0aVKjY2ZmZmLBggUAjm1w2Ww2PP/88xg9erRPPSUlBQaDwW8MSZIwcOBApKSkNHj1n81mQ15eHgDgf//7HwDgnHPO8Xte7969sXXrVp+aSqXCV1991eDmW/fu3f1qo0aN8qu1bdsWl112GXQ6HVSqY//Ocfr06bj88svx119/ISUlxeeqPu9nHVZUVGDPnj0AgE6dOimZgmno8e3bt+OKK67Atm3b8MUXXzR429PGuN1u/OMf/8Bdd92FSy65JODzNm3apPx3cXFxg5sOgOeKpJ9//hmyLGPFihWYOHGi33Py8/Px2GOP+dUbuq1rMA899BBuvvlmv/r333+PV199tcFzGcgZZ5yBxYsX+9VtNhumTJmCHj16AACOHDmCs88+GxaLBXfddRfatm2LBQsW4JprrkFqaqrf5mg0PfbYY8jPz0d5eTm+/fZbzJo1CytXrsTq1asb/Fmm6LrjjjuwYMECXHTRRZg6dapyC+M//vgDaWlpIY8jyzIuvvhifPvtt5g8eTL++c9/YtWqVXjwwQexbt06fPLJJ8pzp0yZgs8++wyvv/46Tj31VHzxxRd4+OGHkZ6ejtdee81v7HPPPRcPPvggbrnlFuzcubPBq4qJiIiIiI6bICIiIiKikF100UUCgAAgvvrqK5/HCgoKhEqlEsOHDxcOh0MIIYTL5RJnnHGGkCRJ7Nixw2+866+/XgAQ1dXVDfazWq3izjvvFADExIkTfR6bPXu2MBgMIi0tTajVapGSkiLS0tJEWlqaMBgMYu7cucpzXS6XePLJJ0VNTY1Su/fee8WoUaOE0+kUAMSMGTPE9ddfL0aNGiWEEGLUqFHi+uuvF0IIAUA899xzfvluuukm8dprrwkhhNi3b5+48sorRVVVlRBCiM2bNwsAyuNCCHHmmWeKMWPGCJvNJsxms994/fr1E6effrooKChQvl555RUBQHz77bc+9dGjR4vTTz+9wXmrr6ioSAAQK1euVM6fSqUSarVaqNVqoVKpBAAhSZIAIIYOHaocW11dLQCI2bNnNzj2iy++KACIkpISYbVaxaxZs0RKSorQ6/XigQceaPB1huKDDz4QWVlZAY8fNWqU8loqKiqEEEJ89NFHSm3GjBk+z//www+Vx6666iq/8Tp16iQGDhwYNFNFRYU4cuRIwC/vuW+Iy+USvXv3Fpdcckkjrzw0s2fPFqmpqWL//v1CCM/Pc05OjigqKvLJm5aWJs4555yo9KxvxowZAoD45ZdffOreP9dr166NSd9kVffntqqqKujPmvdnfsOGDQKAGD9+vJBlWQghRHl5ucjKyhKzZs0Kq/+7774rAIjnn3/epz5lyhQBQHz99ddCCCG2bt0qAIj33nvP53njxo0T2dnZAce3Wq2idevW4o033ggrFxERERFRqHh7TyIiIiKiENlsNnz33XeYNGkStFqtctWf12effQZZlnHjjTcqV3Go1Wpcf/31EELg888/D6tfbW0t1Go15s2bhyVLluDkk0/2eVytVkOv16OmpgaVlZVYvXo1Dhw4gJqaGqjVap8r0NavX48ZM2Zg2LBhypVsXhqNBjqdDllZWWHl27RpE1577TX88MMPAIDS0lIsXboUt956KwDPlXV5eXm48sorlWOsVitycnKg1+uRkZHhN6bb7UZqaiq6deumfLVp0wYA0LFjR596Wlpag7dNFULgX//6F3799Vel9uuvv0KtVmP48OE4cOAArFYr3G43XC4XXC
4Xvv32WwDAypUr4XK5sGrVKr9xV65ciYcffhgPPfQQ/vnPf2Lq1Kk+z1u4cCG6deuGWbNm4fLLL8eOHTswZ86cBl9
2025-03-24 09:57:14 +08:00
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<Figure size 1200x800 with 1 Axes>"
],
2025-03-26 16:59:57 +08:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAABKUAAAMVCAYAAACm0EewAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAADPwklEQVR4nOzde3TV1Z3//9c5uUJycnIjwWIuxFwAhSaODAE6Gm5fg07FMGgZLgu/QsEbtYCx8tWCfIXB6RpG+UoJbbHJQCGgUFqCVTjBwSbENHgBwqWkUEEC5AYHkkNyQi7n9we/nDEGQoInJwGej7U+a/G57L3fnyBrwcu998fgcDgcAgAAAAAAANzI2N0FAAAAAAAA4M5DKAUAAAAAAAC3I5QCAAAAAACA2xFKAQAAAAAAwO0IpQAAAAAAAOB2hFIAAAAAAABwO0IpAAAAAAAAuB2hFAAAAAAAANyOUAoAAAAAAABu59ndBQAAAODmNTc36/Tp0zpy5Ij+8pe/aMaMGerfv3+rZ+x2uw4ePCgfHx/5+Ph0qn+Hw6GGhgZ5e3trwIABbe4fPXpUO3fu1JQpUxQWFtbm/q5du/TVV19pzpw5NxyroaFBDz30kP71X/9Vzz//vIxG/v8pAAC3M0IpAACAHurChQtau3at6urqnIfVatX58+d1/vx5VVRU6MyZM7py5YokyWAw6IsvvtC2bdvk4eHh7OfMmTMaOXKkM5QyGAySroZVly9fVkhIiKSroVB1dbWCg4OdzzgcDl25ckUPPvigPvjggzY1HjhwQPPmzdP48eOvGUrt3LlTmzZt6lAo9c477+jTTz/VfffdJ6PRqOLiYvn6+srT09NZzzc5HA41NjaqqanpmoEZAADo2QilAAAAeqigoCD993//ty5duqSQkBAFBwcrKChIkZGRWrZsmR5//HGtWrVKd911l8LDwxUWFiZPz7Z/vbvnnnvU0NDQ5vrbb7+tefPmqaqqSpKUm5urcePG6fDhw+rbt2+HauzVq5ez1haHDh2SwWCQh4eH6urqJEl//etfJUn19fUyGo0aPHhwq34OHjyoV199VcOHD9eqVaskSUlJSWpqarphDffcc4+OHz/eoXoBAEDPQSgFAAC63cmTJ9ssOfumhx56SHv27HFfQd/S0NCgZcuWKTMzU2fOnFFAQICeeOIJ/ed//qf8/PxaPfvuu+9qxYoV+uqrr5SYmKhf/vKXuv/++zs95uXLl+Xl5aU//elPbWYJNTY2atmyZfr+97+vRx55pE3bpqYm2e12eXt7y8vLy3m9vr6+VTjVMsPKZrNJujpzSpJqa2ud1yQ5Zyu1KCsr0/33369f/OIXCgwMlCR5e3s770+ePFmHDx9uVdPAgQOdv/7+97+v/fv3O8+//vprPfLIIwoKCtKWLVucfW3fvl3e3t4yGo0yGo366KOP9O///u/65S9/qUGDBsnhcKipqanVrDAAAHDrIJQCAADdrk+fPlq/fr2kq8HJrFmz9E//9E+aPXu2JCk8PLw7y9NPfvIT/epXv9KTTz6pMWPG6C9/+Yt+/etfq6ysTH/84x+dz/3iF7/Qz372M/3gBz/QM888oz/+8Y8aO3asDh8+rLvuuqtTY/7oRz+65nK5b1qyZImWLFly3fsffvihUlNTnefPP/+83n333TbPmUymVuf33HNPq/NNmzbpRz/6kfP873//u86dO6eYmBhduHChTX9/+ctf5OPjI09PT73++uvKysrSyZMnJV0NzOrr653PHjx4UI8++qiqqqq0Z88eeXt76/z58woJCWkTuLX0cf/99ys5Ofm67w0AAG4NhFIAAKDb+fn5adq0aZKuztqZNWuWYmJinNe606FDh7RmzRqtXLlSP/nJTyRJP/7xj1VXV6eNGzeqtLRUd999t06fPq1FixZp+PDh2r17t7y9vTVnzhwNHjxYr776qn772992atxf//rXampquuZ+Sk1NTbr77ru1YMECvf
TSS23aNjc3y263t1mC5+vrq759+2rfvn2Srs7qev3113X69GlJUn5+vv71X/9VX3zxhfr06aNjx45p7Nix8vX1bdXPgQMHJEmZmZk6d+6cJGn+/PlqbGzUb37zmzazx77Jw8NDvXv3dp4HBATI09NT//Vf/6Xk5GS9+OKL2rRpk/70pz/pH/7hHzr64wIAALcgQikAAIB2NDc3a/ny5XrhhRdaXU9MTNTGjRtVWVmpu+++W9u3b1d9fb1eeukl5/IzHx8fPf300/rFL36h3/zmN51aZva9731P0tWlet/eL6llnyW73a6LFy+2umcymdSvX79r9unt7S0PDw/dfffdkiSz2SxJzvPQ0FBJ0l133aW+ffs6+/7m0jxJKigo0F133aXKykqdP39eknTu3Dk1NjaqvLxcFRUV6tWrlzw8PFRVVaWGhgbnnlJS632loqOjdejQIfn5+en48eP61a9+pfj4eA0aNKjDPysAAHBr4ju7AADglrNu3ToNHjxYPj4+iomJ0Ztvvqnm5mbn/aeeekqBgYF6//33FRsbKx8fHyUmJt5wOdy1DBkyRK+88oqMxtZ/bWpZopaQkCBJKi4uliSlpKS0eu4f/uEfZLVa9dVXX3V6bEmqqKjQwIEDWx333XefJOmXv/xlm3s//elPr9vXlStX1NTUpJMnT+rkyZPOpXct5+Xl5ZKk06dP6+TJkzp79qyz3Tft2bNHTz/9tP7whz/o1VdflSRlZ2fLYrHo448/1tChQ3Xfffdp4MCB+uUvf6mzZ8+2qjExMVHTp0939ufn56fm5mbNmDFD3t7e2rJli3r16qW8vDxNnjzZOTPr2xobG3X58mXnZuoAAODWQigFAABuKcuWLdOMGTMUHh6u//zP/9SwYcO0cOFCzZgxo9VzNptNM2bM0L/8y7/ozTfflN1u12OPPabc3NzvXENJSYn++Mc/asqUKc6laFarVYGBgQoODm71bMt+WC37IXVWy9ftfvWrX6mhoUENDQ3ODcl//vOfO681NDTo+9//fpuldt9kt9tVVlam/v37q3///nrjjTckyXneslzyH//xH9W/f389/PDDznYtbDabxo8fr0cfffSaY0ydOlX19fVqamqSw+FwHnl5eZKkr776SvX19SooKGjV7ic/+YkKCgo0efJkxcfHS5LOnj2rzZs3O2djtRg+fLgMBoO8vLzk7++vn//85x37YQIAgB6FUAoAANwyTp8+rddff13jxo2TxWLR888/r+zsbM2dO1e/+93vtHv3buezTU1NWr58uf793/9d8+bN0549e+Tl5aWlS5d+pxqam5s1a9Ys+fr6OkOdluv+/v5tnm8Jrb69zK6jWr56N2fOHHl5ecnLy8sZPL3xxhvOa15eXjpw4EC7SwTXrl3bKihyOBzKycnR4MGDdejQoTb3Wo5vbnLu7++vX//61xo+fHirvjdv3qwJEyaotrZWDoejzT5Y32Q0GlvV+cYbb+iXv/yl816Llvf8dtD21ltv6cMPP9T27dv1/vvva+rUqe3+DAEAQM/EnlIAAOCW8dFHH6mxsVHPP/98q9Bj7ty5euedd7Rjxw6NGTPGef2pp55y/rpv3776p3/6J+eMnZu1aNEi5eXlad26da32bvrm5t3f5HA4JLWebdQZjY2NkqQVK1ZoypQpkq6/0fn999/v3G/qm44dO6YrV67Iy8ur1XW73a45c+aooaFBFRUV1w20bDabrFar7r33XudeV2VlZcrJyVFWVpYk6T/+4z80ZcoUHTlyRCNGjLju+/Tv31+SNGPGDGVlZel3v/udFi1apKlTp+qjjz5q9WxLPd9eOpmcnMzX9wAAuA0QSgEAgFtGRUWFJLXZyLtlo+6WPZGkq7NrWjbybhEaGqr6+nrV19fLx8en0+P/4Q9/0L/927/p2WefbbUnUktN586dU0NDQ6vwp7KyUtLVr8zdjJb9khYsWKAFCxa0urdixQqtWLHims9/07PPPqtPP/1UvX
r1cgY8DodDFy5ckJeXlwICAvTEE09Iuhp4Xbx4UX5+fs4ZSi0/s+zsbP3Lv/yLxo8fr48++ki9evXSkCFDJEn79u1
2025-03-24 09:57:14 +08:00
},
"metadata": {},
"output_type": "display_data"
}
],
2025-03-26 16:59:57 +08:00
"execution_count": 26
2025-03-24 09:57:14 +08:00
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}