2025-03-24 09:57:14 +08:00
{
"cells": [
{
"metadata": {},
"cell_type": "markdown",
"source": [
"# 预测建模\n",
"北京市空气质量指数预测( 推荐难度系数10) \n",
"\n",
"这个数据集是北京市2022年11月1日至2023年10月31日期间空气质量相关数据。\n",
"根据这个数据集,回答以下问题"
],
"id": "b610f839dca4877"
},
{
"metadata": {
"collapsed": true,
"ExecuteTime": {
2025-03-26 14:57:30 +08:00
"end_time": "2025-03-26T02:28:36.097058Z",
"start_time": "2025-03-26T02:28:35.825206Z"
2025-03-24 09:57:14 +08:00
}
},
2025-03-24 15:19:11 +08:00
"cell_type": "code",
2025-03-24 09:57:14 +08:00
"source": [
2025-03-24 15:19:11 +08:00
"import os\n",
"import sys\n",
"\n",
2025-03-24 09:57:14 +08:00
"#导入基础包\n",
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
2025-03-24 17:06:38 +08:00
"\n",
2025-03-24 15:19:11 +08:00
"from statsmodels.graphics.tsaplots import plot_acf\n",
"import matplotlib.font_manager as fm\n",
2025-03-24 09:57:14 +08:00
"\n",
2025-03-24 15:19:11 +08:00
"# 导入主成分分析相关包\n",
2025-03-24 09:57:14 +08:00
"from factor_analyzer import Rotator\n",
"from factor_analyzer.factor_analyzer import calculate_bartlett_sphericity, calculate_kmo\n",
"\n",
2025-03-24 15:19:11 +08:00
"# 导入SARIMA相关包\n",
"from statsmodels.tsa.statespace.sarimax import SARIMAX\n",
2025-03-26 14:57:30 +08:00
"from sklearn.metrics import mean_absolute_error, mean_squared_error\n",
"from pmdarima import auto_arima\n",
"import pmdarima as pm\n",
2025-03-24 15:19:11 +08:00
"\n",
"# 导入XGBOOST相关包\n",
2025-03-24 09:57:14 +08:00
"from xgboost import XGBRegressor\n",
"from scipy.stats import randint, uniform\n",
"from sklearn.model_selection import RandomizedSearchCV\n",
"from matplotlib.dates import DateFormatter, HourLocator\n",
"\n",
2025-03-24 15:19:11 +08:00
"# 导入单独写的函数\n",
2025-03-24 09:57:14 +08:00
"from calculate import *\n",
"from heatmap import *\n",
"from sort_matrix import *"
],
2025-03-24 15:19:11 +08:00
"id": "initial_id",
2025-03-24 09:57:14 +08:00
"outputs": [],
2025-03-26 14:57:30 +08:00
"execution_count": 36
2025-03-24 09:57:14 +08:00
},
{
"metadata": {
"ExecuteTime": {
2025-03-26 14:57:30 +08:00
"end_time": "2025-03-26T02:28:36.900743Z",
"start_time": "2025-03-26T02:28:36.202964Z"
2025-03-24 09:57:14 +08:00
}
},
"cell_type": "code",
"source": [
2025-03-24 15:19:11 +08:00
"# 设置字体\n",
"if sys.platform == 'darwin': # macOS\n",
" font_path = '/System/Library/Fonts/STHeiti Light.ttc'\n",
"elif sys.platform == 'win32': # Windows\n",
" plt.rcParams['font.sans-serif'] = ['SimHei'] # Windows系统自带黑体\n",
" plt.rcParams['axes.unicode_minus'] = False # 解决负号显示问题\n",
"else: # Linux/其他系统\n",
" font_path = '/usr/share/fonts/truetype/wqy/wqy-zenhei.ttc' # 文泉驿字体\n",
"\n",
"# 仅非Windows系统需要加载字体文件\n",
"if sys.platform != 'win32':\n",
" try:\n",
" font_prop = fm.FontProperties(fname=font_path)\n",
" plt.rcParams['font.family'] = font_prop.get_name()\n",
" except:\n",
" print(f\"警告:{font_path} 字体加载失败,请检查路径有效性\")\n",
"\n",
"try:\n",
" os.mkdir('./images')\n",
"except FileExistsError:\n",
2025-03-24 17:31:14 +08:00
" pass\n",
2025-03-26 14:57:30 +08:00
"try:\n",
" os.mkdir('./results')\n",
"except FileExistsError:\n",
" pass\n",
2025-03-24 17:31:14 +08:00
"#读取数据\n",
"data=pd.read_excel('北京市空气质量指数与气象数据.xlsx')\n",
"data.head()"
2025-03-24 09:57:14 +08:00
],
"id": "92ea7ba1218799cd",
2025-03-24 17:31:14 +08:00
"outputs": [
{
"data": {
"text/plain": [
" date hour AQI CO NO2 O3 PM10 \\\n",
"0 2022-11-01 2 18.371429 0.211429 23.771429 29.057143 13.257143 \n",
"1 2022-11-01 5 21.914286 0.180000 26.571429 20.142857 18.914286 \n",
"2 2022-11-01 8 28.628571 0.311429 30.028571 14.285714 27.942857 \n",
"3 2022-11-01 11 19.000000 0.237143 17.971429 40.529412 17.852941 \n",
"4 2022-11-01 14 21.742857 0.252941 15.588235 53.617647 20.941176 \n",
"\n",
" PM2.5 SO2 T ... P Pa U Ff Tn Tx VV Td \\\n",
"0 3.057143 2.628571 6.7 ... 770.5 0.1 36.0 1.0 5.3 17.3 30.0 -7.3 \n",
"1 3.771429 2.542857 2.0 ... 770.8 0.3 62.0 0.0 1.9 17.3 7.0 -4.5 \n",
"2 6.857143 2.400000 6.6 ... 771.7 0.9 56.0 0.0 0.9 17.3 10.0 -7.1 \n",
"3 5.914286 2.176471 13.5 ... 771.3 -0.4 19.0 2.0 0.9 17.3 30.0 -9.7 \n",
"4 6.742857 2.000000 15.7 ... 768.6 -2.7 19.0 2.0 0.9 17.3 30.0 -7.9 \n",
"\n",
" RRR tR \n",
"0 0.0 12 \n",
"1 0.0 12 \n",
"2 0.0 12 \n",
"3 0.0 12 \n",
"4 0.0 12 \n",
"\n",
"[5 rows x 21 columns]"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>hour</th>\n",
" <th>AQI</th>\n",
" <th>CO</th>\n",
" <th>NO2</th>\n",
" <th>O3</th>\n",
" <th>PM10</th>\n",
" <th>PM2.5</th>\n",
" <th>SO2</th>\n",
" <th>T</th>\n",
" <th>...</th>\n",
" <th>P</th>\n",
" <th>Pa</th>\n",
" <th>U</th>\n",
" <th>Ff</th>\n",
" <th>Tn</th>\n",
" <th>Tx</th>\n",
" <th>VV</th>\n",
" <th>Td</th>\n",
" <th>RRR</th>\n",
" <th>tR</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2022-11-01</td>\n",
" <td>2</td>\n",
" <td>18.371429</td>\n",
" <td>0.211429</td>\n",
" <td>23.771429</td>\n",
" <td>29.057143</td>\n",
" <td>13.257143</td>\n",
" <td>3.057143</td>\n",
" <td>2.628571</td>\n",
" <td>6.7</td>\n",
" <td>...</td>\n",
" <td>770.5</td>\n",
" <td>0.1</td>\n",
" <td>36.0</td>\n",
" <td>1.0</td>\n",
" <td>5.3</td>\n",
" <td>17.3</td>\n",
" <td>30.0</td>\n",
" <td>-7.3</td>\n",
" <td>0.0</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2022-11-01</td>\n",
" <td>5</td>\n",
" <td>21.914286</td>\n",
" <td>0.180000</td>\n",
" <td>26.571429</td>\n",
" <td>20.142857</td>\n",
" <td>18.914286</td>\n",
" <td>3.771429</td>\n",
" <td>2.542857</td>\n",
" <td>2.0</td>\n",
" <td>...</td>\n",
" <td>770.8</td>\n",
" <td>0.3</td>\n",
" <td>62.0</td>\n",
" <td>0.0</td>\n",
" <td>1.9</td>\n",
" <td>17.3</td>\n",
" <td>7.0</td>\n",
" <td>-4.5</td>\n",
" <td>0.0</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2022-11-01</td>\n",
" <td>8</td>\n",
" <td>28.628571</td>\n",
" <td>0.311429</td>\n",
" <td>30.028571</td>\n",
" <td>14.285714</td>\n",
" <td>27.942857</td>\n",
" <td>6.857143</td>\n",
" <td>2.400000</td>\n",
" <td>6.6</td>\n",
" <td>...</td>\n",
" <td>771.7</td>\n",
" <td>0.9</td>\n",
" <td>56.0</td>\n",
" <td>0.0</td>\n",
" <td>0.9</td>\n",
" <td>17.3</td>\n",
" <td>10.0</td>\n",
" <td>-7.1</td>\n",
" <td>0.0</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2022-11-01</td>\n",
" <td>11</td>\n",
" <td>19.000000</td>\n",
" <td>0.237143</td>\n",
" <td>17.971429</td>\n",
" <td>40.529412</td>\n",
" <td>17.852941</td>\n",
" <td>5.914286</td>\n",
" <td>2.176471</td>\n",
" <td>13.5</td>\n",
" <td>...</td>\n",
" <td>771.3</td>\n",
" <td>-0.4</td>\n",
" <td>19.0</td>\n",
" <td>2.0</td>\n",
" <td>0.9</td>\n",
" <td>17.3</td>\n",
" <td>30.0</td>\n",
" <td>-9.7</td>\n",
" <td>0.0</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2022-11-01</td>\n",
" <td>14</td>\n",
" <td>21.742857</td>\n",
" <td>0.252941</td>\n",
" <td>15.588235</td>\n",
" <td>53.617647</td>\n",
" <td>20.941176</td>\n",
" <td>6.742857</td>\n",
" <td>2.000000</td>\n",
" <td>15.7</td>\n",
" <td>...</td>\n",
" <td>768.6</td>\n",
" <td>-2.7</td>\n",
" <td>19.0</td>\n",
" <td>2.0</td>\n",
" <td>0.9</td>\n",
" <td>17.3</td>\n",
" <td>30.0</td>\n",
" <td>-7.9</td>\n",
" <td>0.0</td>\n",
" <td>12</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 21 columns</p>\n",
"</div>"
]
},
2025-03-26 14:57:30 +08:00
"execution_count": 37,
2025-03-24 17:31:14 +08:00
"metadata": {},
"output_type": "execute_result"
}
],
2025-03-26 14:57:30 +08:00
"execution_count": 37
2025-03-24 09:57:14 +08:00
},
{
"metadata": {},
"cell_type": "markdown",
"source": [
"## 题目1\n",
"研究单日内空气质量指数与各项指标的变化趋势,这种趋势是否具有周期性?"
],
"id": "bca65e544d8bef55"
},
{
"metadata": {
"ExecuteTime": {
2025-03-26 14:57:30 +08:00
"end_time": "2025-03-26T02:28:37.148177Z",
"start_time": "2025-03-26T02:28:37.014883Z"
2025-03-24 09:57:14 +08:00
}
},
"cell_type": "code",
"source": [
2025-03-24 15:19:11 +08:00
"# 数据预处理:将数据按小时分组,计算每个小时各指标的平均值\n",
"# 转换Excel日期序列值为实际日期并分组\n",
"data['datetime'] = pd.to_datetime(data['date']) + pd.to_timedelta(data['hour'], unit='h')\n",
"valid_hours = sorted(data['hour'].unique())\n",
"hourly_data = data.groupby('hour').mean().loc[valid_hours]\n",
"plt.figure(figsize=(12, 8))\n",
"indicators = ['AQI', 'PM2.5', 'PM10', 'CO', 'NO2', 'O3','SO2']\n",
"colors = ['#2d87bb', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf', '#1f77b4', '#ffbb78', '#98df8a', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf', '#1f77b4', '#ffbb78', '#98df8a', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf', '#1f77b4', '#ffbb78', '#98df8a', '#d62728',]\n",
2025-03-24 09:57:14 +08:00
"\n",
2025-03-24 17:31:14 +08:00
"normalized = (hourly_data[indicators] - hourly_data[indicators].mean(axis=0)) / hourly_data[indicators].std(axis=0)"
2025-03-24 09:57:14 +08:00
],
2025-03-24 15:19:11 +08:00
"id": "118b1b48e798a7ba",
"outputs": [
{
"data": {
"text/plain": [
"<Figure size 1200x800 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
2025-03-26 14:57:30 +08:00
"execution_count": 38
2025-03-24 09:57:14 +08:00
},
{
"metadata": {
"ExecuteTime": {
2025-03-26 14:57:30 +08:00
"end_time": "2025-03-26T02:28:46.540048Z",
"start_time": "2025-03-26T02:28:37.257554Z"
2025-03-24 09:57:14 +08:00
}
},
"cell_type": "code",
2025-03-24 15:19:11 +08:00
"source": [
"# 绘制各指标小时均值变化趋势(标准化后)折线图\n",
"for i, indicator in enumerate(indicators):\n",
" plt.plot(normalized.index, normalized[indicator], \n",
" marker='o',label=indicator, color=colors[i], linewidth=2)\n",
"\n",
"plt.title('各指标小时均值变化趋势(标准化后)', fontsize=14)\n",
"plt.xlabel('小时', fontsize=12)\n",
"plt.ylabel('标准化值', fontsize=12)\n",
"plt.xticks(range(0, 24))\n",
"plt.grid(alpha=0.3)\n",
"plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')\n",
"plt.tight_layout()\n",
"\n",
"# 新增保存代码\n",
"plt.savefig('images/hourly_trends_combined.png', dpi=300, bbox_inches='tight') # 保存组合大图\n",
"plt.show()\n",
"\n",
"# 新增保存子图代码\n",
"for i, indicator in enumerate(indicators):\n",
" plt.figure(figsize=(8, 5))\n",
" plt.plot(normalized.index, normalized[indicator], \n",
" marker='o', color=colors[i], linewidth=2)\n",
" plt.title(f'{indicator}小时均值变化趋势(标准化后)')\n",
" plt.xlabel('小时')\n",
" plt.ylabel('标准化值')\n",
" plt.xticks(range(0, 24))\n",
" plt.grid(alpha=0.3)\n",
" plt.tight_layout()\n",
" plt.savefig(f'images/hourly_{indicator}.png', dpi=300) # 保存单个指标子图\n",
" plt.close()"
],
"id": "57dedbd9b7bbe12d",
"outputs": [
{
"data": {
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
],
2025-03-26 14:57:30 +08:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAnIAAAHWCAYAAADzS2TwAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8fJSN1AAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOydd3gU1feH3y3pvQNphN6kdxCCgIAoIkVEqYr69YeCIE0sgI0mKhYsqIAVEEQBpVfpIEjvCS2992y9vz822RDSlmTT4L7Pkyc7M3funN3ZnfnMueeeoxBCCCQSiUQikUgk1Q5lZRsgkUgkEolEIikdUshJJBKJRCKRVFOkkJNIJBKJRCKppkghJ5FIJBKJRFJNkUJOIpFIJBKJpJoihZxEIpFIJBJJNUUKOYlEIpFIJJJqihRyEolEIpFIJNUUKeQkEolEIpFIqilSyN2nyIIeEknFc/r0aS5cuFDZZtwznDx5kqtXr1a2GRJJpSKF3H3Ili1baNKkCd98802p9r9+/TrJyckYjUYAtFoty5YtY9OmTXfd1549exg7diw///xzqWyxhO3bt/P3339z+vTpcjuGv78/zZs3Jy0trVT7b968mcOHD5OVlWXxPrNnz2bFihWFHrNv376EhoZy+fLlUtmTnZ3N5s2b2bhxY6n2BwrcYFNTU8nOzi5VX+Hh4YwaNYrvv/++wLYFCxYwb948IiIiLOpr165dfPzxx4SFhRXYtn//fnbs2EFMTEyp7CyO6OhounXrRkJCQqHby/u3oNFoGD9+PJs3byYrKwutVsvff//Nrl27LO7DmufUEvR6PYcPHyY8PLzQ7efOnaN3795kZGSUmw0SSZVHSO47evXqJby9vcXFixdLtX/Xrl2FjY2NOHHihBBCCKPRKNRqtejVq1eBtikpKeLtt98WH330UaF9bdiwQQBi8+bN5nXXrl0TY8eOFRMmTBBTp04V06dPF9OnTxdTpkwRr7zyihg7duxd2dupUycBiF27dlnUPj4+XqSmpgqNRiN0Op3QarUiLS1NxMbGFrlPcHCw6NChQ7H9jhkzRjRs2LDQz71Vq1YCEG+++aZFNmo0GuHi4iJ8fX3FmTNnRFJSktDpdObtISEholGjRvn2MRqNIiMjQ6Snp5fYf3JysvD29hbNmjWzyJ47SU9PF0qlUrRp00YcOnRICCHEypUrBSCGDBly1/2Fh4cLQHz99df51hsMBuHv7y+aN28u0tLSLOpr0aJFAii0/dChQwUgbty4cdc2lsSwYcNE7969i9xe3r+F/fv3C0A0bdpUpKamiuTkZOHq6io6duxo0f7WOqdGo9F8/ISEBBERESFOnjwpdu7cKVatWiU++OADMXr0aNGpUyfh6OgoADF48OBC+zIYDKJevXpi2rRpFh9fIrnXUFeSfpRUEn/++Sfbt28HoGHDhkW2c3Bw4MqVK9SqVavANh8fH2rVqkXLli0BUCgUODg44OXlBYDRaOTgwYP8+uuv/PDDD6SlpeHo6Ei/fv1o1KhRvr5cXV0BsLOzM69LSkpi2bJlKJVKbG1tUalUCCEwGAxotVqEEIV6ZorC2dkZgNatW1vU/oMPPuCjjz4qsH7w4MGsWbOm0H0cHR1L7DczM5OLFy9Sp06dfOuTk5M5deoUderUYc6cORbZuH79etLS0njnnXfYsGEDr7/+OmA6F2q1Gp1Oh1KpxN7eHjCdE71ejxCCWbNmMXv2bACWLVvGtWvXcHFxwcHBATs7O9Rq02WhYcOG7N+/nyVLlpjfn1arJTs7m8zMTFJSUhg2bJj5e3A7+/fvx2g0YjQaadOmTb7PqF+/fha9R4CEhAQ8PDzM78PW1hYhBImJiXh5ebF69WoiIiLw9vbmqaeeAiA9PZ3Q0FDzewS4desW6enpODg4mL2eMTExhIeH07hxY/bu3YuNjQ1JSUk4ODgQERHBzZs3SUtLo1u3bjg4OFhsc2EcOXKEVatWsXXr1i
LbWOu3sH//ft58803+/PNPc5+A+Xf/4Ycf4uLiApjOxbp167h48WKx14Pcfq1xTmNjY6lRo0aR221sbPDw8MDDw4PmzZvj7u6OXq/n1q1bBAQE5GurVCqZNGkSkyZN4qWXXqJ27doW2yGR3CtIIXcfkZCQwCuvvALAqFGjCr34rlmzhrVr1zJlypRCRRzkv9HkolQq+e+//xgwYAAHDx4kPj6eWrVqMWjQIEJDQ2nZsiX+/v4F9itMADVr1ozk5GTc3NwKbBNCkJKSUuJ7vR0bGxuzjZbg5OQEmEROLlOmTDGvL4xc8VMcRbVZv349BoOB3r17c/z4cfN6vV5PVlYW9vb2dOrUKd8+3333Hc7Ozjz//PNcvXoVf39/HBwczDf7AQMGMGjQIJ577jkUCgV6vR69Xo9Go6FBgwbmfv766y/Wrl1rtk+tVqPX6zEYDDg6OuLk5MSECRMQQmBnZ4fRaDQLCIA2bdoUKuT+/PNPwDTsmfu+izoPV69eJSQkpNDz07RpU2JiYsx9jBs3jrFjx6JSqUhLS+Ptt99m0KBBtGvXDoC1a9dy5coVVq5cma+fDz/8kMWLF2NnZ2e2vV69egDExcXx7LPPcuvWLYQQKBQKevXqZX6fmZmZhZ63u2HhwoXUqVOHXr16FdnGWr+FVatWsXv3bkaOHMm6devMn+vatWupUaMGer3ePGTeo0cPnnjiCS5evMjZs2fR6XS0bt2a+vXrF+jXWufU29vb/N6++OILHBwccHFxwc3NDVdX12J/Z4UxYsQIpk6dyrx58/jqq6/ual+J5J6gUvyAkgpHp9OJXr16iZo1a4pXXnlF+Pj4FBgq/Oeff4SdnZ1o166d0Gq1+bYlJiaKb7/9VqxZs0Z0795d+Pr6irVr14pvv/1WZGdnCzc3N+Hm5iaGDBkiFi9eLM6cOZNv/2vXruVbnjt3rvjmm2/EO++8IwAxY8YM8fXXX4uvvvrK6u+9f//+RQ6lFcasWbPEnT+N4OBgMXr0aDFt2jQxfvx4MXnyZDFt2jTzUJevr6+oWbOmGD9+vHjmmWdEfHx8gX5HjRolgHxDoEKYhqqBIv8ef/zxfO2PHTsmABEcHGxel5GRIbKysoQQQsTExAhAfPjhh+btBoOh0CHV6OhoERMTI2JjY4VGoxFCCPHcc8+JunXrmtv07NnTPGys1+uFEKZhtoiIiEI/04yMDOHp6Snat2+fb/2mTZsEIJYtW5avbe3atUWPHj3EpUuXCvT1xRdfiO+//1589913AhDjx48Xy5cvF4sWLRLjxo0TQUFB5vcVHh4uHB0dCwy/CiFEWlqa+f19+eWXAhB6vV4kJCQIg8EgMjIyhBBCNGrUSIwcOVIIYfoe1KxZs0Bfd0tUVJRQKpXitddeK3S7tX8LBoNBDBw4UABi4sSJQoi874wlf7efn1yseU6FEMLBwUF06dKlVO+vMAYPHiwcHR0tChuQSO41pJC7D8jOzhaPP/64UCgUYtOmTSI7O1s0bNhQPPTQQ+Yb2KFDh4Snp6do1KhRobFg58+fL/LCn5SUJNzc3MSwYcOKtKFZs2bmm4oQosi+bhcQpWH79u3iu+++ExcuXDCvu1PIZWVlie+//17cvHmz0D6KE3JNmzYVCoVC2NnZCRcXF7OAVSqVQqFQCIVCIYB8fb///vti1qxZonXr1gIQc+bMEdOmTRPnz58Xu3fvFoB4+eWXxenTp/P9/ffffwIQTz31VD5b+vTpU0DIvfHGGwIQSqVS2NnZCUDY2dkJR0dH8/Lw4cOL/NwGDRokWrZsKa5cuVKkkEtKShIdO3YUEydONAu6wvjkk08EIJ555pl86wu76Y8fP14AIigoyBx3VRhxcXH59j1x4oQICQkRixcvFtevXxeRkZGif//+okWLFuLmzZvi0qVLIjExsUA/aWlp5vM7c+ZM8cgjj4jk5GTx0ksviddee00oFAqxdOlSIYTpe3D7Z1xafvzxRwGIX3/9tdDt5fFbSE
xMFM2aNRNbt24VQgjx8MMPC0B89NFH4ubNm+LmzZvi4MGD5nXXr18XV65cEefOnSv0IcTa59TLy+uuhZxOpxPJycm
2025-03-24 15:19:11 +08:00
},
"metadata": {},
"output_type": "display_data"
}
],
2025-03-26 14:57:30 +08:00
"execution_count": 39
2025-03-24 15:19:11 +08:00
},
{
"metadata": {
"ExecuteTime": {
2025-03-26 14:57:30 +08:00
"end_time": "2025-03-26T02:28:55.021371Z",
"start_time": "2025-03-26T02:28:46.711520Z"
2025-03-24 15:19:11 +08:00
}
},
"cell_type": "code",
"source": [
"# ACF检验周期性\n",
"# 创建完整时间序列(每小时一个样本,缺失值用线性插值)\n",
"full_idx = pd.date_range(start=data['datetime'].min(),\n",
" end=data['datetime'].max(),\n",
" freq='h')\n",
"full_series = data.set_index('datetime').reindex(full_idx)\n",
"interpolated = full_series[indicators].interpolate(method='time')\n",
"\n",
"# 绘制ACF图, 检验3天周期( 24*3) \n",
"plt.figure(figsize=(60, 20)) # 调整整体画布尺寸\n",
"for i, indicator in enumerate(indicators):\n",
" ax = plt.subplot(2, 4, i+1) # 创建2行4列的子图布局\n",
" plot_acf(interpolated[indicator].dropna(),\n",
" lags=72,\n",
" alpha=0.05,\n",
" title=f'{indicator}',\n",
" color=colors[i],\n",
" ax=ax)\n",
" plt.xticks(np.arange(0, 73, 12))\n",
"plt.tight_layout()\n",
"plt.savefig('./images/all_acf_subplots.png', dpi=200, bbox_inches='tight')\n",
"plt.show()\n",
"\n",
"for i, indicator in enumerate(indicators):\n",
" plt.figure(figsize=(12, 6))\n",
" plot_acf(interpolated[indicator].dropna(),\n",
" lags=72,\n",
" alpha=0.05,\n",
" title=f'{indicator} ACF',\n",
" color=colors[i])\n",
" plt.xticks(np.arange(0, 73, 12))\n",
" plt.savefig(f'./images/acf_{indicator}.png', dpi=200, bbox_inches='tight')\n",
" plt.close()\n"
],
"id": "5f8e89a8d1561e4f",
"outputs": [
{
"data": {
"text/plain": [
"<Figure size 6000x2000 with 7 Axes>"
],
2025-03-26 14:57:30 +08:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAF2YAAAfFCAYAAADkPWjkAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8fJSN1AAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdfZhWBZ038O+5gTEmxAE0lxmZIcFNcQMxc43K1XJDzOXVjBZdfHq2rNRerK18FpJJeizTdd3YtG192dW2l5WXsM229mm3p9R8qiXZFduIYiiGyoUGIkBg5jx/DEtLDCo4zD0vn891cZ2L8zv3zfdg9sdvjt9TlGVZBgAAAAAAAAAAAAAAAAAAAAAAAAAAAACgH6tUOwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAwNGmmB0AAAAAAAAAAAAAAAAAAAAAAAAAAAAA6PcUswMAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/Z5idgAAAAAAAAAAAAAAAAAAAAAAAAAAAACg31PMDgAAAAAAAAAAAAAAAAAAAAAAAAAAAAD0e4rZAQAAAAAAAAAAAAAAAAAAAAAAAAAAAIB+TzE7AAAAAAAAAAAAAAAAAAAAAAAAAAAAANDvKWYHAAAAAAAAAAAAAAAAAAAAAAAAAAAAAPo9xewAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQL+nmB0AAPqpr371q3nDG97wjNd97Wtfy9y5czNmzJgcc8wxGT16dC644ILcc8892bt370HXjx07NgsWLDgakQEAAAAAoNtdccUVKYoiRVFk0KBBqa2tzbhx47JgwYK0t7fvv+5P/uRP8oMf/OBpv6ssy7z+9a8/6HxLS0ve8IY3ZOTIkRk6dGhe/vKX5ytf+cozZmtvb09tbe3+fP/168Mf/vDh3ygAAAAAABwFPbFn37p1a8aMGZOiKHLSSSd1+dlNmzblsssuy6hRo/L85z8/r3rVq/Ltb3/7ud0cAAAAAAD0Ajt37swdd9yR3/u938uJJ56YoUOHZuzYsfnDP/zD/OM//mOXn3niiSfy9re/PS960YtSW1ubE088Meeee24+/vGPZ8eOHT18BwAA0PcoZgcAgH7qQx/6UP7+7/8+69at63Le3t6et771rTnvvPPyox/9KNdee20+/elP50Mf+lBOPPHEvOlNb8rLXvay/OhHP+rh5AAAAAAA0L0mTJiQBx54IJ///OfziU98Iq95zWvy4Q9/OO985zv3X/PEE09k8eLFT/s9K1asyLJlyw54selPfvKT/O7v/m6+853v5EMf+lA+8YlPZNiwYbnwwgvzwAMPPO33PfHEE9m5c2c+9rGP5YEHHtj/63Wve91zul8AAAAAAOhOR3PPniTbtm3L7bffnje84Q1dfm7nzp05//zz87WvfS033HBDPvnJT6Ysy5x33nn57ne/+1xvDwAAAAAAquaxxx7L5MmT8453vCNNTU358Ic/nPvuuy/vete78vOf/zwXXnhh5syZk507d+7/zK233pozzjgj//Iv/5I3vvGNuffee3PzzTfntNNOy3ve855Mnjw5jz32WBXvCgAAer+iLMuy2iEAAIDu9c1vfjMve9nLkiRvfvOb84lPfOKga97xjnfkYx/7WG677bZcc801B82/+93vZvr06Rk2bFi++c1vZvjw4UmSsWPH5rLLLnvGh+YBAAAAAKA3uOKKK/KDH/wg3/jGNw44/973vjd/9md/lra2tgwbNiwvetGLsm7dujzxxBM55ZRTDvqesixz5pln5rvf/W7Wrl2b8ePHJ0muuuqqfO5zn8sTTzyR448/fv+1U6dOTWtra/793//9kNn+5m/+Jm9605vyy1/+Msccc0w33jUAAAAAAHSPo71n/+8WLVqUv/7rv85PfvKTA87/xV/8Rd797ndn9erVOe2005Ike/bsycte9rIMHz48X/3qV7vxjgEAAAAAoGf8+Mc/zktf+tLU1dXl85//fF
70ohcddM2DDz6YuXPn5oYbbsjb3/723HnnnfnjP/7jLFy4MM3NzSmK4oDr169fn+nTp+c///M/861vfSsNDQ09dTsAANCnVKodAAAA6H4f+tCHMnr06LzrXe/K3/zN36S1tfWA+WOPPZaPfexjue6667osZU+SM844I1/84hfzgx/8IDfffHNPxAYAAAAAgB4zYsSItLe3Z9euXWlvb8+PfvSjFEWRG264ocvrV6xYkdWrVydJvv/97+8///Wvfz0zZszYX8qeJEVRZPr06fmP//iPp83wne98J6effnpqamrS3t7eDXcFAAAAAAA9o7v27M/G5z//+UydOnV/KXuSDBkyJH/yJ3+Sf/mXf8lPf/rTI78RAAAAAACokve///3ZuXNnvvKVr3RZyp4k06ZNy9q1a/P2t78927Zty7ve9a7Mnz8/H/zgBw8qZU+SsWPH5h//8R+za9eu/Omf/unRvgUAAOizFLMDAEA/s3r16nzhC1/Itddem/e+971JkltvvfWAaz71qU/lmGOOyXXXXfe03/U7v/M7ufTSS3PnnXcetbwAAAAAAHC0dXR0ZPv27dm+fXt++tOfZunSpfnoRz+aKVOm5Pjjj8/69euzZ8+evOc978nf/d3fHVQIU5ZlPvjBD+bSSy/NyJEjs3bt2v2zL3/5ywft4ZPkiSeeyNixY58213e+851873vfy9ChQ3PMMcfk93//9/O9732vW+4ZAAAAAAC6y9Hcsz8bjz/+eM4+++yDzk+ZMiVlWeaxxx57TvcHAAAAAAA97Ze//GU+97nP5S1veUvGjBnztNe+4AUvSJJ87nOfyy9/+cssXLjwaa8fPXp03vKWt+TTn/50fvWrX3VbZgAA6E8UswMAQD/zoQ99KCNGjMiVV16Z3/qt38r8+fNzxx135Be/+MX+a9asWZPTTz89w4YNe8bvO+ecc9La2pqtW7cezdgAAAAAAHDUPPLIIzn22GNz7LHHZvTo0Xnd616Xs846K5/97GeTZH8BzPve975MmDAhH/zgBw/4/IoVK7J69ep84AMfyLhx4w4ojPmt3/qtHHvssQdc/53vfCd33XVXrr766kNmKssyJ554Ym6++easXLkyd911VzZt2pRzzz03mzZt6q5bBwAAAACA5+xo7tmfjS1btuSEE0446Pxv/dZvJUmefPLJI7ktAAAAAAComnXr1mXv3r15xSte8aw/s2bNmhx//PEZN27cM177spe9LLt37866deueS0wAAOi3FLMDAEA/8v3vfz/3339/5s6dm61bt+YnP/lJ5s6dm+3bt2fJkiUHXFsUxUGf37RpU/793/89Tz311EGzjo6Oo5YbAAAAAACOphe/+MX5xje+kYcffjiPPfZYNm/enC9/+cs56aSTknQWxhx//PGpq6vL9ddfn09/+tP5j//4jySdBeof/OAH8/rXvz6nnXZaxo8fn+9///uH/LN+8IMf5OKLL84rX/nKXHPNNYe8riiKLFu2LFdddVVe85rX5I/+6I/yta99LWVZ5rbbbuvevwAAAAAAAHgOenLPfihdPf9eluUBRwAAAAAA6Cva29uTJIMGDXrWnzmc7pdKpbNmsqv9OgAAoJgdAAD6lRtvvDEdHR25/fbbM2bMmIwZMyavetWrkiS33XZbduzYkSQ57bTT8vjjj2f79u0HfH779u35vd/7vbzpTW/af+5b3/pWGhoaMmLEiJ67EQAAAAAA6EbDhw/Py1/+8rzsZS/LxIkTD9p5r127NuPGjUuSzJ49Oy9+8YvzwQ9+MEmyYsWKrF69Oh/4wAeSJOPGjcvatWu7/HNaWlry6le/OqNHj87999+//2H2Z2vUqFF5xSteke985zuHe4sAAAAAAHDU9NSe/VBGjhyZ//zP/zzo/M9+9rMkyQknnHDY9wQAAAAAANU0fvz4DB48ON/61ree9WdOO+20/Od//md++MMfPuO13/72t3PMMcfs398DAAAHUswOAAD9REtLSz71qU
9l3rx5eeCBBw74deedd2bz5s355Cc/mSS57LLLsmvXrtx4440HfMcpp5ySpUuX5jOf+UxuueWWrFu3Lp/97Gfzxje
2025-03-24 15:19:11 +08:00
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<Figure size 1200x600 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<Figure size 1200x600 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<Figure size 1200x600 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<Figure size 1200x600 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<Figure size 1200x600 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<Figure size 1200x600 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<Figure size 1200x600 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
2025-03-26 14:57:30 +08:00
"execution_count": 40
2025-03-24 09:57:14 +08:00
},
{
"metadata": {},
"cell_type": "markdown",
"source": [
2025-03-24 15:19:11 +08:00
"\n",
"\n",
2025-03-24 09:57:14 +08:00
"## 题目2\n",
"简述各项指标间的相互关系。"
],
"id": "59e20f3463e819a6"
},
{
"metadata": {
"ExecuteTime": {
2025-03-26 14:57:30 +08:00
"end_time": "2025-03-26T02:28:57.136686Z",
"start_time": "2025-03-26T02:28:55.596922Z"
2025-03-24 09:57:14 +08:00
}
},
"cell_type": "code",
"source": [
"#计算相关系数矩阵\n",
"correlation_matrix = data.iloc[:, 1:].corr()\n",
"#绘制热力图\n",
2025-03-24 15:19:11 +08:00
"plot_heatmap(correlation_matrix,20,16,title=\"Correlation Matrix Heatmap\",save_path=\"./images/correlation_heatmap.png\")"
2025-03-24 09:57:14 +08:00
],
"id": "c917d14115569bcd",
"outputs": [
{
"data": {
"text/plain": [
"<Figure size 2000x1600 with 2 Axes>"
],
2025-03-26 14:57:30 +08:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAABxUAAAY1CAYAAADkUIAZAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8fJSN1AAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd1gTSR8H8C9I7yACitIFG4piFxXUs4uenr1X7GevZy/YsfeO7ezC6enZe0GxF7qg0hUVQgmBvH9wRkMCogcEeL+f58nzyOzs5jfj7mR2Z3dWSSwWi0FERERERERERERERERElANlRQdAREREREREREREREREREUbBxWJiIiIiIiIiIiIiIiIKFccVCQiIiIiIiIiIiIiIiKiXHFQkYiIiIiIiIiIiIiIiIhyxUFFIiIiIiIiIiIiIiIiIsoVBxWJiIiIiIiIiIiIiIiIKFccVCQiIiIiIiIiIiIiIiKiXHFQkYiIiIiIiIiIiIiIiIhyxUFFIiIiIiIiIiIiIiIiIsoVBxWJiIiI6P9GSkoKNm/ejKZNm8LU1BSampqwsrJCr169cO7cOUWHJ2XAgAFwcXH56fVdXV0xYMAAyd87d+6Enp4ezp49mw/R5d3jx48xb9687+bz8vLCrVu3/tN3ZS+zPP+1Hnbv3g0lJSWIRCK5y5WUlLB79+6f2vaPsrKywty5cwvlu4iIiIiIiIg4qEhERERE/xceP36MmjVr4vfff4elpSWWLFmCffv2Yfz48YiNjUXr1q3RpUsXpKSkKDrUApGSkoKUlBQIhcJC/d6goCB4eXnh48ePOeZJSUnBsmXLEBQUVODxKKoeiIiIiIiIiIo7DioSERERUYn35s0btGrVCgDw5MkT7N27FwMHDkSXLl3w+++/48KFCzhz5gwuXLiAbdu2KTjagjFq1CikpqbC3d29UL83KCgInz59gpeXV455Nm/ejOjo6EIZVFRUPRAREREREREVdxxUJCIiIqISb9q0aUhJScH58+fh4OAgN0+bNm0QFBSEsWPHSqX7+PigadOm0NXVhY6ODpo0aQJfX1+pPEpKSpgyZQqqV68OfX19PH/+XG7aF4cPH0aDBg2gra0NXV1duLi44PDhw98tx6NHj9C5c2eULl0ampqaqFu3Lo4cOYLVq1fDysoKwNfpOa9evYo9e/ZIpuO8cuUKVFRU8Pr1a8n2BAIBZs+eDXt7e6irq6Ns2bIYNGgQIiIipL7X1dUVffr0wb1799CiRQtoa2ujbNmymDp16nef+AsKCoKKigpWr16NhIQEmeVfnlJUVlZGYGBgvpbZ1dUVnTt3RseOHaGlpYUdO3ZI1cP169ehrKyMtWvXSr7vxo0bMmn/1YMHD+Du7g5DQ0NoaGigatWq8PT0RFpamkzeK1euoFWrVtDX14e2tjZcXV1x+fJljBs3Dq6urgCAuXPnQklJCeHh4Zg3bx6UlJRw5coVXLlyBUpKSrh79y4GDRoEIyMjGBoaYtiwYRAIBPj48SOGDh2K0qVLw9TUFKNGjUJSUpLU9yclJWHGjBmws7ODuro6LCwsMGXKFLx9+1byPV9iKF++PCIjIzFw4ECYmJhAQ0MDjo6OWLVqVY7TwxIREREREVHxxUFFIiIiIirREhMTcfjwYQwfPhwVKlTINa+JiYnU33PnzkXHjh2hp6cHLy8veHl5QUdHB+7u7liwYIFU3hUrVqB+/frYu3cvqlatmmPatGnTMHLkSLRo0QK7d+/Gpk2b4OjoiH79+mHChAk5xvbXX3+hfv36CA0NxaxZs7Bjxw40aNAAffr0kXq6skWLFvD19UW1atXQvHlz+Pr6okWLFjLb+/jxIxo1aoSVK1fi119/xZ49ezBu3DicP38eNWvWxOPHj6Xy+/n5wdXVFVWrVsXu3bsxePBgrFmzBiNGjMi1ToOCgtC/f3+oq6tj1apVMss3b96MpKQk9OnTR+ZJxfwo88mTJ/Hhwwds3boVPX
r0kNp+48aNMXbsWMyYMQOvX79GWloahgwZAhcXF4wZMybXcgkEAiQlJcl8svPx8UGTJk1gZmaGdevWYe/evejSpQvWrVuHZs2aITU1VZJ3y5YtaN68OVJTU+Hp6YlNmzbBxsYGLVu2lHoHZK9eveDr6wsTExP07NkTvr6+cHR0lCzv3Lkz3r59iw0bNmDmzJk4cOAABg4ciObNm+Pdu3dYv349Jk+eDG9vb6lyJiQkoEGDBli7di1+++037NmzB2PHjsXx48clT/p+KzExEXXr1kVwcDDmzJmD7du3o3Hjxpg6dSratm3LKWaJiIiIiIhKGjERERERUQn28OFDMQCxj4/PD6139epVMQDxggULZJYtXbpUDEB8/fp1sVgsFgMQV69eXZyZmSnJIy/t0qVLYi0tLfGjR4/EiYmJUp8zZ86IlZSUxH5+fmKxWCzu37+/uFGjRmKxWCz+8OGD2NDQUNy+fXuxUCiUiuXOnTtiNTU1saWlpVR606ZNxf3795f8ffnyZTEAcVhYmFgsFosHDhwo1tXVFb948UJqvU+fPonr1asndnBwEItEIsm2AIj37dsnldfT01OsqqoqTkxMzLEeTU1NxWvWrBEvX75crKurK37//r1kWXJystjMzEw8ffp08YYNG8Ta2tqSZflR5qZNm4pVVVXFUVFROdaDQCAQ29nZiX/55Rfx9OnTxVpaWuKgoKAcy7Nr1y4xgFw/u3btkpTBwMBAvHPnTpn/77CwMHGZMmXEy5cvF4vFYnFgYKBYVVVVPHz4cKl9RiwWi48dOyYGIG7atKlUuqWlpXjOnDkyZWvbtq3UNpYsWSI3fcWKFWItLS1xRkaGWCzO2uf09PTEr169kvoegUAgrl+/vhiA+PLly2KxWCyeM2eOGIDYw8NDJt5r166JVVRUxPPmzcuxHomIiIiIiKj44ZOKRERERFSiZWRkAABKlSr1Q+vt2bMHNjY2mDFjhsyyKVOmSJ7Y+6Jdu3ZQUlKSypc9zdvbG8nJyXBycoKurq7Up23bthCLxTh37pzM9/n6+iIhIQEbN26Eqqqq1LJ69eph0KBBP1Q2oVCIQ4cOYeLEiahcubLUMj09Paxbtw4BAQG4ffu2JL1q1aro3bu3VN5WrVohPT0dISEhcr8nMTERMTExsLW1xciRI6GlpYWVK1dKln95SnHixImws7ODQCBAZGRkvpa5du3aMDMzy3G5lpYWdu7ciQsXLsDT0xOLFy+GnZ3dd7d75coVXL9+XebzLV9fX3z8+BGDBg2S+f+2trZGXFyc5P/74MGDUFNTg5eXl8x+1LlzZ7Ru3TpP5QUADw8PqW18eYpxyJAhUunVqlVDcnIyoqOjIRQK8eeff2LSpEkyUwRraWlh9erVMt+jrq4uN97GjRtj4MCB2Lt3b55jJiIiIiIioqJPRdEBEBEREREVJDs7O6ioqMDPzw9t27bN83pv3ryBo6MjlJXl34dXtWpVvHnzRvJ32bJlZfJkT3v79i0aNmyIpUuX5vi95ubmMmlv376Fvr5+jtO3Vq1aFX///XeO28wuPj4eKSkpqFGjhtzl1apVAwCp8lWqVEkmn76+PgDg8+fPcrfzZTpTW1tbaGlpYcqUKZg7dy4mTJgALS0tLFu2DGPHjkXp0qVha2srWadcuXL5VmZ5/y/Z1alTB+bm5nj37l2eB+8aNWoEFZXcT6fevn0LTU1N/PPPPznm0dHRkeS1s7ODhoaG3HxVq1bF/fv38xTbl3dNfqGpqQkAsLGxkUpXV1cHkPVey/j4eKSmpkpNo5r9++V9z5dtZ+fk5IQ9e/bkKV4iIiIiIiIqHvikIhERERGVaPr6+mjfvj02bNiA+Pj4XPMKBALJvytUqIDAwMAc8z579gzly5eX/C1vMCh7mrm5OaKiotCoUSO4uLhIferWrYvIyEhYWlrKbKdChQr49OmT1CDft168eJFrubIzNjaGhoYGAgIC5C5/+vQpAHy3fF8GXL88DZpdUF
AQlJSUYG1tDQAYMWIEtLW1sXLlSmzevBkCgQATJ04EAFhaWkJFRUUyEJlfZc5pkO5bs2bNwqdPn2Bvb4/BgwcjMzM
2025-03-24 09:57:14 +08:00
},
"metadata": {},
"output_type": "display_data"
}
],
2025-03-26 14:57:30 +08:00
"execution_count": 41
2025-03-24 09:57:14 +08:00
},
{
"metadata": {
"ExecuteTime": {
2025-03-26 14:57:30 +08:00
"end_time": "2025-03-26T02:28:59.213730Z",
"start_time": "2025-03-26T02:28:57.655272Z"
2025-03-24 09:57:14 +08:00
}
},
"cell_type": "code",
"source": [
"#主成分分析( PCA)\n",
2025-03-24 15:19:11 +08:00
"data=pd.read_excel('北京市空气质量指数与气象数据.xlsx')\n",
2025-03-24 09:57:14 +08:00
"PCA_data=data.iloc[:,2:]#去除日期列\n",
"\n",
"# 计算KMO值\n",
"kmo_all, kmo_model = calculate_kmo(PCA_data)\n",
"print(f\"KMO值: {kmo_model.round(3)}\")\n",
"# 进行巴赫利特检验\n",
"chi_square_value, p_value = calculate_bartlett_sphericity(PCA_data)\n",
"print(f\"巴赫利特检验卡方值: {chi_square_value.round(3)}, p值: {p_value}\")\n",
"#判断\n",
"if kmo_model>0.7 and p_value<0.05:\n",
" print(\"数据适合进行主成分分析\",'\\n')\n",
"else:\n",
" print(\"数据不适合进行主成分分析\",'\\n')\n",
"\n",
"# 数据标准化\n",
"scaled_data = (PCA_data - PCA_data.mean()) / PCA_data.std()\n",
"scaled_data = scaled_data.dropna()#去除空值\n",
"\n",
"# 计算协方差矩阵\n",
"cov_matrix = np.cov(scaled_data, rowvar=False)\n",
"\n",
"# 计算特征值和特征向量\n",
"eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)\n",
"sorted_indices = np.argsort(eigenvalues)[::-1]\n",
"sorted_eigenvalues = eigenvalues[sorted_indices]\n",
"sorted_eigenvectors = eigenvectors[:, sorted_indices]\n",
"\n",
"# 绘制累计方差解释比例图\n",
"explained_variance_ratio = sorted_eigenvalues / np.sum(sorted_eigenvalues)\n",
"cumulative_explained_variance = np.cumsum(explained_variance_ratio)\n",
"print(\"累计方差解释比例:\", [f\"{cum * 100:.2f}%\" for cum in cumulative_explained_variance])\n",
"\n",
"plt.plot(range(1, len(cumulative_explained_variance) + 1), cumulative_explained_variance, marker='o')\n",
"plt.xlabel('主成分数量')\n",
"plt.ylabel('累计方差解释比例')\n",
"plt.title('PCA 累计方差解释比例')\n",
2025-03-24 15:19:11 +08:00
"plt.savefig('./images/PCA_cumulative_explained_variance.png', dpi=200, bbox_inches='tight')\n",
2025-03-24 09:57:14 +08:00
"plt.show()\n",
"\n",
"# 选择特征值大于1的作为主成分\n",
"mask = sorted_eigenvalues > 1\n",
"selected_eigenvectors = sorted_eigenvectors[:, mask]\n",
"\n",
"# 计算因子载荷矩阵\n",
"loadings = selected_eigenvectors * np.sqrt(sorted_eigenvalues[mask])\n",
"\n",
"# 使用Varimax旋转载荷矩阵\n",
"rotator = Rotator(method='varimax')\n",
"rotated_loadings = rotator.fit_transform(loadings)\n",
"\n",
"# 输出旋转后的成分矩阵\n",
"rotated_components_df = pd.DataFrame(rotated_loadings,\n",
" index=PCA_data.columns,\n",
" columns=[f'Factor{i+1}' for i in range(rotated_loadings.shape[1])])\n",
"rotated_components_df = rotated_components_df.round(3)\n",
"\n",
"# 输出排序后的载荷矩阵\n",
"rotated_components_df=sort_matrix_by_diag(rotated_components_df)\n",
"print(\"旋转后的载荷矩阵(排序后):\\n\", rotated_components_df)\n",
2025-03-24 15:19:11 +08:00
"plot_heatmap(rotated_components_df, 4, 8,save_path=\"./images/components_heatmap.png\")"
2025-03-24 09:57:14 +08:00
],
"id": "509d783a82bbdcb2",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"KMO值: 0.762\n",
"巴赫利特检验卡方值: 90424.712, p值: 0.0\n",
"数据适合进行主成分分析 \n",
"\n",
"累计方差解释比例: ['31.41%', '54.60%', '66.53%', '73.02%', '78.89%', '84.04%', '88.27%', '91.46%', '93.59%', '95.70%', '97.14%', '98.29%', '98.91%', '99.26%', '99.55%', '99.79%', '99.96%', '100.00%', '100.00%']\n"
]
},
{
"data": {
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
],
2025-03-26 14:57:30 +08:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjQAAAHGCAYAAABjORGMAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8fJSN1AAAACXBIWXMAAA9hAAAPYQGoP6dpAABvDklEQVR4nO3deVxUVf8H8M9szMAAA6goCiLuOyruGqVZmmUlrmVmz6/FzNQ000xzeyxzfVpMs8y0NC2tNM2tKNMKxH3BpRQUZFUEhmX2Ob8/kMlxAEGBYeDzfr3mJXPuufd+71xivp17FokQQoCIiIjIhUmdHQARERHRvWJCQ0RERC6PCQ0RERG5PCY0RERE5PKY0BAREZHLY0JDRERELo8JDREREbk8JjRERETk8pjQEFGFMBgMuHTpUrHblyxZgpUrV0Kn05X6mGazGVFRUWWOxWq1YsuWLTh48KBd+ccff4xPPvkEmZmZZT5mVfDPP//g1KlTyM3NLVX9tLS0Co6IyHmY0BBVsueeew4SiQQSiQQymQweHh5o0qQJZs2aBYvF4lBfp9Phk08+wf3334+6devC3d0djRo1wtNPP429e/fe8XwPPPAAzp07V6YY9Xo9srKyoNPpoNfri33pdDpkZWXBYDDY7S+EQO/evfHYY4/BZDIVeY73338fmzdvhru7e6njevzxx9GzZ0/Ex8eX6Xq0Wi2mTZuGsWPH2mLVarWYNWsW/vzzT2g0mmL3FUJg7Nixd5VIFdLpdDh58iQ+//xzTJs2DRkZGYiJicHff/+N+Ph4XL58GZcvX8alS5dw+vRp3DqBe3x8PJ555hn8/PPPDsddtWoVunbtWuxnfKsrV64gKCgIU6ZMwZ9//omzZ8/i/PnzOHfuHI4cOYKMjAxb3SlTpuCxxx676+slcga5swMgqolat26NRYsWAQAyMzPx119/4b333kN2djY++ugjW72TJ09ixIgRiI+Px4gRI/Dcc8/B29sbV69exY4dOzBgwABERERgw4YNRSYGf/zxB37//XcsWrQI69atK3V8P/74I0aMGFHq+jt27LD7ApRIJJg6dSpGjhyJd955B3PnznXYx9PTEx4eHqU+BwD83//9H3bv3o0vvvgC8+fPL7FuRkYG1q1bB29vb3h6eiIiIgIGgwE//vgjJBIJtm3bBgDo06cP1q9fj5ycHDRt2hQDBw60O87mzZvx1VdfYfz48TAajdDpdHBzc8OIESPwzz//QCaT2epaLBa0bt0a3333HQwGAx566CGcP38e165dg4eHBx544AG0bNkSu3fvxujRoyGVSu3uW15eHh566CHs27fPViaXy7Fx40aMGTPG4Rrd3NxQt25d+Pr63vGzO3DgAEwmEwYOHIjevXs7bN+9ezcGDBgAoKAlrLStPkRVBRMaIifw9fW1SwBGjx4NLy8vLF++HAsXLoSnpycSExPRv39/+Pj44NSpU2jRooXdMSZNmoTdu3dj5MiR+OyzzzBx4kSH8yxYsAAAsHHjRsyfPx8NGzYsVXyFicatLQXbtm3D4MGDcfvybxKJBHK545+SESNG4P3338eJEydgtVohldo3CCsUimLP/9tvv+H8+fNQqVRQKBSQSqWQSqXQ6XT4v//7P7Ro0QKbN2+GEAJWqxUmkwkGgwF16tRBREQEgIIWmI0bN0KtVtuOI5PJMG/ePMTFxeGhhx5C79698c0339gSlQEDBtglNNevX8frr78OnU6H0NBQAEBISAhOnjyJSZMmITc3F08++SQ++ugjtG3bFrNnz4ZarQYAKJVKzJw5EyqVCr/99hv27NmDn376CbNnz0bz5s1x9OhR9O7dG+fOnUNQUBD++OMP3H///Vi4cGGRn5NEIinysyqu/HY7duxAaGgo+vXrh8TERLz44ovw8vLC2rVrkZeXh9zcXAwePBjTp093uFdELkEQUaUaM2aM6NWrl0P5u+++KwCIa9euCSGEePrpp4W3t7
dISEgo8XhpaWlFlh85ckQAEHPmzBFqtVpMmDCh1DHu2LFDABAymcz2kkqlDmUymUwAELt37y7yOFlZWcWeo02bNqJ///5Fbhs/frwAINzc3ISXl5fw9fUVbm5uQiqVCj8/P+Hj4yM0Go3w9PQUarVauLm5CYlEIh588EG74+Tn54sbN24Ig8FgK1uyZIkIDg62q2c0GkVGRoawWq22shs3bohevXqJ4cOHC7PZLFJSUkT9+vXFH3/8Yavz22+/CYVCIW7cuCGEEKJ169Zi1qxZtu3PP/+8eOutt8TChQtFt27dRFxcnPD09BTp6elCCCFeeOEF8fjjj4tLly6JoKAgMW3aNIfPIiUlRQAQP//8s8O26dOn211LamqqGDFihNBqtXb1srOzhVqtFmPGjLGVBQcHi6VLl9reJyUlCQDi8OHDYtKkSeL+++93OB9RVcYWGiInsFqttib93Nxc/Pnnn1iyZAl69uyJ2rVrIycnB99++y2mTJmCoKCgEo/l7+9fZPk777yDOnXqYNq0acjKysKnn36Kt99+G3Xq1Cl1nLc+dtixYweGDx/u8Cji9kddO3fuREZGBqRSKfLy8jBkyBCkpqZi7dq18Pb2hlKphFwuR0ZGBsxmMxYtWgSTyYScnBx06NABTz31FP73v//ho48+smt9eOSRR5CSkoITJ04UG6/RaLR7/91332H06NG294UtSWazGSqVClarFWaz2dbqlJOTA09PTwDA4cOHYTAYsHPnTmg0GpjNZhiNRjzwwAOoXbs2oqOj8eqrr+Kll17CF198gffffx+JiYl45513bOd74okn8NZbb2HUqFEACh7r/Oc//7Hdg48++gjdunVD69at0bNnT7z77rvFXtuhQ4eg0+lgNpuRl5eHRx991G57dHQ0hg8fjsTERNSpU8fu0eXKlSuRl5dne5+YmIgrV67gxx9/xOHDh+Hr64s5c+YUe24il+DsjIqophkzZowAYPeSSCTioYceEomJiUIIIY4fPy4AiB9//PGuzhEbGyskEolYsGCBEEKIK1euCLlcLmbOnFmq/b/99ltbXIUtMbf+XPi+MP7t27fb9n3iiSfsru3atWu2Fh+FQiG8vLyEj4+PkMlkQi6XCy8vL6FUKgUAMXbs2GJjuu+++8TAgQOL3KbX64ssT0tLEzExMeL8+fPiypUrIiUlRcydO1cEBQWJtLQ0kZKSIq5cuSLOnTsnDh06JMxms93+GRkZovDP5BNPPCG++OIL8fPPP4vg4GAxZswYMXDgQKHT6cSJEyfEhAkTxJYtW2z7Wq1WER0dLY4dOybeeust0alTJ3HlyhVx4sQJodfrRWRkpIiIiBAymUz07dtXyGQycf/994vZs2eLzz//XOTm5goh/m2hUavVwtPTU7i5uQkA4vTp02L69OkiMDBQzJ07V8jlctGsWTO7eyGEEJcuXRKenp5CpVLZWmhWrVol6tWrJ/773/+Kbt26iU6dOtnOwxYaclVsoSFygnbt2mHVqlWQSqVQq9UICgqy69hZONrp1g6nZfHuu+/Czc0NTzzxBK5evQqpVIpHHnkEH3/8MaZNmwZvb+8S93/yySdhMBigUCiwc+dOzJs3D82bN8fq1avh7u6On376CcuWLUPfvn0xdepUKJVK276ff/451q9fjy+//BITJ06Et7c3BgwYAIPBADc3N1u9tm3bIjAwEHv27LFds9lsLjamtLQ0/PHHHw79daxWK15++WWsXLnSrvyzzz5DfHy8rf+MXC6HTCbDsWPHkJubi6+//trWB8dsNsNgMOCHH37AwIEDcd999wGALd6srCyYTCbk5+fbWjo+++wz/Pnnn7BYLNi9ezfOnz+PDz74wHZ+g8GA7t272+6hxWJBcHAwAGDq1KlYunQp+vbti3Xr1kGtVmPatGn44YcfsGnTJtSpUwf/93//Z3c927ZtQ79+/QAUtDAVHvfq1atYtGgR5s2bh6lTp9p9xkBBf6T69evb+gAVxj
5u3DjMmjULp0+fdmjZInJFTGiInMDb2xu9evUqdnvTpk0hl8tx+PBhh1E3dxIXF4fNmzfDYrGgXbt2Dts/+eQTTJs
2025-03-24 09:57:14 +08:00
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"旋转后的载荷矩阵(排序后):\n",
" Factor1 Factor3 Factor2 Factor4 Factor5\n",
2025-03-26 14:57:30 +08:00
"Tn -0.963 0.035 0.071 -0.079 0.035\n",
"T -0.958 -0.138 0.033 0.074 -0.028\n",
"Tx -0.954 -0.014 0.045 -0.052 -0.063\n",
"P 0.924 -0.029 0.071 -0.032 -0.000\n",
"Po 0.921 -0.029 0.073 -0.033 -0.000\n",
"Td -0.898 0.366 0.043 -0.013 0.076\n",
"O3 -0.637 -0.529 -0.030 0.239 -0.084\n",
"U -0.322 0.824 -0.008 -0.156 0.229\n",
"Ff -0.045 -0.772 -0.126 0.024 0.172\n",
"NO2 0.300 0.728 -0.290 0.110 -0.202\n",
"CO -0.101 0.695 -0.449 0.298 -0.007\n",
"VV 0.153 -0.667 0.531 -0.093 -0.175\n",
"AQI -0.017 0.038 -0.967 0.025 -0.029\n",
"PM10 0.037 -0.060 -0.933 -0.092 0.003\n",
"PM2.5 0.049 0.359 -0.879 0.149 -0.007\n",
"Pa 0.006 0.055 -0.147 -0.747 -0.130\n",
"SO2 -0.035 0.099 -0.208 0.694 -0.065\n",
"RRR -0.139 0.094 0.103 -0.077 0.819\n",
"tR 0.163 -0.120 -0.087 0.131 0.512\n"
2025-03-24 09:57:14 +08:00
]
},
{
"data": {
"text/plain": [
"<Figure size 400x800 with 2 Axes>"
],
2025-03-26 14:57:30 +08:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYwAAAMWCAYAAADiW7FGAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8fJSN1AAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd1hT1//A8XfYIwwZAirbrSiKe+OorXu2rlbrQmutdVTFvXHUXRVH3ba27lH7ax11K+JGZA9RNm5WQoDfH2g0JrR8lVSk5/U893nIuZ9788khyck55w5Jfn5+PoIgCILwD3TedwKCIAjCh0E0GIIgCEKRiAZDEARBKBLRYAiCIAhFIhoMQRAEoUhEgyEIgiAUiWgwBEEQhCIRDYYgCIJQJKLBEARBEIpENBjC/2TQoEE0a9ZM47rTp08jkUiIjY3Vag6xsbFIJBJOnz6t1ecRBEGVaDAEQRCEIhENhiAIglAkosEQtOrXX3+lcePGmJqaYmZmRrNmzfj111/V4vLz89m4cSNeXl4YGxtjbW1N3759iY6OxtPTk1mzZgHQqlUrXF1dAfD29kYikQAwa9YsKlSowO3bt2nbti0mJiY4OTmxbNkyAIKDg/n444+RSqW4u7uzatUqtRyio6MZOHAgdnZ2GBoa4uHhgb+/PwcOHFA+z8scBgwYQEBAAO3bt8fMzAypVErLli05fPhwcVehIJQYeu87AeHDk5eXR3p6ulp5VlaWyuPJkyezadMmRo4cybhx45DJZFy4cIEvvviCy5cvK7/M8/Pz+eKLL9i1axcDBgxgzJgxZGRksGfPHho0aICurq5yn35+foSEhDBkyBAWLFiAh4eHct2TJ09o2bIlffv2xcfHh7NnzzJ+/HhSU1NZv349AwYMYMiQIZw/f54xY8ZgYWHBwIEDAbh69Spt27alTJkyjB8/HicnJ4KCgpg4cSKOjo5qr/Xy5cvs3buX3r17s3r1auRyOYcPH6Zr167MnDlT2cAJQqmSLwj/g4EDB+YDf7vExMTknzp1Kt/ExCT/5s2b+c+fP1dZjh07li+RSPIDAwPz8/Pz87ds2ZIP5P/0009qzzdmzJh8IH/mzJnKspiYmHwg/6+//lKWzZw5Mx/IX7hwocr2H3/8cT6Qv2jRIpXyTp065bdv3z4/Pz8/PycnJ79ixYr5Xl5e+c+fP1eJi4qKyi9Tpkz+6x+Vli1b5gP5u3btUsvXz88vH8g/c+ZM0SpUED4gYkhK+J95eHhw7tw5teX1YZ4dO3aQmZmJp6cnZmZmKkuHDh3Iz8/njz/+AGD79u20bt2avn37qj3X4sWLMTc3L1JeEokEHx8ftVwBhg0bplJes2ZNoqOjAbh48SKRkZEsX74cqVSqEufm5sakSZPUnqt58+b069dPrXzSpElUrlyZ7du3FylnQfiQiCEp4X9mbm6u8dBahUKh/PvBgwc0adKERYsWFbqf8uXLK2M7dOigMcbAwIBKlSoVKS9LS0ssLS1VyoyNjbG0tKRMmTIq5YaGhsohtAcPHgCoDG+9rkaNGmpl1atX1xgrkUioXbs29+/fL1LOgvAhEQ2GoBXly5fnzJkzNG3aVGXCGEAul3Pw4EGaNGkCgKOjI3fu3NG4n5ycHCIiIor0nCYmJmplEolEYzkUzJ28fH6AO3fuaGwI7969W6Syl8LDw6lXr16RchaED4kYkhK0YsCAAcTExLB69WqV8vz8fMaNG8fQoUNJSEgACk4GPHnyJD///LPafnx9fXn27Nk75fJmg/WmJk2aUKlSJb799lu1yfzY2FgWL16sts25c+fYuXOnWvmqVau4deuWcjJdEEoT0cMQtKJNmzZ8++23jBkzhsDAQNq3b09WVha7du3i/Pnz7NixgwoVKgAFjcuff/5J//79+f3332nbti1ZWVns2bOHsLAwHBwcVPatr68PwJkzZ7C1tdU4ZPS/0NXV5aeffqJt27bUrFmTr776CmdnZ4
KCgli9ejVVq1bl4cOHKttUr16dkSNH8ttvv/HRRx+Rl5fHwYMHOXr0KDNnzqR58+bvlJMglESiwRC0Zvny5Xh5ebF69Wr27duHkZERXl5enDhxglatWinjJBIJ27dvp1mzZqxbt45ff/0VCwsLunbtyq5du2jfvr3KfsuVK0f79u2ZP38+T548Yfny5e+ca7169bh27RozZ85kyZIlpKenU7NmTTZs2IChoSHdu3dXia9Tpw579+5l2rRpjB07FrlcTp06ddi/f79arCCUFpL8lwO5giAUSatWrahQoYLGISlBKM3EHIYgCIJQJKLBEARBEIpENBiC8D86ffq0GI4StOblNdL69Onzt3HHjh2jRo0aGBkZ0aBBA65cuaKyftGiRZQvXx5TU1N69OhBcnLyO+cmGgxBEIQSQqFQsG7dOkaNGvW3cXfv3qVHjx7079+fy5cv06pVK9q3b098fDwAmzZtYuHChSxbtoxTp06RlZVFly5deNcpazHpLQiCUMLMmjWL0NBQdu/erXG9j48PycnJHDx4UFnm7e1N/fr1Wbx4MVWqVGHkyJF8++23ADx//pzy5cuze/fuQq+qUBSihyEIgvCBOXfuHD179lQpGzBgAMePHyclJYXw8HCV9WZmZnTt2pXjx4+/0/OKBkMQBOEDExcXp7wvzEsVK1YkOjqauLg49PT01C7L/3L9uxAn7gmCIGiRTCZDJpOplBkaGmJoaPjW+8zKylK7oGaZMmXIyMggKytL7SKcr69/F6LBEASh1PlNv8r7TkEpcGpfZs+erVL2rjfZMjY2Jjs7W6XsyZMnmJiYaFz3+vp38Z9sMFr1uvS+U3grp/c2Ji4i5H2n8VacKlUjMfTm+07jrThU9SQo8t0PSXwfPCrafdDvmdLA19eXcePGqZS9S+8CwNnZmejoaLy8vJRlkZGRuLm54ezsTHp6Oqmpqdja2qqtfxdiDkMQBEGLDA0NMTc3V1netcFo0aIFR44cUSnbvXs3bdu2xdbWlqpVq6qsz8zM5MiRI7Rt2/adnvc/2cMQBKF0k+j//SXtPzRZWVl4eXnRo0cP5s2bx+jRo/Hy8qJ27dq0bduWPXv2EBAQwObNmwGYMGECkyZNwtLSEkdHR+bOnUvFihXf6ZBaEA2GIAjCB0EulyOXy4GCy+vv27ePCRMmMHXqVGrVqsWff/6pvGXAkCFDSE1NZdSoUTx79oyPPvqII0eOoKPzboNK/8kT98Qcxr9PzGG8H//VOYxjJlWLMZN30yEz9H2nUGxED0MQhFJHR690DUmVFGLSWxAEQSgS0cMQBKHUkeiL38LaIGpVEARBKBLRYAiCIAhFIoak3iNjIx28m9hw7FTK+05FEEoVMemtHSWuwXBxceHevXsqZS1btuT06dPvJyGgYR1LRnzhTHk7I6LuZbLyxxhCI9M1xhoa6DC0nyPeTWzQ0YEzlx/hv/0eMnmeWuzgPo7Ur23Jn2dTUSiK9+jmx4+fsHKtP9eu38DU1JRuXTrRp1cPjbEBgVfZuGUbCYlJuLm48PWIYVStUlljbFR0DOcvXWZg/77Fmu/rHj15wvJ1mwi8cRupqQk9On1Cv55dNcZevnoD/607iU9Kxt3FiTHDB1OtckUAxkydza07d9W26d6xPWOGD9Za/gDXAy+xffM6khMTcHZ1Y8iIb6lUpbrG2NxcBbu2buCvE7+Tq1DQqGlLvvT5BmPjguv+PHn8iK0bf+DmtQAMDI1o074jvfoMRFdXt9jz/pDfN4L2lbghqZMnTxISEkK3bt0YNmwYISEhbN++/b3l41zBmLnfVeHEuTS+mhLEzeCnLJlWDRsrA43x3w51pU4NC/xWRzJ3RQTVK0kZO9xVLa6SqyndP7Zn6froYm8sAOb4LSIjI4NF8+cwymcoe/cf5NDRY2px9+LuM3vBIlq3asGqpYuo7VED3xmzSUt7qBYrz8lh4dLl5OTkFHu+r5u5aDnpGZksnTuN0cO+5JeDRzjw2x
9qcbFxD5i+cCltWzZj7eJ5eNasznezFpD68BEAU74dxbY1y5TLlLFfo6+vT7+e3bSa//24WJbMn07zVu1YsHQdNTz
2025-03-24 09:57:14 +08:00
},
"metadata": {},
"output_type": "display_data"
}
],
2025-03-26 14:57:30 +08:00
"execution_count": 42
2025-03-24 09:57:14 +08:00
},
{
"metadata": {},
"cell_type": "markdown",
"source": [
"## 题目3\n",
"令2022年11月1日至2023年9月30日的空气质量数据为训练集, 剩余数据为测试集。基于训练集, 尝试使用两种不同的方法构建空气质量指数预测模型, 并在测试集上测试。比较所选模型的预测效果。"
],
"id": "3f89fa62a897a3e3"
},
{
"metadata": {
"ExecuteTime": {
2025-03-26 14:57:30 +08:00
"end_time": "2025-03-26T06:49:34.947179Z",
"start_time": "2025-03-26T06:49:33.890890Z"
2025-03-24 09:57:14 +08:00
}
},
"cell_type": "code",
"source": [
"# Re-read the data from the Excel workbook.\n",
2025-03-26 14:57:30 +08:00
"data = pd.read_excel('北京市空气质量指数与气象数据.xlsx')\n",
2025-03-24 17:31:14 +08:00
"# Combine the date column and the hour column into one timestamp per row.\n",
"data['date_hour'] = pd.to_datetime(data['date']) + pd.to_timedelta(data['hour'], unit='h')\n",
2025-03-26 14:57:30 +08:00
"# Keep only the columns used below and index the frame by that timestamp.\n",
"data = data[['date_hour', 'date', 'hour', 'AQI']].set_index('date_hour')\n",
"# Conform the index to a regular 3-hour grid; grid points missing from the file become NaN rows.\n",
"data = data.asfreq('3h')\n",
"# Forward-fill AQI only; 'date' and 'hour' are left as NaN on any row inserted by asfreq.\n",
"data['AQI'] = data['AQI'].ffill()"
2025-03-24 09:57:14 +08:00
],
2025-03-24 17:06:38 +08:00
"id": "d1bdac1e4e1562f2",
"outputs": [],
2025-03-24 17:31:14 +08:00
"execution_count": 55
2025-03-24 09:57:14 +08:00
},
{
"metadata": {},
"cell_type": "markdown",
2025-03-24 15:19:11 +08:00
"source": "### (1)SARIMA模型\n",
"id": "1fc53937767d55fd"
2025-03-24 09:57:14 +08:00
},
{
"metadata": {
"ExecuteTime": {
2025-03-26 14:57:30 +08:00
"end_time": "2025-03-26T06:55:01.678905Z",
"start_time": "2025-03-26T06:53:55.743487Z"
2025-03-24 09:57:14 +08:00
}
},
"cell_type": "code",
"source": [
"\"\"\"\n",
"该模型在假设不知道测试集其他指标的情况下, 仅使用AQI历史数据预测未来AQI\n",
"\"\"\"\n",
2025-03-26 14:57:30 +08:00
"# Train/test split on the timestamp index (label-based .loc slices include both endpoints).\n",
"# NOTE(review): the 23:00 / 02:00 boundaries presume the 3-hour grid lands on those hours - confirm against the data.\n",
"train_end = pd.Timestamp('2023-09-30 23:00:00')\n",
"test_start = pd.Timestamp('2023-10-01 02:00:00')\n",
"train = data.loc[:train_end, 'AQI']\n",
"test = data.loc[test_start:, 'AQI']\n",
"\n",
"# Automatic order selection with pmdarima's stepwise search.\n",
"# m=8 is the seasonal period: 8 steps of 3 hours = 1 day.\n",
"# d and D are passed explicitly (both 1), and p and q are capped at 1 to keep the search small.\n",
"print(\"开始自动参数搜索(请耐心等待)...\")\n",
"model = auto_arima(\n",
"    train,\n",
"    start_p=0, start_q=0,\n",
"    max_p=1, max_q=1,\n",
"    seasonal=True,\n",
"    m=8,\n",
"    d=1,\n",
"    D=1,\n",
"    trace=False,\n",
"    error_action='ignore',\n",
"    suppress_warnings=True,\n",
"    stepwise=True\n",
")\n",
"print(f\"最优参数组合: Order{model.order} Seasonal{model.seasonal_order}\")\n",
"\n",
"# Fit a statsmodels SARIMAX with the selected orders on the training series.\n",
"# current_results is the object that the rolling forecast below extends one observation at a time.\n",
"current_model = SARIMAX(train, order=model.order, seasonal_order=model.seasonal_order)\n",
"current_results = current_model.fit(disp=False)\n",
"\n",
"# One-step-ahead rolling forecast over the test period.\n",
"# For each test timestamp: forecast one step, record the mean and interval bounds,\n",
"# then append the observed value so that the next forecast conditions on it.\n",
"predictions = []\n",
"lower_bounds = []\n",
"upper_bounds = []\n",
2025-03-24 09:57:14 +08:00
"\n",
2025-03-26 14:57:30 +08:00
"for t in test.index:\n",
"    forecast = current_results.get_forecast(steps=1)\n",
"    pred_mean = forecast.predicted_mean.iloc[0]\n",
"    # conf_int() is called with its default level; the first row holds (lower, upper) for this step.\n",
"    pred_ci = forecast.conf_int().iloc[0]\n",
2025-03-24 15:19:11 +08:00
"\n",
2025-03-26 14:57:30 +08:00
"    predictions.append(pred_mean)\n",
"    lower_bounds.append(pred_ci.iloc[0])\n",
"    upper_bounds.append(pred_ci.iloc[1])\n",
2025-03-24 09:57:14 +08:00
"\n",
2025-03-26 14:57:30 +08:00
"    # refit=False: extend the model with the new observation but keep the fitted parameters.\n",
"    current_results = current_results.append(test.loc[[t]], refit=False)\n",
2025-03-24 15:19:11 +08:00
"\n",
2025-03-26 14:57:30 +08:00
"# Collect the per-step results into one frame aligned with the test index.\n",
"forecast_df = pd.DataFrame({\n",
"    'predicted': predictions,\n",
"    'lower': lower_bounds,\n",
"    'upper': upper_bounds\n",
"}, index=test.index)\n",
2025-03-24 15:19:11 +08:00
"\n",
2025-03-26 14:57:30 +08:00
"# Post-processing: keep only timestamps with a non-NaN forecast so that\n",
"# actual and predicted values stay aligned when the metrics are computed.\n",
"valid_mask = forecast_df['predicted'].notna()\n",
"y_actual_valid = test[valid_mask]\n",
"y_pred_valid = forecast_df.loc[valid_mask, 'predicted']\n",
"\n",
"# Visualisation: tail of the training series, test actuals, one-step forecasts and their interval.\n",
"plt.figure(figsize=(15, 6))\n",
"# 24 points = 3 days on the 3-hour grid (8 points per day).\n",
"train_last_3days = train.loc[train.index[-24]:]\n",
"train_last_3days.plot(label='训练集( 最后3天) ', alpha=0.7)\n",
"test.plot(label='实际值', color='green', alpha=0.7)\n",
"forecast_df['predicted'].plot(style='--', marker='o', markersize=5, label='单步预测值', color='red')\n",
"plt.fill_between(forecast_df.index,\n",
"                 forecast_df['lower'],\n",
"                 forecast_df['upper'],\n",
"                 color='pink', alpha=0.3, label='95%置信区间')\n",
"# Vertical marker at the first test timestamp.\n",
"plt.axvline(test_start, color='gray', linestyle='--', alpha=0.6)\n",
"plt.title('AQI单步滚动预测结果 (SARIMA模型)')\n",
"plt.xlabel('时间')\n",
"plt.ylabel('AQI')\n",
2025-03-24 15:19:11 +08:00
"plt.legend()\n",
2025-03-26 14:57:30 +08:00
"plt.grid(alpha=0.3)\n",
"plt.tight_layout()\n",
"# savefig does not create missing directories, so make sure the output folder exists first.\n",
"os.makedirs('./images', exist_ok=True)\n",
"plt.savefig('./images/AQI-SARIMA.png', dpi=200, bbox_inches='tight')\n",
2025-03-24 15:19:11 +08:00
"plt.show()\n",
2025-03-24 09:57:14 +08:00
"\n",
2025-03-26 14:57:30 +08:00
"# Evaluation metrics on the valid forecast points.\n",
"# NOTE(review): cal_metrics is not defined in this cell; presumably it comes from the\n",
"# star-import of the local calculate module and returns a dict of metric name -> float - confirm.\n",
"metrics = cal_metrics(y_actual_valid, y_pred_valid)\n",
"print(\"\\n单步预测评估结果: \")\n",
"print(f\"有效预测点数: {len(y_actual_valid)}/{len(test)}\")\n",
"for k, v in metrics.items():\n",
"    print(f\"{k}: {v:.3f}\")"
2025-03-24 09:57:14 +08:00
],
2025-03-26 14:57:30 +08:00
"id": "5fb8d128fcab57d1",
2025-03-24 09:57:14 +08:00
"outputs": [
2025-03-26 14:57:30 +08:00
{
"name": "stdout",
"output_type": "stream",
"text": [
"开始自动参数搜索(请耐心等待)...\n",
"最优参数组合: Order(0, 1, 1) Seasonal(2, 1, 0, 8)\n"
]
},
2025-03-24 09:57:14 +08:00
{
"data": {
"text/plain": [
2025-03-24 15:19:11 +08:00
"<Figure size 1500x600 with 1 Axes>"
],
2025-03-26 14:57:30 +08:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAABdEAAAJOCAYAAABYwk4SAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8fJSN1AAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3wUZf4H8M/MlvQeOgmQ0BFEBfRQbAiK3p3cWWk2VCwHKufp+fNsp4LtThQ5KyJKUfTOU7EiiqJyioLSERJKAgmQnmzfmef3x7O7ySa7m02ym0I+79crbDI7O/NstrD5zHe+jyKEECAiIiIiIiIiIiIiogbUth4AEREREREREREREVF7xRCdiIiIiIiIiIiIiCgIhuhEREREREREREREREEwRCciIiIiIiIiIiIiCoIhOhERERERERERERFREAzRiYiIiIiIiIiIiIiCYIhORERERERERERERBQEQ3QiIiIiIiIiIiIioiAYohMRERERERERERERBcEQnYiIiIiI2tSKFSvw7LPPtvUwCMBzzz2H5cuXt/UwiIiIiNoVhuhEREREFHVCCFRVVTVYvnXrVui63qRtWSwWbN26FaWlpREZW1P3H8g777yDr7/+GpqmRWBE7ZcQAqWlpY3eT03TUFpaCiFEo9u02Wy4+eabUVhYGKlhdniVlZWwWq1+yywWC8rKyiCEQE1NDcrKylq8H7fbjd27d/u9NgsLC3HTTTfBYrG0ePtERERExwuG6ERERESdyBdffIEpU6aEte5XX32FK6+8EllZWYiJiUGPHj1w3nnn4bXXXoPb7W6wft++ffG3v/0t4LZmz56Nc845xy+ss9lsOOOMMzBgwAAcPnw45Fjsdrvv++LiYowYMQKfffZZWPejpqYGBQUFAa+bNWsWhg0bFtZ2QnnkkUcwa9YsGAyGBtdVVFTg6NGjqKysRE1NDSoqKlBUVITy8nJ899132LVrF/Lz87F//37s27cPe/bswU8//YT8/HzfNlwuFz799NOwx/Ptt99iwoQJqKys9FuuaRouu+wy/PDDDw1u8+GHHyI9PR1btmwJue2ePXvCaDQiLi4OqampyMzM9H2lpqYiLi4ORqMRPXv2DGusH3zwAaqqqnDjjTf6lm3cuBFnnHEGEhMT0bdvXzz55JMhD3bs3bsXZ5xxRtDr9+/fD0VRfF9msxmZmZn4wx/+gE2bNvnWW7duHRRFwd69e33L3nvvPSxdurTR+3Hbbbc1+jy+6qqrwnocTzrpJMyePdtv2ZdffomsrCxYrVZ88cUX6Nu3b4OgHQD+85//4N5778XDDz+M+fPn47HHHsO8efPw97//HXfeeScAoLS0FLquw263Y/DgwVi1apXv9jfccANqamrw/vvvNzpOIiIios6CIToRERFRJ/Loo4/i7bffRl5eXtB1NE3DzTffjLPPPhv79u3D3LlzsXLlSjz66KPo1q0bbrjhBvzmN7/Bvn37wt7v7bffjn379uG3v/2trzr59ddfh9VqxSuvvNJo4JqdnY2ZM2cCAOLj4wHICvL9+/dj//792Lt3L7Zs2YJjx4753U4IgYkTJ2LixIkoLy/HkSNHUFJSApvNBrfbjZEjR2LXrl3YvHkzXC4XrFYrjh496red4uJi7Ny5E4WFhSgsLMSBAwfw008/wel0+taJjY3FmDFjAo79lVdeQbdu3ZCamoqkpCSkpaVh+vTpKCoqwumnn44hQ4Zg4MCB6NevHwYPHowhQ4Zg1KhRePXVV33b+OSTT3DhhRfioosuwr/+9S+cd955mDx5MiZPnoyLL74YZ599NtatWwer1Qq32w1N07Bx40akpKT4jWX9+vV455138PrrrwMAqqqqYLPZAACpqakoLy9HcnIyABnc1692VhQFcXFxePzxx7F161b8+OOPeOWVV1BaWor//ve/+PHHH7F161Y8/vjjiIuLg6IoIR9XAPj8888xcOBA9O/fHwBw7NgxXH
DBBVBVFa+++iquuOIK3HPPPX4he33z58/Ht99+i3Xr1oXc1yOPPIIPPvgAb731Fu6//37k5+dj/PjxfqF5fXv27MHDDz8c8MCRV1FREV566SXs2bMn6Dr79u3DypUrMX/+/JBjBIDk5GQMGDAAgAy8rVYrYmNjERcXh4SEBCQkJCA+Ph5xcXFwOp1+4X1VVRWOHj2K559/Hu+88w7279+PpUuXYtmyZTh48CAAYOjQofjXv/6FxMREqKqKzMxM3+1zc3MxcOBAfP75542Ok4iIiKjTEERERETUKWzYsEEAEADEjTfeGHS9OXPmCEVRxLPPPhvw+s2bN4usrCwxZMgQUVlZ6Vvep08fce+99/qtW1ZWJgoKCsSRI0fEG2+8IT7//HNRXl4uSkpKRN++fcX//d//ibKyMnH48GFRVVUVdEz9+/cXDzzwgBBCiKKiIgFAmEwmERMTI8xms1BVVQAQK1eubHDbjRs3ivj4eHHppZeKiy++WAAQZrNZxMfHi8TERJGcnCwSExNFfHy8MJvNAoC4+uqrfbdfsmSJACAMBoPfvoqKinzrnHrqqeIvf/lLwLFbLBZRWFgoiouLxYYNG0Tv3r1FUVGR0DRN1NTUCF3XxcKFC0V6erpwOp1CCCEcDoew2Wx+2/n222/FxIkTxSuvvCJ69Ogh5s+f7/uKj48XX3/9tRg8eLB46qmnxPfffy9SUlIajGXWrFliwIABwmazifLycnHFFVcIAEJRFGE0Gn2/V0VRfM8Vl8vlt42UlBSxZMkSv98vAFFQUOD3Owu0/0AGDx4sbr75Zt/P//nPfwQA8csvv/iWvf766+Krr74KePuDBw8Kk8kkAIiJEycGXGffvn0CgFizZo3f8oqKCmEymcRdd90lhBDiyy+/FADEnj17fOvceOONAoB49dVXg96HOXPmCADi5ZdfDrrOrFmzfL/T7777Luh6QggxatQoMX/+fCGEELfddpvvdsG+cnNzG2zjrLPO8r0er776ajFz5kzfdf369RNvvPGGEEKImJgY8e677/rd9qabbhIDBgwIOUYiIiKizoSV6ERERESdxKOPPooePXrgjjvuwNKlSwO2nvjll1+wcOFC3HPPPQ3aSXiNHDkSH330Efbu3Yunnnoq5D4XLFiArKwsdOvWDTNmzMB5552HtLQ0ZGZmYv/+/Zg3bx7S09PRs2dPvPPOO0G3E6hNykcffQS73Q6HwwFN0+B0OnHppZc2WG/UqFF499138cILL+DNN9/E3r17cf/996O0tBTV1dWorKzE6tWrccUVV+DAgQNwu9148cUXfbefOnUq7HY73G63b181NTV44IEHcNppp2HcuHHYsWMHVq5ciZNOOgn9+/dHZmamr2VKfHw8FixYgEmTJuHaa6/Fv/71L3Tv3h1WqxXffPMNvv32W3z++ec48cQT8dNPP2H9+vX46quvEBsb63c/xo4di08//RTJycno2rUrZs+ejUmTJuG2225DXFwcTCYTMjMzkZCQAFVt+DG/tLQUy5cvx/PPP4/i4mL0798fZ5xxBr755hts3rzZN5nkJ598gp9//hnff/89Pv3004C/+2uvvdbXGmX06NEAgKysLN+ya6+9NuhjWZfb7cbevXtxwgkn+JZlZWUBkK2HvGbMmIEzzzwz4DaeeOIJGAwGPPTQQ/jss8/w008/hbVvAEhKSkJsbGzI/t979uyB0WjEI488ErAa3VuFrqoqfv3114DbOHz4MF577TXMnj0bWVlZQavRP/jgAyxcuBAlJSX43//+h6effhqXXHIJCgoK8MknnyAjIwNCCHz55Zfo1q0bLBYLSkpKArbnCSXQ86OuESNGID8/Hy6Xq0nbJSIiIjpeMUQnIiIi6gS2bNmC1atXY+7cubjrrrsAAE8//XSD9ZYvX46YmBjcc889Ibd3wgkn4PLLL8fixYtDrmcymdCtWzcIIXxfZ511FmbNmuW3LCMjo0HrD4vF0qRJP00mU4MJTBctWo
Ti4mJMnDgRGRkZOHz4MM4//3y8//772Lt3L2666SZs3boVXbp0we7duzFo0CC8/vrriImJAQA4nU64XC6YzWa/fRk
2025-03-24 09:57:14 +08:00
},
"metadata": {},
2025-03-24 15:19:11 +08:00
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
2025-03-26 14:57:30 +08:00
"\n",
"单步预测评估结果:\n",
"有效预测点数: 248/248\n",
"RMSE: 11.893\n",
"R-squared: 0.932\n",
"MAE: 7.744\n"
2025-03-24 15:19:11 +08:00
]
2025-03-24 09:57:14 +08:00
}
],
2025-03-26 14:57:30 +08:00
"execution_count": 57
},
{
"metadata": {},
"cell_type": "markdown",
"source": "",
"id": "bb1765c239b38d1c"
2025-03-24 09:57:14 +08:00
},
{
"metadata": {},
"cell_type": "markdown",
"source": "### (2)XGBoost模型",
2025-03-26 14:57:30 +08:00
"id": "345e80b2d1b4204b"
2025-03-24 09:57:14 +08:00
},
{
"metadata": {
"ExecuteTime": {
2025-03-26 14:57:30 +08:00
"end_time": "2025-03-26T02:30:11.107569Z",
"start_time": "2025-03-26T02:30:11.063882Z"
2025-03-24 09:57:14 +08:00
}
},
"cell_type": "code",
"source": [
"\"\"\"\n",
"该模型在假设不考虑测试集其他指标的情况下, 仅使用AQI数据对未来AQI进行<单步预测>, 即每次预测都是根据之前时间点的真实AQI值进行的。\n",
2025-03-24 17:31:14 +08:00
"整体运行时间约为25s, 请耐心等待。\n",
2025-03-24 09:57:14 +08:00
"\"\"\"\n",
"# Feature engineering\n",
2025-03-24 17:31:14 +08:00
"# Rebinds the notebook-level data to the AQI column only; work on a copy from here on.\n",
"data=data[['AQI']]\n",
2025-03-24 09:57:14 +08:00
"data_processed = data.copy()\n",
"\n",
"# Calendar features derived from the timestamp index\n",
"# Basic components\n",
"data_processed['hour'] = data_processed.index.hour\n",
"data_processed['day_of_week'] = data_processed.index.dayofweek\n",
"data_processed['month'] = data_processed.index.month\n",
"\n",
"# Cyclical encoding: map hour (period 24) and weekday (period 7) onto sine/cosine pairs\n",
"# so that the end of a cycle sits next to its start.\n",
"data_processed['hour_sin'] = np.sin(2 * np.pi * data_processed['hour'] / 24)\n",
"data_processed['hour_cos'] = np.cos(2 * np.pi * data_processed['hour'] / 24)\n",
"data_processed['week_sin'] = np.sin(2 * np.pi * data_processed['day_of_week'] / 7)\n",
"data_processed['week_cos'] = np.cos(2 * np.pi * data_processed['day_of_week'] / 7)\n",
"\n",
"# Lag features\n",
"# One column per 3-hour step, going back up to 7 days; the earliest rows get NaN lags from shift().\n",
"lags = [i for i in range(1, 7 * 8 + 1)]  # 7 days * 8 time points per day (3-hour spacing)\n",
"for lag in lags:\n",
"    data_processed[f'AQI_lag_{lag}'] = data_processed['AQI'].shift(lag)\n",
"\n",
"# Train/test split by date string; slicing a DatetimeIndex by day includes the whole end day.\n",
"train_data = data_processed.loc['2022-11-01':'2023-09-30']\n",
"test_data = data_processed.loc['2023-10-01':]\n",
"\n",
"# Feature selection: every column except the target.\n",
"features = [col for col in train_data.columns if col != 'AQI']\n",
"X_train, y_train = train_data[features], train_data['AQI']\n",
"X_test, y_test = test_data[features], test_data['AQI']"
],
2025-03-26 14:57:30 +08:00
"id": "8920b904e0069cf7",
2025-03-24 09:57:14 +08:00
"outputs": [],
2025-03-26 14:57:30 +08:00
"execution_count": 45
2025-03-24 09:57:14 +08:00
},
{
"metadata": {
"ExecuteTime": {
2025-03-26 14:57:30 +08:00
"end_time": "2025-03-26T02:30:37.832084Z",
"start_time": "2025-03-26T02:30:11.212688Z"
2025-03-24 09:57:14 +08:00
}
},
"cell_type": "code",
"source": [
"# Randomised hyper-parameter search (this step is slow, please be patient).\n",
"# Imported here because the time-ordered splitter is only needed by this cell.\n",
"from sklearn.model_selection import TimeSeriesSplit\n",
"\n",
"# scipy's uniform(loc, scale) samples from [loc, loc + scale]; randint(low, high) excludes high.\n",
"param_dist = {\n",
"    'n_estimators': [100, 200, 300],\n",
"    'max_depth': randint(5, 10),\n",
"    'learning_rate': uniform(0.01, 0.2),\n",
"    'subsample': uniform(0.7, 0.3),\n",
"    'colsample_bytree': uniform(0.7, 0.3),\n",
"    'gamma': uniform(0, 0.3)\n",
"}\n",
"\n",
"# The rows are time-ordered and the features are AQI lags, so plain K-fold (what cv=3 means\n",
"# for a regressor) would validate on periods that precede part of the training data.\n",
"# TimeSeriesSplit keeps every validation fold strictly after its training fold.\n",
"search = RandomizedSearchCV(\n",
"    XGBRegressor(n_jobs=-1, random_state=42),\n",
"    param_distributions=param_dist,\n",
"    n_iter=10,\n",
"    cv=TimeSeriesSplit(n_splits=3),\n",
"    scoring='neg_mean_absolute_error',\n",
2025-03-24 17:06:38 +08:00
"    verbose=1,\n",
"    random_state=42\n",
2025-03-24 09:57:14 +08:00
")\n",
"search.fit(X_train, y_train)"
],
"id": "199aa487e826c1ac",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Fitting 3 folds for each of 10 candidates, totalling 30 fits\n"
]
},
{
"data": {
"text/plain": [
"RandomizedSearchCV(cv=3,\n",
" estimator=XGBRegressor(base_score=None, booster=None,\n",
" callbacks=None,\n",
" colsample_bylevel=None,\n",
" colsample_bynode=None,\n",
" colsample_bytree=None, device=None,\n",
" early_stopping_rounds=None,\n",
" enable_categorical=False,\n",
" eval_metric=None, feature_types=None,\n",
2025-03-26 14:57:30 +08:00
" feature_weights=None, gamma=None,\n",
" grow_policy=None,\n",
2025-03-24 09:57:14 +08:00
" importance_type=None,\n",
2025-03-26 14:57:30 +08:00
" interaction_constraint...\n",
" 'learning_rate': <scipy.stats._distn_infrastructure.rv_continuous_frozen object at 0x153c7e510>,\n",
" 'max_depth': <scipy.stats._distn_infrastructure.rv_discrete_frozen object at 0x153c7e810>,\n",
2025-03-24 09:57:14 +08:00
" 'n_estimators': [100, 200, 300],\n",
2025-03-26 14:57:30 +08:00
" 'subsample': <scipy.stats._distn_infrastructure.rv_continuous_frozen object at 0x153ce6b40>},\n",
2025-03-24 17:06:38 +08:00
" random_state=42, scoring='neg_mean_absolute_error',\n",
" verbose=1)"
2025-03-24 09:57:14 +08:00
],
"text/html": [
2025-03-26 14:57:30 +08:00
"<style>#sk-container-id-1 {\n",
2025-03-24 09:57:14 +08:00
" /* Definition of color scheme common for light and dark mode */\n",
" --sklearn-color-text: black;\n",
" --sklearn-color-line: gray;\n",
" /* Definition of color scheme for unfitted estimators */\n",
" --sklearn-color-unfitted-level-0: #fff5e6;\n",
" --sklearn-color-unfitted-level-1: #f6e4d2;\n",
" --sklearn-color-unfitted-level-2: #ffe0b3;\n",
" --sklearn-color-unfitted-level-3: chocolate;\n",
" /* Definition of color scheme for fitted estimators */\n",
" --sklearn-color-fitted-level-0: #f0f8ff;\n",
" --sklearn-color-fitted-level-1: #d4ebff;\n",
" --sklearn-color-fitted-level-2: #b3dbfd;\n",
" --sklearn-color-fitted-level-3: cornflowerblue;\n",
"\n",
" /* Specific color for light theme */\n",
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
" --sklearn-color-icon: #696969;\n",
"\n",
" @media (prefers-color-scheme: dark) {\n",
" /* Redefinition of color scheme for dark theme */\n",
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
" --sklearn-color-icon: #878787;\n",
" }\n",
"}\n",
"\n",
2025-03-26 14:57:30 +08:00
"#sk-container-id-1 {\n",
2025-03-24 09:57:14 +08:00
" color: var(--sklearn-color-text);\n",
"}\n",
"\n",
2025-03-26 14:57:30 +08:00
"#sk-container-id-1 pre {\n",
2025-03-24 09:57:14 +08:00
" padding: 0;\n",
"}\n",
"\n",
2025-03-26 14:57:30 +08:00
"#sk-container-id-1 input.sk-hidden--visually {\n",
2025-03-24 09:57:14 +08:00
" border: 0;\n",
" clip: rect(1px 1px 1px 1px);\n",
" clip: rect(1px, 1px, 1px, 1px);\n",
" height: 1px;\n",
" margin: -1px;\n",
" overflow: hidden;\n",
" padding: 0;\n",
" position: absolute;\n",
" width: 1px;\n",
"}\n",
"\n",
2025-03-26 14:57:30 +08:00
"#sk-container-id-1 div.sk-dashed-wrapped {\n",
2025-03-24 09:57:14 +08:00
" border: 1px dashed var(--sklearn-color-line);\n",
" margin: 0 0.4em 0.5em 0.4em;\n",
" box-sizing: border-box;\n",
" padding-bottom: 0.4em;\n",
" background-color: var(--sklearn-color-background);\n",
"}\n",
"\n",
2025-03-26 14:57:30 +08:00
"#sk-container-id-1 div.sk-container {\n",
2025-03-24 09:57:14 +08:00
" /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
" but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
" so we also need the `!important` here to be able to override the\n",
" default hidden behavior on the sphinx rendered scikit-learn.org.\n",
" See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
" display: inline-block !important;\n",
" position: relative;\n",
"}\n",
"\n",
2025-03-26 14:57:30 +08:00
"#sk-container-id-1 div.sk-text-repr-fallback {\n",
2025-03-24 09:57:14 +08:00
" display: none;\n",
"}\n",
"\n",
"div.sk-parallel-item,\n",
"div.sk-serial,\n",
"div.sk-item {\n",
" /* draw centered vertical line to link estimators */\n",
" background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
" background-size: 2px 100%;\n",
" background-repeat: no-repeat;\n",
" background-position: center center;\n",
"}\n",
"\n",
"/* Parallel-specific style estimator block */\n",
"\n",
2025-03-26 14:57:30 +08:00
"#sk-container-id-1 div.sk-parallel-item::after {\n",
2025-03-24 09:57:14 +08:00
" content: \"\";\n",
" width: 100%;\n",
" border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
" flex-grow: 1;\n",
"}\n",
"\n",
2025-03-26 14:57:30 +08:00
"#sk-container-id-1 div.sk-parallel {\n",
2025-03-24 09:57:14 +08:00
" display: flex;\n",
" align-items: stretch;\n",
" justify-content: center;\n",
" background-color: var(--sklearn-color-background);\n",
" position: relative;\n",
"}\n",
"\n",
2025-03-26 14:57:30 +08:00
"#sk-container-id-1 div.sk-parallel-item {\n",
2025-03-24 09:57:14 +08:00
" display: flex;\n",
" flex-direction: column;\n",
"}\n",
"\n",
2025-03-26 14:57:30 +08:00
"#sk-container-id-1 div.sk-parallel-item:first-child::after {\n",
2025-03-24 09:57:14 +08:00
" align-self: flex-end;\n",
" width: 50%;\n",
"}\n",
"\n",
2025-03-26 14:57:30 +08:00
"#sk-container-id-1 div.sk-parallel-item:last-child::after {\n",
2025-03-24 09:57:14 +08:00
" align-self: flex-start;\n",
" width: 50%;\n",
"}\n",
"\n",
2025-03-26 14:57:30 +08:00
"#sk-container-id-1 div.sk-parallel-item:only-child::after {\n",
2025-03-24 09:57:14 +08:00
" width: 0;\n",
"}\n",
"\n",
"/* Serial-specific style estimator block */\n",
"\n",
2025-03-26 14:57:30 +08:00
"#sk-container-id-1 div.sk-serial {\n",
2025-03-24 09:57:14 +08:00
" display: flex;\n",
" flex-direction: column;\n",
" align-items: center;\n",
" background-color: var(--sklearn-color-background);\n",
" padding-right: 1em;\n",
" padding-left: 1em;\n",
"}\n",
"\n",
"\n",
"/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
"clickable and can be expanded/collapsed.\n",
"- Pipeline and ColumnTransformer use this feature and define the default style\n",
"- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
"*/\n",
"\n",
"/* Pipeline and ColumnTransformer style (default) */\n",
"\n",
2025-03-26 14:57:30 +08:00
"#sk-container-id-1 div.sk-toggleable {\n",
2025-03-24 09:57:14 +08:00
" /* Default theme specific background. It is overwritten whether we have a\n",
" specific estimator or a Pipeline/ColumnTransformer */\n",
" background-color: var(--sklearn-color-background);\n",
"}\n",
"\n",
"/* Toggleable label */\n",
2025-03-26 14:57:30 +08:00
"#sk-container-id-1 label.sk-toggleable__label {\n",
2025-03-24 09:57:14 +08:00
" cursor: pointer;\n",
" display: block;\n",
" width: 100%;\n",
" margin-bottom: 0;\n",
" padding: 0.5em;\n",
" box-sizing: border-box;\n",
" text-align: center;\n",
"}\n",
"\n",
2025-03-26 14:57:30 +08:00
"#sk-container-id-1 label.sk-toggleable__label-arrow:before {\n",
2025-03-24 09:57:14 +08:00
" /* Arrow on the left of the label */\n",
" content: \"▸\";\n",
" float: left;\n",
" margin-right: 0.25em;\n",
" color: var(--sklearn-color-icon);\n",
"}\n",
"\n",
2025-03-26 14:57:30 +08:00
"#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {\n",
2025-03-24 09:57:14 +08:00
" color: var(--sklearn-color-text);\n",
"}\n",
"\n",
"/* Toggleable content - dropdown */\n",
"\n",
2025-03-26 14:57:30 +08:00
"#sk-container-id-1 div.sk-toggleable__content {\n",
2025-03-24 09:57:14 +08:00
" max-height: 0;\n",
" max-width: 0;\n",
" overflow: hidden;\n",
" text-align: left;\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-0);\n",
"}\n",
"\n",
2025-03-26 14:57:30 +08:00
"#sk-container-id-1 div.sk-toggleable__content.fitted {\n",
2025-03-24 09:57:14 +08:00
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-0);\n",
"}\n",
"\n",
2025-03-26 14:57:30 +08:00
"#sk-container-id-1 div.sk-toggleable__content pre {\n",
2025-03-24 09:57:14 +08:00
" margin: 0.2em;\n",
" border-radius: 0.25em;\n",
" color: var(--sklearn-color-text);\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-0);\n",
"}\n",
"\n",
2025-03-26 14:57:30 +08:00
"#sk-container-id-1 div.sk-toggleable__content.fitted pre {\n",
2025-03-24 09:57:14 +08:00
" /* unfitted */\n",
" background-color: var(--sklearn-color-fitted-level-0);\n",
"}\n",
"\n",
2025-03-26 14:57:30 +08:00
"#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
2025-03-24 09:57:14 +08:00
" /* Expand drop-down */\n",
" max-height: 200px;\n",
" max-width: 100%;\n",
" overflow: auto;\n",
"}\n",
"\n",
2025-03-26 14:57:30 +08:00
"#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
2025-03-24 09:57:14 +08:00
" content: \"▾\";\n",
"}\n",
"\n",
"/* Pipeline/ColumnTransformer-specific style */\n",
"\n",
2025-03-26 14:57:30 +08:00
"#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
2025-03-24 09:57:14 +08:00
" color: var(--sklearn-color-text);\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
2025-03-26 14:57:30 +08:00
"#sk-container-id-1 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
2025-03-24 09:57:14 +08:00
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
"/* Estimator-specific style */\n",
"\n",
"/* Colorize estimator box */\n",
2025-03-26 14:57:30 +08:00
"#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
2025-03-24 09:57:14 +08:00
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
2025-03-26 14:57:30 +08:00
"#sk-container-id-1 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
2025-03-24 09:57:14 +08:00
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
2025-03-26 14:57:30 +08:00
"#sk-container-id-1 div.sk-label label.sk-toggleable__label,\n",
"#sk-container-id-1 div.sk-label label {\n",
2025-03-24 09:57:14 +08:00
" /* The background is the default theme color */\n",
" color: var(--sklearn-color-text-on-default-background);\n",
"}\n",
"\n",
"/* On hover, darken the color of the background */\n",
2025-03-26 14:57:30 +08:00
"#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {\n",
2025-03-24 09:57:14 +08:00
" color: var(--sklearn-color-text);\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
"/* Label box, darken color on hover, fitted */\n",
2025-03-26 14:57:30 +08:00
"#sk-container-id-1 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
2025-03-24 09:57:14 +08:00
" color: var(--sklearn-color-text);\n",
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
"/* Estimator label */\n",
"\n",
2025-03-26 14:57:30 +08:00
"#sk-container-id-1 div.sk-label label {\n",
2025-03-24 09:57:14 +08:00
" font-family: monospace;\n",
" font-weight: bold;\n",
" display: inline-block;\n",
" line-height: 1.2em;\n",
"}\n",
"\n",
2025-03-26 14:57:30 +08:00
"#sk-container-id-1 div.sk-label-container {\n",
2025-03-24 09:57:14 +08:00
" text-align: center;\n",
"}\n",
"\n",
"/* Estimator-specific */\n",
2025-03-26 14:57:30 +08:00
"#sk-container-id-1 div.sk-estimator {\n",
2025-03-24 09:57:14 +08:00
" font-family: monospace;\n",
" border: 1px dotted var(--sklearn-color-border-box);\n",
" border-radius: 0.25em;\n",
" box-sizing: border-box;\n",
" margin-bottom: 0.5em;\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-0);\n",
"}\n",
"\n",
2025-03-26 14:57:30 +08:00
"#sk-container-id-1 div.sk-estimator.fitted {\n",
2025-03-24 09:57:14 +08:00
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-0);\n",
"}\n",
"\n",
"/* on hover */\n",
2025-03-26 14:57:30 +08:00
"#sk-container-id-1 div.sk-estimator:hover {\n",
2025-03-24 09:57:14 +08:00
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
2025-03-26 14:57:30 +08:00
"#sk-container-id-1 div.sk-estimator.fitted:hover {\n",
2025-03-24 09:57:14 +08:00
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
"/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
"\n",
"/* Common style for \"i\" and \"?\" */\n",
"\n",
".sk-estimator-doc-link,\n",
"a:link.sk-estimator-doc-link,\n",
"a:visited.sk-estimator-doc-link {\n",
" float: right;\n",
" font-size: smaller;\n",
" line-height: 1em;\n",
" font-family: monospace;\n",
" background-color: var(--sklearn-color-background);\n",
" border-radius: 1em;\n",
" height: 1em;\n",
" width: 1em;\n",
" text-decoration: none !important;\n",
" margin-left: 1ex;\n",
" /* unfitted */\n",
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
" color: var(--sklearn-color-unfitted-level-1);\n",
"}\n",
"\n",
".sk-estimator-doc-link.fitted,\n",
"a:link.sk-estimator-doc-link.fitted,\n",
"a:visited.sk-estimator-doc-link.fitted {\n",
" /* fitted */\n",
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
" color: var(--sklearn-color-fitted-level-1);\n",
"}\n",
"\n",
"/* On hover */\n",
"div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
".sk-estimator-doc-link:hover,\n",
"div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
".sk-estimator-doc-link:hover {\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-3);\n",
" color: var(--sklearn-color-background);\n",
" text-decoration: none;\n",
"}\n",
"\n",
"div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
".sk-estimator-doc-link.fitted:hover,\n",
"div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
".sk-estimator-doc-link.fitted:hover {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-3);\n",
" color: var(--sklearn-color-background);\n",
" text-decoration: none;\n",
"}\n",
"\n",
"/* Span, style for the box shown on hovering the info icon */\n",
".sk-estimator-doc-link span {\n",
" display: none;\n",
" z-index: 9999;\n",
" position: relative;\n",
" font-weight: normal;\n",
" right: .2ex;\n",
" padding: .5ex;\n",
" margin: .5ex;\n",
" width: min-content;\n",
" min-width: 20ex;\n",
" max-width: 50ex;\n",
" color: var(--sklearn-color-text);\n",
" box-shadow: 2pt 2pt 4pt #999;\n",
" /* unfitted */\n",
" background: var(--sklearn-color-unfitted-level-0);\n",
" border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
"}\n",
"\n",
".sk-estimator-doc-link.fitted span {\n",
" /* fitted */\n",
" background: var(--sklearn-color-fitted-level-0);\n",
" border: var(--sklearn-color-fitted-level-3);\n",
"}\n",
"\n",
".sk-estimator-doc-link:hover span {\n",
" display: block;\n",
"}\n",
"\n",
"/* \"?\"-specific style due to the `<a>` HTML tag */\n",
"\n",
2025-03-26 14:57:30 +08:00
"#sk-container-id-1 a.estimator_doc_link {\n",
2025-03-24 09:57:14 +08:00
" float: right;\n",
" font-size: 1rem;\n",
" line-height: 1em;\n",
" font-family: monospace;\n",
" background-color: var(--sklearn-color-background);\n",
" border-radius: 1rem;\n",
" height: 1rem;\n",
" width: 1rem;\n",
" text-decoration: none;\n",
" /* unfitted */\n",
" color: var(--sklearn-color-unfitted-level-1);\n",
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
"}\n",
"\n",
2025-03-26 14:57:30 +08:00
"#sk-container-id-1 a.estimator_doc_link.fitted {\n",
2025-03-24 09:57:14 +08:00
" /* fitted */\n",
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
" color: var(--sklearn-color-fitted-level-1);\n",
"}\n",
"\n",
"/* On hover */\n",
2025-03-26 14:57:30 +08:00
"#sk-container-id-1 a.estimator_doc_link:hover {\n",
2025-03-24 09:57:14 +08:00
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-3);\n",
" color: var(--sklearn-color-background);\n",
" text-decoration: none;\n",
"}\n",
"\n",
2025-03-26 14:57:30 +08:00
"#sk-container-id-1 a.estimator_doc_link.fitted:hover {\n",
2025-03-24 09:57:14 +08:00
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-3);\n",
"}\n",
2025-03-26 14:57:30 +08:00
"</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>RandomizedSearchCV(cv=3,\n",
2025-03-24 09:57:14 +08:00
" estimator=XGBRegressor(base_score=None, booster=None,\n",
" callbacks=None,\n",
" colsample_bylevel=None,\n",
" colsample_bynode=None,\n",
" colsample_bytree=None, device=None,\n",
" early_stopping_rounds=None,\n",
" enable_categorical=False,\n",
" eval_metric=None, feature_types=None,\n",
2025-03-26 14:57:30 +08:00
" feature_weights=None, gamma=None,\n",
" grow_policy=None,\n",
2025-03-24 09:57:14 +08:00
" importance_type=None,\n",
2025-03-26 14:57:30 +08:00
" interaction_constraint...\n",
" 'learning_rate': <scipy.stats._distn_infrastructure.rv_continuous_frozen object at 0x153c7e510>,\n",
" 'max_depth': <scipy.stats._distn_infrastructure.rv_discrete_frozen object at 0x153c7e810>,\n",
2025-03-24 09:57:14 +08:00
" 'n_estimators': [100, 200, 300],\n",
2025-03-26 14:57:30 +08:00
" 'subsample': <scipy.stats._distn_infrastructure.rv_continuous_frozen object at 0x153ce6b40>},\n",
2025-03-24 17:06:38 +08:00
" random_state=42, scoring='neg_mean_absolute_error',\n",
2025-03-26 14:57:30 +08:00
" verbose=1)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" ><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\"> RandomizedSearchCV<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.model_selection.RandomizedSearchCV.html\">?<span>Documentation for RandomizedSearchCV</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>RandomizedSearchCV(cv=3,\n",
2025-03-24 09:57:14 +08:00
" estimator=XGBRegressor(base_score=None, booster=None,\n",
" callbacks=None,\n",
" colsample_bylevel=None,\n",
" colsample_bynode=None,\n",
" colsample_bytree=None, device=None,\n",
" early_stopping_rounds=None,\n",
" enable_categorical=False,\n",
" eval_metric=None, feature_types=None,\n",
2025-03-26 14:57:30 +08:00
" feature_weights=None, gamma=None,\n",
" grow_policy=None,\n",
2025-03-24 09:57:14 +08:00
" importance_type=None,\n",
2025-03-26 14:57:30 +08:00
" interaction_constraint...\n",
" 'learning_rate': <scipy.stats._distn_infrastructure.rv_continuous_frozen object at 0x153c7e510>,\n",
" 'max_depth': <scipy.stats._distn_infrastructure.rv_discrete_frozen object at 0x153c7e810>,\n",
2025-03-24 09:57:14 +08:00
" 'n_estimators': [100, 200, 300],\n",
2025-03-26 14:57:30 +08:00
" 'subsample': <scipy.stats._distn_infrastructure.rv_continuous_frozen object at 0x153ce6b40>},\n",
2025-03-24 17:06:38 +08:00
" random_state=42, scoring='neg_mean_absolute_error',\n",
2025-03-26 14:57:30 +08:00
" verbose=1)</pre></div> </div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" ><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">estimator: XGBRegressor</label><div class=\"sk-toggleable__content fitted\"><pre>XGBRegressor(base_score=None, booster=None, callbacks=None,\n",
2025-03-24 09:57:14 +08:00
" colsample_bylevel=None, colsample_bynode=None,\n",
2025-03-26 14:57:30 +08:00
" colsample_bytree=None, device=None, early_stopping_rounds=None,\n",
" enable_categorical=False, eval_metric=None, feature_types=None,\n",
" feature_weights=None, gamma=None, grow_policy=None,\n",
" importance_type=None, interaction_constraints=None,\n",
" learning_rate=None, max_bin=None, max_cat_threshold=None,\n",
" max_cat_to_onehot=None, max_delta_step=None, max_depth=None,\n",
" max_leaves=None, min_child_weight=None, missing=nan,\n",
" monotone_constraints=None, multi_strategy=None, n_estimators=None,\n",
" n_jobs=-1, num_parallel_tree=None, ...)</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-3\" type=\"checkbox\" ><label for=\"sk-estimator-id-3\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\"> XGBRegressor<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://xgboost.readthedocs.io/en/release_3.0.0/python/python_api.html#xgboost.XGBRegressor\">?<span>Documentation for XGBRegressor</span></a></label><div class=\"sk-toggleable__content fitted\"><pre>XGBRegressor(base_score=None, booster=None, callbacks=None,\n",
2025-03-24 09:57:14 +08:00
" colsample_bylevel=None, colsample_bynode=None,\n",
2025-03-26 14:57:30 +08:00
" colsample_bytree=None, device=None, early_stopping_rounds=None,\n",
" enable_categorical=False, eval_metric=None, feature_types=None,\n",
" feature_weights=None, gamma=None, grow_policy=None,\n",
" importance_type=None, interaction_constraints=None,\n",
" learning_rate=None, max_bin=None, max_cat_threshold=None,\n",
" max_cat_to_onehot=None, max_delta_step=None, max_depth=None,\n",
" max_leaves=None, min_child_weight=None, missing=nan,\n",
" monotone_constraints=None, multi_strategy=None, n_estimators=None,\n",
" n_jobs=-1, num_parallel_tree=None, ...)</pre></div> </div></div></div></div></div></div></div></div></div>"
2025-03-24 09:57:14 +08:00
]
},
2025-03-26 14:57:30 +08:00
"execution_count": 46,
2025-03-24 09:57:14 +08:00
"metadata": {},
"output_type": "execute_result"
}
],
2025-03-26 14:57:30 +08:00
"execution_count": 46
2025-03-24 09:57:14 +08:00
},
{
"metadata": {
"ExecuteTime": {
2025-03-26 14:57:30 +08:00
"end_time": "2025-03-26T02:30:38.024387Z",
"start_time": "2025-03-26T02:30:38.009441Z"
2025-03-24 09:57:14 +08:00
}
},
"cell_type": "code",
"source": [
"#模型预测\n",
"best_model = search.best_estimator_\n",
"y_pred = best_model.predict(X_test)\n",
"#评估指标\n",
"metrics=cal_metrics(y_pred, y_test)\n",
"#输出结果\n",
"print(\"最佳参数组合:\", search.best_params_)\n",
"print(\"评估指标:\")\n",
"for k, v in metrics.items():\n",
2025-03-24 17:06:38 +08:00
" print(f\"{k}: {v:.3f}\")"
2025-03-24 09:57:14 +08:00
],
"id": "fe076794bae89ccb",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2025-03-24 17:06:38 +08:00
"最佳参数组合: {'colsample_bytree': 0.9826605267054558, 'gamma': 0.16898646535366177, 'learning_rate': 0.08708330050798323, 'max_depth': 6, 'n_estimators': 100, 'subsample': 0.7692681476866446}\n",
2025-03-24 09:57:14 +08:00
"评估指标:\n",
2025-03-26 14:57:30 +08:00
"RMSE: 12.332\n",
"R-squared: 0.922\n",
"MAE: 8.107\n"
2025-03-24 09:57:14 +08:00
]
}
],
2025-03-26 14:57:30 +08:00
"execution_count": 47
2025-03-24 09:57:14 +08:00
},
{
"metadata": {
"ExecuteTime": {
2025-03-26 14:57:30 +08:00
"end_time": "2025-03-26T02:30:40.218109Z",
"start_time": "2025-03-26T02:30:38.179832Z"
2025-03-24 09:57:14 +08:00
}
},
"cell_type": "code",
"source": [
"#预测结果可视化\n",
"def plot_results(y_true, y_pred, timestamps):\n",
" plt.figure(figsize=(18, 8))\n",
" ax = plt.gca()\n",
"\n",
" # 绘制预测曲线\n",
" ax.plot(timestamps, y_true, label='真实值',\n",
" marker='o', markersize=4, linewidth=1, alpha=0.8)\n",
" ax.plot(timestamps, y_pred, label='预测值',\n",
" linestyle='--', marker='x', markersize=5, alpha=0.9)\n",
"\n",
" # 设置时间轴格式\n",
" ax.xaxis.set_major_locator(HourLocator(interval=12))\n",
" ax.xaxis.set_minor_locator(HourLocator(interval=3))\n",
" ax.xaxis.set_major_formatter(DateFormatter(\"%m-%d %H:%M\"))\n",
"\n",
" # 增强可视化元素\n",
" plt.title(f'AQI预测效果对比( MAE={metrics[\"MAE\"]:.2f}, R-squared={metrics[\"R-squared\"]:.2f}) ',\n",
" fontsize=14, pad=20)\n",
" plt.xlabel('时间', fontsize=12)\n",
" plt.ylabel('AQI', fontsize=12)\n",
" plt.grid(True, which='both', linestyle='--', alpha=0.5)\n",
" plt.legend()\n",
"\n",
" # 自动调整标签\n",
" plt.xticks(rotation=45, ha='right')\n",
" plt.tight_layout()\n",
2025-03-24 15:19:11 +08:00
" plt.savefig('./images/xg_by_step.png', dpi=200, bbox_inches='tight')\n",
2025-03-24 09:57:14 +08:00
" plt.show()\n",
"\n",
"plot_results(y_test, y_pred, test_data.index)\n",
"\n",
"#特征重要性可视化\n",
"def plot_importance(model, features, top_n=20):\n",
" importance = pd.Series(model.feature_importances_, index=features)\n",
" top_features = importance.sort_values(ascending=False)[:top_n]\n",
"\n",
" plt.figure(figsize=(12, 8))\n",
" ax = top_features.sort_values().plot.barh()\n",
"\n",
" # 添加数据标签\n",
" for i in ax.patches:\n",
" ax.text(i.get_width() + 0.02, i.get_y() + 0.2,\n",
" f'{i.get_width():.2f}',\n",
" fontsize=10, color='dimgrey')\n",
"\n",
" plt.title('Top {} 重要特征'.format(top_n), fontsize=14)\n",
" plt.xlabel('特征重要性', fontsize=12)\n",
" plt.tight_layout()\n",
2025-03-24 15:19:11 +08:00
" plt.savefig('./images/xg_feature_importance.png', dpi=200, bbox_inches='tight')\n",
2025-03-24 09:57:14 +08:00
" plt.show()\n",
"\n",
2025-03-24 17:06:38 +08:00
"np.random.seed(42)\n",
2025-03-24 09:57:14 +08:00
"plot_importance(best_model, features)"
],
"id": "2551eec52baeb4cb",
"outputs": [
{
"data": {
"text/plain": [
"<Figure size 1800x800 with 1 Axes>"
],
2025-03-26 14:57:30 +08:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAABv0AAAMVCAYAAABUfzjNAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8fJSN1AAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3xTVf8H8M9N0jRdSUtbKGWUMlVAUZAt+lMZLlREEHDhQAQFBypOUBy4kQecqCAIKAoK4kLFhSggKsiQVWS1QFfSNvve8/sjJjRN0qaQNLfl8369eAnfe3PO554TDs/D4d4rCSEEiIiIiIiIiIiIiIiIiKje0sQ6ABERERERERERERERERGdGG76EREREREREREREREREdVz3PQjIiIiIiIiIiIiIiIique46UdERERERERERERERERUz3HTj4iIiIiIiIiIiIiIiKie46YfERERERERERERERERUT3HTT8iIiIiIiIiIiIiIiKieo6bfkRERERERERERERERET1HDf9iIiIiIiIiIiIiIiIiOo5bvoRERERERGpzMGDB/HLL7/EOgYR1aEdO3bg77//jnUMIiIiIqrHuOlHRERERFTJjz/+iEsvvTTWMSKmuLi41p/5/vvv8fDDD2Pnzp1Bjx88eBCvvPIKVq5cGfT4u+++i40bN8Ltdlfbj9VqRXFxMRRFCStXRUUFioqKwjq3Js8++yweeOAB/P777wHHbDYbXC5XyM8++eSTuP766/Hdd99FJEtVQghcffXVWL16dVTaD2bv3r3o0qVLnfVHdLwWLFgASZLw/fffxzpKxBUVFeGcc85Bfn5+rKMQERERUT3FTT8iIiIiokpeeuklfP755/jnn3/C/swXX3yBkSNHolWrVkhISEB6ejrOOussPPLII9i9e3fQz3z//feQJAkLFizw1X788UcsXrwYn376KT777DOsWLECy5Ytw+LFi7Fr1y5MmTIFM2fOxBtvvIE5c+Zgzpw5eOutt/Dqq6/i+eefx9tvv+3Xx1tvvYXc3FzMmjULNpsNs2bNwtKlS/HZZ5/52l+wYAE++eQTv8/t3bsXTz/9NHJzc4Nm37dvH+666y5MmzYt4JjNZsOYMWMwePDgGjccN23ahPT0dGi1WhgMBhiNRqSnpyMjIwMZGRlIT09HamoqEhMTodVqkZycjKlTp1bbZrjWrFmD5557Dp07dw449sMPP0Cv1+O+++4L+tndu3dj/vz5aNeuXUSyVPXGG29g+/btmDhxYtDjgwYNCnujdOrUqfjxxx9rPG/GjBn466+/sGrVqmrPa9WqFSRJCvljxowZYeUK5s8//8SQIUPQrFkzGAwGnHnmmXj33XfD/rwQAv/3f//ny/Lkk0+GPPfo0aPIzMz0nfvzzz8fd24AuPHGG31taTQaxMfHIyMjA3369MEbb7wR9nxR3Vm0aBF69OiBpKQkNGrUCJdffjk2b94c9ue//PJLnHfeeUhKSkJKSgoGDBiAtWvXBj137ty56NatGxISEpCamopBgwYF/c716tULZ511Fu66667jvSwiIiIiOsnpYh2AiIiIiEgtdu7ciRUrVkAIgVdeeQWvvvpqtecfPXoUI0eOxDfffIOEhASce+65uPTSS+FwOPDXX3/h6aefxrPPPospU6bgwQcfhFarrba9hQsX4o033giov/TSSzhw4ACeeOIJAIBOp4Pb7YYkSb6fCyFwwQUX4Oabb/Z9rn///ujRowfuvPNOyLIc8i+Sr732WjRu3BgrVqzAuHHjkJiY6OsnmF69eiE7OxulpaU4fPgwmjRp4jv2448/wu1246GHHkLjxo2rvV6j0QgAePrpp3HllVf66k8++SSWLVvmdxeeLMvo1KmT7zOV/frrr5AkCXq9HpIkAfBsAFmtViQmJuLMM88M+Iz32vR6fcCx1NRUAEDHjh2D5vbOY2ZmZrXXdzysViseffRR3H777UhOTg44brPZ8PXXX+
ODDz7AiBEjqm3LbDbjlVdeQYsWLdCvX7+Q55WWlvo2jF9++WX079+/2nZbtmyJq6++Ouixrl27VvvZUJYuXYphw4bBYDDgsssuQ1paGtatW4ebbroJf//9N1588cUa29i9eze6du2Kdu3a4a233qr23I0bN+KGG27Ali1b8OWXXx5X5qokScI999wDAHC5XDh8+DC++eYbjB07Fn/99VeN6wnVnWeeeQYPPfQQmjVrhmuvvRbl5eX49NNP8fXXX+Prr7/GOeecU+3nZ82ahTvvvBONGzfG0KFD4XA4sHLlSpx77rlYtmwZLrnkEt+5Y8aMwVtvvYXWrVtj1KhRsFgsWLlyJc477zwsXLgQw4YN82v73nvvxSWXXIKJEyeid+/eUbl+IiIiImrABBERERERCSGEuP322wUA0bRpU5GYmCiKiopCnltSUiJOPfVUAUDceuutorCwMOCcTZs2iZ49ewoA4pZbbvE7tnr1agFAzJ8/31crLCwUBQUFoqioSJSWloobbrhB9OjRQ8iyLBwOhygtLRWKogir1SoAiNtuu8332YqKClFSUhKQQZZl8eqrrwqXyyV0Op14/fXXRVlZme9H3759xZgxY8SyZcsEAJGfn+/7eWUzZswQs2fPFvPmzRMLFy4Uc+bMEfPnzxdvv/22ePHFF8Wbb74phBDipptuEiaTSZSXl/s+a7VaxcGDBwOybdu2TQAQ7777rl99/PjxIikpya/mcrkEAPHwww8HtNO1a1cBIOiPO++8M+B8IYQYMmRIwDV6rV+/Pmgur5tvvlkAEDabLejxEzFr1iwhSZLIy8sLenzz5s0CgDjllFOELMvVtjVlyhQBQDzwwAPVnvfss8/6vveSJImtW7eGPDcnJ0dccMEFNV5HbbVq1UoYjcaA677zzjsFALF9+/aw28rLyxMAxLRp02o899133xUAxE8//VTbyH5uuOEGodVqA+pWq1WcffbZAoDYuXPnCfVBHvPnzxcAxOrVq4/r8//884/Q6XTi7LPPFmaz2a+ekZEhcnNzhd1uD/n5ffv2Cb1eL8466yxRXFzsq+fl5YmmTZuKrKws39qwfPlyAUBceeWVfm3u3btXNG/eXKSmpgqLxeLXvizLomXLlmLgwIHHdX1EREREdHLj4z2JiIiIiOB5l9K8efPQrVs3zJw5E1arNehdd17jx4/Htm3bMG3aNLz55ptIT08POKdz58744YcfMGDAAMyZMwfz58+vNkN6ejoWLFiAcePG4ffff8eiRYvw1ltvQaPRQK/Xw2QyQZIk5OXlAQAGDhzo+2xiYqLvDrXKNBoNbr/9duh0OsTHxyM+Ph7btm3D0qVLIUkStFotJEkK+tnKXn75ZYwfPx433HADRo4ciVtuuQXXXXcdbr75Ztx7772YN28eioqK8MEHH6CsrAxNmjTxPZYzMTERrVu3Dtl2fn4+tm/f7vtRWloKIYRfrbrHrb766qv4+eefsX79et8P72NATz31VEybNg2vvPIKZs2a5fuxZ88eAJ47dl544QWsX7++2uuvya5du/D444+fUBuA51rOOusstGrVKuhx73sWt2/fjoULF4Zsx3uXX+XPBONyuTBz5kxkZ2fjo48+ghDihB7R6VXdI0C9PypfY3FxMZo2bRpw3d47nWrz2EU1SUhIwMiRIwEg6Psjqzp06BCGDRuGRo0aITs7G4899hgcDoffObIsY86cOejVqxdSUlJgNBpx/vnnY+XKlThw4AAkSfJ7DK4kSbjllluC9hfq/XiKouDdd99Fnz59fH3069cPS5cuRWFhoV8fU6dOhSRJWL58OTp27IiUlBS/xxYDnkdpnnvuuTAajUhMTESXLl0wffp02Gy2oLnMZjMee+wxnHrqqTAYDMjMzMSwYcOwZcuWgHPnzp0b1vfNm/fdd9+F2+3G7Nmz/e4cbt++PZ5++mnk5eVh+fLlQXMBwMqVK+F0OjF9+nSkpaX56q1atcLDDz+MgoIC/PLLLw
A8d29rNBq8/vrriI+P952bk5ODRx99FKWlpQGP+dRoNLj66qvx9ddf+9Z6IiIiIqJw8fGeREREREQAXnvtNVitVtx
2025-03-24 09:57:14 +08:00
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<Figure size 1200x800 with 1 Axes>"
],
2025-03-26 14:57:30 +08:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAABKUAAAMWCAYAAAAgRDUeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8fJSN1AAAACXBIWXMAAA9hAAAPYQGoP6dpAAD4uUlEQVR4nOz9eVjV1f7//z+2EMNWQEX6iIBCQhqiRm8ysA4BVqLoKTGnHMrUMrUsy7TBHN9HzvH4RQbP2zBNzFAzrTyox0xoQH0XmWEOiOaUiYkTICAI7t8f/drvdhsVCTYO99t1va4r11qv13q+hD/yca21XgaTyWQSAAAAAAAAYEONGroAAAAAAAAA3HoIpQAAAAAAAGBzhFIAAAAAAACwOUIpAAAAAAAA2ByhFAAAAAAAAGyOUAoAAAAAAAA2RygFAAAAAAAAmyOUAgAAAAAAgM0RSgEAANykjh07ppKSkmr7iouLVVlZWavnlpeXq7CwsNq+ixcvKjMzUzt27Ki2f+bMmXr++ed15syZGs934cKFWtUJAACub4RSAADguubr6yuDwXDZ63pw+vRpjR8/Xq1bt5aDg4Nat26tF154QadPn7Yau337dsXExKhZs2Zq3Lixunbtqo8++qhG8xQUFOiDDz7Q+vXrtWHDBq1bt05r1qzRkiVLNG/ePL3++ut6+umnFRERIS8vL/n4+Oi1116r9lkTJ07UbbfdJnt7ezk5OalJkyZXvIxGoxwcHNSoUSM5OTlpxIgR1T7Xzs5O3bp1U0JCQrX93333nT755BM1b968Ru/8xRdfqH379srIyKjReAAAcOOwb+gCAAAAruSZZ54xr6pZuXKljh07ppdffrmBq/o/P//8s+6//34dOXJEUVFRiomJ0Q8//KCkpCStX79eW7du1e233y7p14DlkUce0W233aZHH31UTZo00fr16xUbG6s5c+bolVdeueJcbm5uGj16tM6ePWtuMxqNatq0qc6ePavGjRsrNDRULVu21B133CE7OzsVFhbqyJEjatOmjcWzXn31VY0ZM0ZNmjSRg4ODbrvtNu3cuVOPPPKIvvvuO/n4+OjTTz/V4MGDdejQIRmNRl28eFEVFRUqKyuTvb3l/0bu3LlTzZs3V6tWrWQ0GuXt7W3u++GHH2QwGOTi4iKTySSTyaRjx47pwoULKioqUtOmTXXHHXdYvW9JSYlGjhypoqIiubu76/Tp05o6daqaNWsmo9Eoe3t7czB56dIlXbx4UefPn1erVq30/PPPX9sPEgAA2JzBZDKZGroIAACAmoiIiNAXX3yh6+l/X/r27as1a9Zo5cqV6t+/v7l90aJFGjlypEaMGKF33nlHlZWVat++vU6fPq2tW7fqrrvukiSdP39e0dHR+vrrr5WTk6PAwMBq5yksLNSlS5fUqNGvC93t7e3l7Oxs/nNERISaNGmi9PR0q3srKipUWloqSWratKm53WQyqaysTE5OTmrUqJG+/fZb3XvvvTp06JB8fX2Vnp6u3r17q7i4WE2aNNGlS5d04cIFOTs7W61S69Onj9avX68jR46offv2GjdunGbNmiVJio6O1saNGy/7d/jGG2+Yx/6+tkGDBmn16tX6z3/+o27duunw4cPy8/OTJDk6Osre3l4lJSXmesrLy1VVVaXu3bvrP//5z2XnAwAA1we27wEAgJvGwYMHNXLkSPn4+MjR0VHe3t4aMWKEDh48aDU2IiJC/v7+qqys1Lx589SpUyc5OzurRYsW6t27t7744ourznf+/HmtXbtWvXr1sgikJGnEiBEKCwvTv//9b0nS5s2b9eOPP+r11183B1KS1KRJEy1atEhVVVWaP3/+ZeeaOnWqmjdvrqZNm6pZs2Zyd3eXq6ur3Nzc1LRpU2VlZWnjxo1q2rSpmjZtKjc3Nzk7O8vOzk6Ojo5q1qyZRo4caVV/48aNZWdnJ3t7e3Xt2lWS1K5dOzk5OSk2NlaS5O7uLnt7e9
nZ2alx48Y6f/68VX0nT55UWFiYWrZsadX31FNPaeHChVq5cqUefPBB3X777UpPT9eHH36oRYsWKSYmxmL8pUuX9Oyzz2rlypVasGCBHnzwQUmSj4+PTp8+raqqKl24cEG7du2SJP3rX/9SSUmJKisrVVJSohUrVlz27xEAAFw/2L4HAABuChs3btTjjz+u8vJyRUdHy9fXVwcPHtTSpUu1atUqrVmzRg899JDFPeXl5erRo4c2b96syMhIPfjggzp9+rQ2bNig9PR0zZo1S2+88cZl5zx16pQ6dOigPn36VNvfsmVL7du3T5LMIVe/fv2sxrVr10733nuvPv3008vONW7cOD3xxBNq3LixnJ2dddttt+m2225To0aNZDAY9Nhjj6lx48ZatmyZTCaTLl26pMrKSl28eFFlZWUqLi6Wo6OjxTNdXFzUqFEjTZkyRUOGDNGuXbvUp08fffrpp/Ly8lJmZqaeeeYZZWdny2g0asmSJfrv//5vq+dI0pEjR3Tu3Dn5+vqqqKhISUlJevvttzVhwgSLc63Wrl2rw4cPWwVRv7djxw59/PHHmjt3rkaMGKHx48eruLhY//znP696FpXRaJTRaLziGAAAcH0glAIAADe8H3/8UY8//rjc3d21bt06dejQwdy3c+dO9erVS7GxscrJyTFv/5JkPtcoKyvLvEpIks6ePav+/fvrzTffVEBAgNUqqN/4+vrq+++/r7avpKREWVlZ+q//+i9J0v79+9WkSRP5+vpWO/7uu+/WO++8o4sXL+q2226z6vf39zf/9zfffCMnJyc5OTmZDyu/dOmSLl26pIqKCkm/rjYqLy/XuXPndOedd8rNza3aee3t7dWiRQv5+/vr3LlzkqQ2bdrI19dXubm5kqQ77rhDTZo0UYsWLSTJvGXwN6dOndLPP/+sRx55RAEBAVq4cKH8/f3VqVMnVVRU6LXXXlOzZs3k7Oys/fv3q7i4WAsWLDCfA3X27Fl5eXlp1KhRkqT/+q//Ul5enpo2baodO3Zo/vz5evDBB+Xk5FTtOwAAgBsToRQAALjhzZ49WyUlJfriiy8sAilJ6tSpk1avXq377rtPf/vb37Rw4UKL/sWLF1sEUpLUrFkzffTRRwoMDNTrr79+2VDqSsaPH6+CggJNnjxZ0q9nQl1plU+LFi106dIlFRYWmsOfy3nggQd08eLFavt8fHys2n744YdqQymTyaSKigr98ssvys3N1ZEjRyT9GvJduHBBx44dkyTl5eXJaDTq5MmT5vt+b9OmTZKkv/3tb/qv//ovLVu2TD169NCsWbPM51JJv4ZZjo6OsrOz04svvqjy8nI5ODiooqJCMTEx5lBK+vXsq4qKCj399NNycXHRe++9J6PRqIkTJyonJ0fLly+3ep+qqiodPXpU/+///T9WSwEAcAPgTCkAAHDD+89//qP7779f99xzT7X99957r7p27Wp1+LWnp6c5MPmjJk2a6KmnntKPP/6oH3/88ZrqmTlzphYtWqQJEyYoKirK3P7Hw8F/79KlS5KsA5/qODs764033lBZWZn5+stf/qIePXpYtKWkpEjSZQOaoqIiSdKsWbMUFBSkQYMGydHRUTExMbr77rv14osvytHRUaGhoerQoYNmz54tSeZD038THByspUuXqnPnzlZzREVF6cSJEyotLVVVVZVKS0tVXFxsDut++eUXVVRUVBsyPffcc/r+++/16quvqlWrVpKkM2fOaPfu3XJ3dzePGz16tBo3bqzbbrtNd9xxR7VniAEAgOsPK6UAAMAN7+TJk+rWrdsVx9xxxx3Kzs62aKtuVdHv/bbV7pdfflHbtm1rVEtiYqLeeust9enTR3PmzDG3u7m56ezZs5e979SpUzIYDJfdZvd79vb2+u///m/993//t1Wfs7OzVdsft9v9vqaKigqL7YL5+fmaM2eOhg4dquDgYIvxv20J/OM2uvbt26t9+/YWbTk5ORo2bJieffZZ+fv7q1GjRj
KZTFbBXEVFhS5evGh1TtUrr7yixYsXS/p15dpvHB0drbY3hoeH68477zQfdF6Tv0MAANDwCKUAAMANz8PDQz///PM
2025-03-24 09:57:14 +08:00
},
"metadata": {},
"output_type": "display_data"
}
],
2025-03-26 14:57:30 +08:00
"execution_count": 48
2025-03-24 09:57:14 +08:00
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}