Beijing_air_quality_prediction/air_quality_prediction.ipynb

1279 lines
898 KiB
Plaintext
Raw Normal View History

2025-03-24 09:57:14 +08:00
{
"cells": [
{
"metadata": {},
"cell_type": "markdown",
"source": [
"# 预测建模\n",
"北京市空气质量指数预测(推荐难度系数: 10)\n",
"\n",
"这个数据集是北京市2022年11月1日至2023年10月31日期间空气质量相关数据。\n",
"根据这个数据集,回答以下问题"
],
"id": "b610f839dca4877"
},
{
"cell_type": "code",
"id": "initial_id",
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2025-03-24T00:39:20.612791Z",
"start_time": "2025-03-24T00:39:20.608728Z"
}
},
"source": [
"#导入基础包\n",
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"\n",
"#导入主成分分析相关包\n",
"from factor_analyzer import Rotator\n",
"from factor_analyzer.factor_analyzer import calculate_bartlett_sphericity, calculate_kmo\n",
"# from sklearn.decomposition import PCA\n",
"# from sklearn.preprocessing import StandardScaler\n",
"\n",
"#导入XGBOOST相关包\n",
"from xgboost import XGBRegressor\n",
"from scipy.stats import randint, uniform\n",
"from sklearn.model_selection import RandomizedSearchCV, TimeSeriesSplit\n",
"from matplotlib.dates import DateFormatter, HourLocator\n",
"\n",
"#导入单独写的函数\n",
"from calculate import *\n",
"from heatmap import *\n",
"from sort_matrix import *"
],
"outputs": [],
"execution_count": 18
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-24T00:39:21.076798Z",
"start_time": "2025-03-24T00:39:20.619798Z"
}
},
"cell_type": "code",
"source": [
"# Use the SimHei font so that Chinese axis labels and titles render in matplotlib figures\n",
"plt.rcParams['font.family'] = 'SimHei'\n",
"# CJK fonts such as SimHei commonly lack the Unicode minus glyph (U+2212); use the ASCII\n",
"# hyphen instead so negative tick labels are not drawn as empty boxes\n",
"plt.rcParams['axes.unicode_minus'] = False\n",
"# Load the AQI + meteorology workbook (path is relative to the notebook's working directory)\n",
"data=pd.read_excel('北京市空气质量指数与气象数据.xlsx')\n",
"data.head()"
],
"id": "92ea7ba1218799cd",
"outputs": [
{
"data": {
"text/plain": [
" date hour AQI CO NO2 O3 PM10 \\\n",
"0 2022-11-01 2 18.371429 0.211429 23.771429 29.057143 13.257143 \n",
"1 2022-11-01 5 21.914286 0.180000 26.571429 20.142857 18.914286 \n",
"2 2022-11-01 8 28.628571 0.311429 30.028571 14.285714 27.942857 \n",
"3 2022-11-01 11 19.000000 0.237143 17.971429 40.529412 17.852941 \n",
"4 2022-11-01 14 21.742857 0.252941 15.588235 53.617647 20.941176 \n",
"\n",
" PM2.5 SO2 T ... P Pa U Ff Tn Tx VV Td \\\n",
"0 3.057143 2.628571 6.7 ... 770.5 0.1 36.0 1.0 5.3 17.3 30.0 -7.3 \n",
"1 3.771429 2.542857 2.0 ... 770.8 0.3 62.0 0.0 1.9 17.3 7.0 -4.5 \n",
"2 6.857143 2.400000 6.6 ... 771.7 0.9 56.0 0.0 0.9 17.3 10.0 -7.1 \n",
"3 5.914286 2.176471 13.5 ... 771.3 -0.4 19.0 2.0 0.9 17.3 30.0 -9.7 \n",
"4 6.742857 2.000000 15.7 ... 768.6 -2.7 19.0 2.0 0.9 17.3 30.0 -7.9 \n",
"\n",
" RRR tR \n",
"0 0.0 12 \n",
"1 0.0 12 \n",
"2 0.0 12 \n",
"3 0.0 12 \n",
"4 0.0 12 \n",
"\n",
"[5 rows x 21 columns]"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>hour</th>\n",
" <th>AQI</th>\n",
" <th>CO</th>\n",
" <th>NO2</th>\n",
" <th>O3</th>\n",
" <th>PM10</th>\n",
" <th>PM2.5</th>\n",
" <th>SO2</th>\n",
" <th>T</th>\n",
" <th>...</th>\n",
" <th>P</th>\n",
" <th>Pa</th>\n",
" <th>U</th>\n",
" <th>Ff</th>\n",
" <th>Tn</th>\n",
" <th>Tx</th>\n",
" <th>VV</th>\n",
" <th>Td</th>\n",
" <th>RRR</th>\n",
" <th>tR</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2022-11-01</td>\n",
" <td>2</td>\n",
" <td>18.371429</td>\n",
" <td>0.211429</td>\n",
" <td>23.771429</td>\n",
" <td>29.057143</td>\n",
" <td>13.257143</td>\n",
" <td>3.057143</td>\n",
" <td>2.628571</td>\n",
" <td>6.7</td>\n",
" <td>...</td>\n",
" <td>770.5</td>\n",
" <td>0.1</td>\n",
" <td>36.0</td>\n",
" <td>1.0</td>\n",
" <td>5.3</td>\n",
" <td>17.3</td>\n",
" <td>30.0</td>\n",
" <td>-7.3</td>\n",
" <td>0.0</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2022-11-01</td>\n",
" <td>5</td>\n",
" <td>21.914286</td>\n",
" <td>0.180000</td>\n",
" <td>26.571429</td>\n",
" <td>20.142857</td>\n",
" <td>18.914286</td>\n",
" <td>3.771429</td>\n",
" <td>2.542857</td>\n",
" <td>2.0</td>\n",
" <td>...</td>\n",
" <td>770.8</td>\n",
" <td>0.3</td>\n",
" <td>62.0</td>\n",
" <td>0.0</td>\n",
" <td>1.9</td>\n",
" <td>17.3</td>\n",
" <td>7.0</td>\n",
" <td>-4.5</td>\n",
" <td>0.0</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2022-11-01</td>\n",
" <td>8</td>\n",
" <td>28.628571</td>\n",
" <td>0.311429</td>\n",
" <td>30.028571</td>\n",
" <td>14.285714</td>\n",
" <td>27.942857</td>\n",
" <td>6.857143</td>\n",
" <td>2.400000</td>\n",
" <td>6.6</td>\n",
" <td>...</td>\n",
" <td>771.7</td>\n",
" <td>0.9</td>\n",
" <td>56.0</td>\n",
" <td>0.0</td>\n",
" <td>0.9</td>\n",
" <td>17.3</td>\n",
" <td>10.0</td>\n",
" <td>-7.1</td>\n",
" <td>0.0</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2022-11-01</td>\n",
" <td>11</td>\n",
" <td>19.000000</td>\n",
" <td>0.237143</td>\n",
" <td>17.971429</td>\n",
" <td>40.529412</td>\n",
" <td>17.852941</td>\n",
" <td>5.914286</td>\n",
" <td>2.176471</td>\n",
" <td>13.5</td>\n",
" <td>...</td>\n",
" <td>771.3</td>\n",
" <td>-0.4</td>\n",
" <td>19.0</td>\n",
" <td>2.0</td>\n",
" <td>0.9</td>\n",
" <td>17.3</td>\n",
" <td>30.0</td>\n",
" <td>-9.7</td>\n",
" <td>0.0</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2022-11-01</td>\n",
" <td>14</td>\n",
" <td>21.742857</td>\n",
" <td>0.252941</td>\n",
" <td>15.588235</td>\n",
" <td>53.617647</td>\n",
" <td>20.941176</td>\n",
" <td>6.742857</td>\n",
" <td>2.000000</td>\n",
" <td>15.7</td>\n",
" <td>...</td>\n",
" <td>768.6</td>\n",
" <td>-2.7</td>\n",
" <td>19.0</td>\n",
" <td>2.0</td>\n",
" <td>0.9</td>\n",
" <td>17.3</td>\n",
" <td>30.0</td>\n",
" <td>-7.9</td>\n",
" <td>0.0</td>\n",
" <td>12</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 21 columns</p>\n",
"</div>"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 19
},
{
"metadata": {},
"cell_type": "markdown",
"source": [
"## 题目1\n",
"研究单日内空气质量指数与各项指标的变化趋势,这种趋势是否具有周期性?"
],
"id": "bca65e544d8bef55"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-24T00:39:21.214231Z",
"start_time": "2025-03-24T00:39:21.202730Z"
}
},
"cell_type": "code",
"source": [
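"# TODO(not implemented): preprocessing - group the data by hour and compute each indicator's hourly mean\n",
"\n",
"# TODO(not implemented): visualisation - plot the hourly means as line charts and look for regular fluctuations\n"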
],
"id": "5f8e89a8d1561e4f",
"outputs": [],
"execution_count": 20
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-24T00:39:21.293207Z",
"start_time": "2025-03-24T00:39:21.287594Z"
}
},
"cell_type": "code",
"source": "# TODO(not implemented): test for periodicity with the autocorrelation function (ACF)\n",
"id": "4521bfa63d480997",
"outputs": [],
"execution_count": 21
},
{
"metadata": {},
"cell_type": "markdown",
"source": [
"## 题目2\n",
"简述各项指标间的相互关系。"
],
"id": "59e20f3463e819a6"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-24T00:39:22.701346Z",
"start_time": "2025-03-24T00:39:21.423153Z"
}
},
"cell_type": "code",
"source": [
"#计算相关系数矩阵\n",
"correlation_matrix = data.iloc[:, 1:].corr()\n",
"#绘制热力图\n",
"plot_heatmap(correlation_matrix,20,16,title=\"Correlation Matrix Heatmap\",save_path=\"correlation_heatmap.png\")"
],
"id": "c917d14115569bcd",
"outputs": [
{
"data": {
"text/plain": [
"<Figure size 2000x1600 with 2 Axes>"
],
"image/png": "iVBORw0KGgoAAAANSUhEUgAABxkAAAY1CAYAAAD+bGCXAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3QUVRsG8Gd30yspEJJQ0hsk9CJdUcFPUZSWgNKr9CqolFAEpIP0IiiIIh1BCC0ooAJKSSCU9AaBBJKQbMomm++PwIYlyZaQzWaX53dOznHmzl3euc7cuTPvFEFRUVERiIiIiIiIiIiIiIiIiIhUJNR2AERERERERERERERERESkW5hkJCIiIiIiIiIiIiIiIiK1MMlIRERERERERERERERERGphkpGIiIiIiIiIiIiIiIiI1MIkIxERERERERERERERERGphUlGIiIiIiIiIiIiIiIiIlILk4xEREREREREREREREREpBYmGYmIiIiIiIiIiIiIiIhILUwyEhEREREREREREREREZFamGQkIiIiotfOsWPHEBAQABMTE7Ro0QKXLl3Sdkjl6tSpE+bMmaPz/0ZZQkNDIRAIEBAQIJu3d+9eCAQCdOrUqcrjAQCBQIDQ0NBK/92BAwfCx8dHbt727dshEAiQnp5e6f8eERERERERkaYxyUhEREREr5Xz58/jo48+QsuWLXH8+HH4+vqiS5cuePLkibZD04jt27crTZpt3LgRw4cPr5qAynD79m3k5+cDAG7cuFGh37h27RpWrlz5yrFcvnwZzZo1e+Xf0aaDBw/i4MGD2g6DiIiIiIiI9ByTjERERET0WpkxYwYaNGiAzZs3o1OnTtiyZQsEAgE2bNig7dA0QpUko7e3N5ycnKomoDJIJBJEREQAAK5fv16h36isJGPz5s1haWn5yr+jTUwyEhERERERUVVgkpGIiIiIXhupqam4cOECevToAYFAAAAwMjKCv78/wsPDtRzd6+uNN96QJRdv3LiBVq1aaTkiIiIiIiIiIlKGSUYiIiIiem2Eh4ejqKgI3t7ecvNXrFiBiRMnyqZPnDiBxo0bw9jYGA0aNMCBAwfklp8zZw46deqEjIwMfP7553B0dMSZM2dk5bGxsRAIBIiNjcX27dsREBCAIUOGyP3GmTNn0LJlS5iYmMDX1xe7d+9Wa10KCwvx5Zdfok6dOrCwsEDbtm3x33//ycoFAgEEAgHOnTuH4OBg2XRsbGyp31L0Tcbly5ejfv36MDExQefOneWSsQMHDsTAgQNx5MgR+Pr6wtzcHO+99x5SU1PVWhd/f3/cuHEDmZmZiI2Nhb+/v1z5iRMn0KxZM5iZmcHFxUXuicWBAwdCIBBg0KBBiIuLk63ny+vz/FuLv/32G1q1aoV33323zFjK+ibjgwcPYGNjg+DgYNm8Hj16oGHDhpBIJGqtqyJisRjjxo1DrVq1YGNjgz59+uDRo0dyy6xYsQJubm4wMzND48aNcerUKVmZi4sLBAIBduzYgR07dsja4vn6dOrUCaNGjUKbNm1gZWWFX375Bd27d4e5uTm+/vpr2e/s2rULfn5+MDMzg4+Pj9y2OWfOHDRs2BBffPEFatSogbp162LBggWQSqWV1g5ERERERESkG5hkJCIiIqLXxsOHDwEAtra2cvObNm2K5s2bAwBCQ0Px/vvvo3nz5jh27Bjefvtt9OjRA0eOHJGrk5+fj86dOyM+Ph5fffUVvLy8Sv17y5Ytw9y5cxEYGIjAwEDZ/Dt37qBr165o3rw5QkJC0KdPH/Tt2xenT59WeV0WL16MJUuWYP78+Th27BicnJzQq1cvWfnly5dx+fJlNG3aFMOGDZNNq/Na1Hnz5mHGjBkYPXo0Dh8+DKFQiA4dOiA+Pl62zJUrVzBq1Ch89dVX+P7773Hx4kUsWrRI5X8DKE4yXr9+HTdu3ICtrS2cnZ1lZTExMfjoo4/g5+eHkJAQTJ8+HZMnT8
aff/4JoDjpdfnyZcyePRuOjo6y9SzrG5O//vorhg4dii5dumDkyJEqx1e7dm1ZeycnJ+OPP/7AwYMHsWnTJhgaGqr8O3fu3JEl/p4nRl80cuRIHDx4EOvWrcNPP/2E8PBwfPLJJ7Lyn376CZMmTcLnn3+OkJAQtGvXDj179sTTp08BAEeOHMHly5fxwQcf4IMPPpC1xYvfmNy5cycmTJiAVq1aITAwEM2bN8eYMWOwYsUKAMCFCxfw2Wef4X//+x9CQkLQr18/9O/fH9HR0bLfuHXrFs6dO4c9e/Zg/PjxmDNnDtauXatyOxAREREREZF+MNB2AEREREREVSUvLw8AIBSWf6/dnDlz0Lp1a2zZsgUA0LlzZ0RGRmLWrFno1q2bbLm//voLY8aMwZo1a8r9rWPHjuGff/5BzZo15eYvXLgQDRo0wLp16wAAHTp0wJEjR/DDDz+gc+fOKq1LQEAA9u3bhw8//BAA8PjxY+zduxcPHz5ErVq1ZElTS0tLODk5yaZVJRaLsWjRIkyZMgXTpk0DALRp0wbu7u5YsWKFLCl169YtXLp0Sfb7586dU/u7iv7+/pg3bx6uX79e6inGgoICrFy5Ev369YOlpSUaN26MBQsW4K+//kL79u3h4uICFxcXhIeHw8jISOF6/vzzz/jnn3/g4eGhVnwAMGzYMPzwww+YMWMGwsPDMWLECLRp00at33BxcZF7Kvbw4cOYPXs2gOJk6s6dO7F//350795dtu4ffvghYmJi4OrqCmdnZ+zcuRP9+vUDAFhYWGDt2rWIiIhAy5YtZW1nZ2cHAGW2RVBQEHr37o2bN28iLi4OX3/9Nc6ePYtvv/0WAGBqaoqNGzdi8ODBEIlE8PLywrx583DlyhW4ubkBKH7a8+eff4aLiwveffddhIWFYfXq1Rg7dqxa7UFERERERES6jU8yEhEREdFrw8LCAgCQnZ0tN/+LL77ApEmTABQ/Afhyoq9z5864fv263Ksx7e3tsXDhQoX/3jfffFMqwQgAYWFhuHbtmtxTbf/99x/u3bun8rr873//Q25uLgYNGgQfHx/06NEDQHFysDLcvHkTYrEYb7/9tmyehYUFWrVqhcuXL8vmtW7dWi6ZVbNmTbVfIerv74+HDx8iJCSkVJLR09MTbdu2xcKFC9G+fXvUqlULSUlJFVrPadOmVSjBCBQn1jZt2oTdu3fj/v37aj+tCQDGxsZo3Lix7K9evXqysuev8v34449l28TzBPLz7aJjx46ws7PDqFGj0KhRI7Ro0QKAev/Pnz/JKhAI5P77uaZNm8LPzw9TpkxBy5YtUbduXRQUFMj9G87OznBxcZFNt2jRArGxsSgoKFCzRYiIiIiIiEiXMclIRERERK8NT09PAJB79SNQ/IrIpKQkAEBRUZFc0gUofvKxqKgIRUVFsnkNGjSQJS3L06pVq3LLunfvjqtXr8r9bd++XeV1CQoKwujRo+Hs7IyFCxfi0qVLKtdVxfN1La8tnnN3d3/lf8vW1hZOTk44fPhwqSTjkSNH0LRpU8TGxmLgwIG4fPkyOnToUKF/R9H/D1WkpKSgsLAQWVlZyMjIeKXfKs/x48dLbRetW7cGUJwM79mzJ8zMzPDll18iLi6u0v/9devW4c0335R9HzIiIkIuGQpA7v8/AEilUllilIiIiIiIiF4fTDISERER0WujQYMGqFOnDg4fPiybl5WVhWvXrsm+W9eiRQucOXNGrt7p06fRqFEjGBkZVUocDRs2RHx8vNxTbRcuXMDu3btVqp+RkYE9e/Zg8eLFmD9/Pj7++GM8evSozGVNTEyQk5OjdowNGjSAmZmZ3Hcis7Oz8ffff8ueoAMAkUik9m+X5XlyMSAgQG7+tm3b8MYbb+Cnn37CkCFD4O7uXmZyraLrqSqxWIxhw4ZhwoQJaNGiRZnffHwVDRo0AFD8St/n24SDgwOWLl0qW99NmzZh0qRJWLZsGfr06YPMzMwyf+tV2mLz5s0IDA
zExo0b8emnn8LS0hKPHz+WWyY5OVkuUX/p0iW4ublV2rZAREREREREuoFJRiIiIiJ6bQgEAixatAhnzpzByJEjcfL
},
"metadata": {},
"output_type": "display_data"
}
],
"execution_count": 22
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-24T00:39:23.335605Z",
"start_time": "2025-03-24T00:39:22.772544Z"
}
},
"cell_type": "code",
"source": [
"# Principal component analysis (PCA)\n",
"PCA_data=data.iloc[:,2:]# drop the first two columns (date and hour)\n",
"\n",
"# 计算KMO值\n",
"kmo_all, kmo_model = calculate_kmo(PCA_data)\n",
"print(f\"KMO值: {kmo_model.round(3)}\")\n",
"# 进行巴赫利特检验\n",
"chi_square_value, p_value = calculate_bartlett_sphericity(PCA_data)\n",
"print(f\"巴赫利特检验卡方值: {chi_square_value.round(3)}, p值: {p_value}\")\n",
"#判断\n",
"if kmo_model>0.7 and p_value<0.05:\n",
" print(\"数据适合进行主成分分析\",'\\n')\n",
"else:\n",
" print(\"数据不适合进行主成分分析\",'\\n')\n",
"\n",
"# 数据标准化\n",
"scaled_data = (PCA_data - PCA_data.mean()) / PCA_data.std()\n",
"scaled_data = scaled_data.dropna()#去除空值\n",
"\n",
"# 计算协方差矩阵\n",
"cov_matrix = np.cov(scaled_data, rowvar=False)\n",
"\n",
"# 计算特征值和特征向量\n",
"eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)\n",
"sorted_indices = np.argsort(eigenvalues)[::-1]\n",
"sorted_eigenvalues = eigenvalues[sorted_indices]\n",
"sorted_eigenvectors = eigenvectors[:, sorted_indices]\n",
"\n",
"# 绘制累计方差解释比例图\n",
"explained_variance_ratio = sorted_eigenvalues / np.sum(sorted_eigenvalues)\n",
"cumulative_explained_variance = np.cumsum(explained_variance_ratio)\n",
"print(\"累计方差解释比例:\", [f\"{cum * 100:.2f}%\" for cum in cumulative_explained_variance])\n",
"\n",
"plt.plot(range(1, len(cumulative_explained_variance) + 1), cumulative_explained_variance, marker='o')\n",
"plt.xlabel('主成分数量')\n",
"plt.ylabel('累计方差解释比例')\n",
"plt.title('PCA 累计方差解释比例')\n",
"plt.show()\n",
"\n",
"# 选择特征值大于1的作为主成分\n",
"mask = sorted_eigenvalues > 1\n",
"selected_eigenvectors = sorted_eigenvectors[:, mask]\n",
"\n",
"# 计算因子载荷矩阵\n",
"loadings = selected_eigenvectors * np.sqrt(sorted_eigenvalues[mask])\n",
"\n",
"# 使用Varimax旋转载荷矩阵\n",
"rotator = Rotator(method='varimax')\n",
"rotated_loadings = rotator.fit_transform(loadings)\n",
"\n",
"# 输出旋转后的成分矩阵\n",
"rotated_components_df = pd.DataFrame(rotated_loadings,\n",
" index=PCA_data.columns,\n",
" columns=[f'Factor{i+1}' for i in range(rotated_loadings.shape[1])])\n",
"rotated_components_df = rotated_components_df.round(3)\n",
"\n",
"# 输出排序后的载荷矩阵\n",
"rotated_components_df=sort_matrix_by_diag(rotated_components_df)\n",
"print(\"旋转后的载荷矩阵(排序后):\\n\", rotated_components_df)\n",
"plot_heatmap(rotated_components_df, 4, 8,save_path=\"components_heatmap.png\")"
],
"id": "509d783a82bbdcb2",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"KMO值: 0.762\n",
"巴赫利特检验卡方值: 90424.712, p值: 0.0\n",
"数据适合进行主成分分析 \n",
"\n",
"累计方差解释比例: ['31.41%', '54.60%', '66.53%', '73.02%', '78.89%', '84.04%', '88.27%', '91.46%', '93.59%', '95.70%', '97.14%', '98.29%', '98.91%', '99.26%', '99.55%', '99.79%', '99.96%', '100.00%', '100.00%']\n"
]
},
{
"data": {
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
],
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjUAAAHECAYAAADBM9u5AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAABni0lEQVR4nO3dd3hT1f8H8HdG90hbWii0Bcreo6UspUwBBw5AGfJDcQEqiigKLkCRLSIq8GXJUhREULYoAiICLUtKUSy00AmlI+nMPL8/aiOhK2mTpmnfr+fJU+7NOfd+btI0H849QyKEECAiIiJycFJ7B0BERERkDUxqiIiIqFZgUkNERES1ApMaIiIiqhWY1BAREVGtwKSGiIiIagUmNURERFQrMKkhIiKiWoFJDRFZnVarLfO506dP48SJExYdLz8/H1999VWlYrl+/Tp+++23EvuXL1+O7OzscuvOmjULn376qck+g8EAS+YsVSqVOHbsGGbMmIHPPvsMarUahYWFFh2jtGPGxMTAYDCYVf7UqVOIjY2tsJxGo8HJkyehVCorHRuRPTGpISIjg8EAqVQKhUIBf39/48PJyQkeHh4m+9zd3dG7d+8Sx/joo48waNAg5OXllXqO//3vf1i2bJlFce3fvx/jxo3DoUOHLL6m1atXY/jw4bh586ZxX1RUFF599VXs37+/zHo3b97EJ598Ap1Oh+zsbGRnZ6OgoADbtm2DVCqFi4sLXF1dTR7fffedMd727dvD398fPj4+GDJkCKKiopCdnY358+fDzc0NUqkUEonE5HHs2DGTGAYOHIj58+eXiG3fvn3o2LEjCgoKzHoNFi9ejGHDhuHEiRP4/vvvsWvXLuzatQvbtm3D+fPnjeUKCwvRq1cvsxIgopqISQ2RnSUkJJh8sXl5eeHee+8t0ZqRlpaGkSNHwtPTE40aNcLChQtLHMtgMMDf3x/jx4+vVCzFX9bffvstbt++bXzcc889mDlzpsm+N998E66uriWOMX78eCQkJGDYsGGlfunK5XI4OztbFNfw4cMRHh6ON954w6zycXFxSEhIQFJSEp5++mkEBwcjNjYWSUlJSEpKwoIFC9C/f3/06dMHSUlJiI+PN0l6hBCYPHkyFAoFtm/fjs6dOyM4OBhxcXEYPnw4CgoK8Nxzz2HEiBEoLCxEYWEhpFIpfHx8AACRkZEYO3YsNmzYgPHjx2PUqFHYt28fxo4di3fffRczZswwvj4FBQV47bXX0K1bN0RGRppcR0ZGBvLz80tcX/HrZ87rmJubi0OHDuHVV19FVFQU5s+fj8ceewzLly/H559/jlOnThnLenh4mPwkcjRyewdAREU++ugjDB48GLdv38bq1asxcOBAxMbGIjQ0FBqNBoMHD4ZGo8G2bduQnJyMV155BQ0aNMDTTz9tPMbZs2eRkZFRqRaNYlKpFK+99hpmz55t3BcbG4urV69iz549xn1JSUlo06ZNifohISE4ePAgunfvjunTp+Pzzz83eV4ikZR57sLCQsTGxsLFxQUymczkuddffx3e3t7466+/TPYLIaDVahEUFIR69eoBAPr27YusrCyTL/0HH3wQhYWF8Pb2Nu7r0KEDhBDQ6XSYOHEili5dCgCYPn06zp49i+joaLi5uSE8PBwrVqxAx44dARTdXvvxxx+NrSi5ubkoKChAgwYNAABubm64ceMGhg8fjoCAANy+fRvHjh3D8OHDkZSUhJdeegktW7bEH3/8AYVCgdWrV5u8tsVkMlmJ1wH4L5lxcnIy7svKysLChQsxd+5cyOX//WnfuHEjCgoKMG7cOPj5+aFVq1YYOXIkDh8+bCxzzz33oGvXrsb3Sirl/3fJMTGpIaohmjVrhm7dugEABgwYgODgYKxbtw5z587F5s2bcfHiRZw5cwZhYWEAgPPnz2P+/PkmSc1PP/0EFxcXpKWl4eLFi8YvYUsYDAYsWrQI9913n3Hf4MGDMWDAAMyYMc
O4b+7cuWX2jWnTpg1+/PFHdOnSxaJzJyQkIDw83OKYAWDz5s0YN24cACA5ObnE83PnzsWOHTtw7ty5co9z8uRJrF+/Hv7+/ggKCoIQAgaDAc888wyeeeYZfPnllzh9+jQ0Gg2GDx+OzZs3Y/PmzXB1dUWzZs0AFCUFFy9exPbt243H3b9/Px544AEoFAooFAp8/vnneOSRR+Dt7Y2pU6eiX79+JWIpKwG8e/+xY8fw1FNPISUlBZGRkXjggQcAADqdDsuWLUOjRo3g5+cHoKiPUWhoqEl9b29vKBSKcl8XIkfApIaoBnJ2dkazZs0QHx8PANi1axeaNWtmTGgAoFu3bli5ciXUajVcXFwAAIcOHcLYsWOxd+9e/PTTTxYnNQaDAYWFhXjxxRdNbkHcuHEDly9fxjfffGPcd/v2bbRq1cq4rdFocO7cOWMsjRo1gre3N1JSUuDq6mr8379Go4FWqzV20i0+Z6NGjdCqVSvk5eWZtNRIpVLs2rULDz/8cKkxCyGgVqtLtC5cuXIFbm5uxgTg7NmzaNKkCZKSkkzK6XQ6SKVSNG7cGADQs2dPXL16FaNHj8YLL7yAwMBArF27FkeOHEGLFi1gMBhw5swZrFu3Du7u7vD19UWDBg2wa9cu42um0+kwZcoUeHt749dffwUAvPrqq8Z+RlqtFnl5edBoNFAqlbh69SpOnDiBnj17lriO7OxsJCQkwGAwQKPRmNzyu3LlCmbNmoXt27dj7Nix+OCDD9C0aVPj86tWrUJcXByaNGli3Hf8+HGkpqZi6NChAIAZM2bA3d3dpNWHyGEJIrKr+Ph4AUBs3brVZH+TJk3E66+/bvz3/fffb/J8Wlqa2L9/vygsLBRCCJGXlyecnZ3Fxo0bxciRI8XgwYOrFFdKSooYMWKEaN26tfjll1+EEELodDrxv//9TzRq1Ejs27fPpHxSUpLw9vYW/v7+wsPDQ/Ts2VOkpqYKAGY9SqPVagWAEucyh0wmE56enkKhUAiFQiEACA8PD+O2QqEQ3t7ewsPDQ0RGRpaof99994lXX31VfPDBByIsLExERUWJ4OBg8fPPP4vs7Gzxzz//iOTkZBEeHi4yMjJM6m7fvr3Ua3ziiSfE22+/LYKDg0WjRo3EgQMHREpKihg5cqSQSCRCoVCIAwcOGI8THh4uZDKZcHFxEU5OTgKAGDJkiNi9e7cAINzc3MT48ePFX3/9VSL+f/75R3h7e4vOnTuLJk2aCCGEKCgoEL6+vuL1118XO3fuFD4+PuKXX34RI0aMELNmzRJCCAFAXLx40eLXm6gmYEsNUQ2TkZGBlStX4saNG3j88ccBALdu3cK9995rUq5BgwbG/20DwJEjR6DRaNC3b1+oVCq8+eabJq045jp27BjWrFmD7777Du7u7hg1ahS+/fZbrF+/HlFRUcjNzcWUKVPQtWtXk3pBQUHGocCzZ8/GkSNHUL9+faSnp8PNzc3Y8jJlyhTk5eVh/fr1AIpaWgoLC0uNJTc3F0DRSKS7+9IUq1evHgICAozbSqUSQgiT4doHDx7E+PHjcenSJWO/mzsJIZCZmQkfHx9jS4kQAj/++COcnJyQlpaGqVOn4vbt25DJZPjpp5/w/PPP448//sCZM2eMLTRCCEgkEgwdOhRXr17FpUuX8PDDD+OJJ57AokWL4OLigiNHjmDSpEmYOnUqfvvtN6xduxbbt29HYmIifvjhBwwePNgktnfffdfYv0kIAb1ejwMHDgAArl69ioYNG5b6uiQnJ6Np06Z46623MHPmTADAZ599Bq1Wi3fffReurq5QKpUIDg4utT6RI2JSQ1RDjBkzBmPGjAFQ1MdhzZo16NGjBwCUenvlbocOHULTpk3RpEkT9OvXDwUFBfjtt98waNAgi+LIysrC9evXMWbMGISEhCA+Ph7btm1Do0aN8NZbb2HgwIH4+OOPTTqjlkYul0MqlcLf399kv0wmg1QqNbmN4ubmVuoxbt++DQCYMGFCme
dZsGAB3nrrLeP2q6++io0bN5Za9s5bM6VJTU1FYGAggKLbYq+//joUCoXJ7ScA2LJlC1QqlTGxUygUMBgMeOeddzB
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"旋转后的载荷矩阵(排序后):\n",
" Factor1 Factor3 Factor2 Factor4 Factor5\n",
"Tn -0.963 -0.035 -0.071 -0.079 0.035\n",
"T -0.958 0.138 -0.033 0.074 -0.028\n",
"Tx -0.954 0.014 -0.045 -0.052 -0.063\n",
"P 0.924 0.029 -0.071 -0.032 -0.000\n",
"Po 0.921 0.029 -0.073 -0.033 -0.000\n",
"Td -0.898 -0.366 -0.043 -0.013 0.076\n",
"O3 -0.637 0.529 0.030 0.239 -0.084\n",
"U -0.322 -0.824 0.008 -0.156 0.229\n",
"Ff -0.045 0.772 0.126 0.024 0.172\n",
"NO2 0.300 -0.728 0.290 0.110 -0.202\n",
"CO -0.101 -0.695 0.449 0.298 -0.007\n",
"VV 0.153 0.667 -0.531 -0.093 -0.175\n",
"AQI -0.017 -0.038 0.967 0.025 -0.029\n",
"PM10 0.037 0.060 0.933 -0.092 0.003\n",
"PM2.5 0.049 -0.359 0.879 0.149 -0.007\n",
"Pa 0.006 -0.055 0.147 -0.747 -0.130\n",
"SO2 -0.035 -0.099 0.208 0.694 -0.065\n",
"RRR -0.139 -0.094 -0.103 -0.077 0.819\n",
"tR 0.163 0.120 0.087 0.131 0.512\n"
]
},
{
"data": {
"text/plain": [
"<Figure size 400x800 with 2 Axes>"
],
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAMWCAYAAADrsBE8AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3hT5dvA8W+S7r0odFAonUBb9pApouICkT1E9pChoPJTVLYyZAsCgshQVEBAEGRTlqKASCkyuwsthRZI26QjbfL+UQiEpE0KSaG+z+e6znVxxp3ed07Ic57nnJwj0Wg0GgRBEAThIdInnYAgCILwdBINhCAIgmCQaCAEQRAEg0QDIQiCIBgkGghBEATBINFACIIgCAaJBkIQBEEwSDQQgiAIgkGigRAEQRAMEg2EoHXo0CEkEgl//vmnznKJRMLChQufTFKCIDwxooEQKlxSUhJTpkx50mkIgmCEaCCECpeUlMTUqVOfdBqCIBghGghBEATBINFACOWyceNGIiIisLe3p0GDBhw4cEBn/alTp2jTpg1OTk74+fnx0Ucfce+GwVOmTEEikdCuXTug5NyGRCJhwIABAKxZs4aaNWvyySef4OjoSN++ffnmm29wc3OjUaNGyOVyAOLi4njllVdwdXWlatWqDBs2jLy8PKCkdyKRSJgzZw4hISG4uLjQqVMn0tLSKugdEoT/EI0g3BUdHa0BDE4LFizQHDhwQCORSDSTJ0/WHD58WDNixAiNtbW15sKFCxqNRqPJycnReHh4aF588UXN4cOHNevXr9c4OTlp1q1bp9FoNJpr165pTp48qVm+fLkG0Jw8eVJz8uRJTWJiokaj0WhWr16tATSjRo3SzJ8/XwNo2rRpo9m1a5fGyspKs3HjRo1ardaEhYVpGjRooNm/f7/ml19+0fj6+mqmTZum0Wg0msTERA2gcXZ21qxcuVLz66+/asLCwjSNGjXSqNXqJ/K+CkJlJRoIQeteA7Fu3TrNP//8o53uNRBt27bVvP7669rti4uLNV5eXppJkyZpNBqNJj09XbNs2TLNtWvXNBqNRlNQUKBp2rSp5u233zb4dx62evVqjaOjo6awsFCTkJCgATS///67RqPRaAICAjSrV6/W5ObmapYtW6a5ePGiNocuXbpoXn75ZY1Gc7+BuNdgaDQazcGDBzWA5tixY+Z7swTh/wGriu+zCE+7kJAQ6tevr7c8NjaWW7duIZFIdJZfuXIFgGrVqtGhQwe++eYbjhw5wsmTJ8nJyaF27dom/20vLy+sra21f8PX1xdAO+/o6EiXLl1Yu3Ythw4d4q+//uLWrVu0adNG53Vatmyp/XeTJk2AkqGpB5cLglA20UAI5TJy5EiGDh2qs8zNzQ2A06dP06pVK9q1a0fXrl2ZNWsWixcvNuvfT01NpUGDBoSHh9O9e3cmTJjAnj17OHr0qM52mgcelKhWqwGQSsUpN0EoD9FACCaLiIggPT1dp3cxZcoUqlSpwqhRo/juu++oUqUKO3fuBEq+pK9cuUJoaKjO69jZ2QGQl5eHvb19uXLYsmUL2dnZHDhwAFtbWwCWLFmit93Ro0dp3749ACdOnABKekaCIJhONBCCySZNmsQLL7zAJ598QocOHfj999+ZNm0amzZtAkqGhzIyMti4cSNOTk4sXryY48ePU6tWLZ3XqVOnDs7OznzxxRe0a9eO2NhYunXrZlIOXl5eqFQqVq9eTUhICKtXr2bDhg16Q0cLFizAz8+PatWq8cEHH9C0aVOaN29unjdCEP6/eNInQYSnx72Tx8ePH9dZzt2T1BqNRrNhwwZN3bp1Nba2tprw8HDN6tWrtdvl5uZqevXqpXF2dtb4+/trhg0bphkxYoQmODhYo1KpdF7z119/1YSEhGisrKw0tWrV0qSlpWlWr16tqVGjhkajuX+y+d4VTjVq1NCsXr1aU1RUpBk9erTGw8ND4+3trenZs6dm4sSJGldXV8
2dO3e0cYsXL9aEh4drbG1tNS+88IImKSnJUm+bIPxnSTSaBwZrBaGSS0pKIjAwkJMnT9K4ceMnnY4gVGrirJ0gCIJgkOhBCIIgCAaJHoQgCIJgkGggBEEQKlhWVhaBgYEkJSWZtP3hw4epXbs2Xl5ezJ8/3+R1j0s0EIIgCBUoMzOT1157zeTG4ebNm3Tq1InevXtz/Phx1q9fT3R0tNF15iAaCEEQhArUq1cvevXqZfL269evx8fHh4kTJxISEsKkSZNYtWqV0XXmIBoIQRCEx1RQUEB2drbOVFBQYHDbFStW8O6775r82jExMTz33HPa+5E1bdqU06dPG11nDuKX1IIgVEo7rcOedApaJz/prfeUxMmTJxt8tO7DdxYwJjs7mzp16mjnXVxcuHbtmtF15vBUNBDPdjv+pFMwi0M/P8Ol+NQnnYZZhAVV51zc9SedhllEBFcjNi7jSadhFpHBVUm5cuFJp2EWASGm3+X3aTdhwgTee+89nWX37hX2uKysrHRey87ODqVSaXSdWf622V5JEATh/ylbW1uzNQgP8/Dw4ObNm9r5nJwcbGxsjK4zB9FACIJQKUmsJcY3+g9o0qQJP/74o3b+zJkz+Pn5GV1nDuIktSAIwlMgOzsblUqlt7xTp04cO3aM6OhoioqKmDt3Lh06dDC6zhxED0IQBOEpEBUVxcKFC+ncubPOci8vL+bNm0eHDh1wdXXF0dFReylrWevMQTQQgiBUSlKryj3E9PBt8Mr64dzIkSN58cUXuXDhAm3btsXFxcWkdY9LNBCCIAiVQHBwMMHBweVe9zjEOQhBEATBINGDEAShUpJYi+NbSxPvsCAIgmCQ6EEIglApVfaT1JWB6EEIgiAIBlXqBiKwuj3LZ0Xy65omjOhXw+S4Xq/78v3i+mz7tjHvDgnEzlb/bZg0LoR3BtU0Y7ZlS05K5L13R9K7R2dWr/pa7xK40pyLjWHk8EH07dWFX7b8bHCboqIixrw9hNizZ8yYcelSkhL439hhvNXjVdauWmZyLf/GnuGd4f0Y0LsT27du0C7fsH41XV9tqzedO/uPpUrQSklK4MOxw+jf4xXWrVparlreHf4mA3t35NcHagHYtvlHBvd9nX7dXmLO55+Sky23ROpaiUnJjBr3AW/07MuKb9eYVENM7DkGjRhN1z79+HnrNoPbFBUVMXTUO8ScjTV3ysJT4pEbiIKCAlJTU0lJSdGZKoq1lYQZH4VzOSGX4R+epYa/PS+1q2I07tX23nR9pRqfLYpj9KfnqB3sxHvDdO+u2KSeKw0iXPn2p4q58Z5KVcj0qRMJDg5l/qKlpKYkc2DfHqNxcvkdPps6kTZt2zFn3pccPnSAszFn9Lbb8vMGkpOTzJ+4ASpVITOnTSAoOIwvFq3gakoS0ft3GY2Ty+8wa9rHtGrbnhlzl3I0ej+xMSW3LX6jex/WbdihneYt+RYXVzcCg0IsXsusaR9RKziU2YtWlquW2dMm0Krt83dr2ce5u7WcP3eGQwd2M232Yr748htUhYWs/eYri9VQqFIxafrnhAQF8dXCuSSnpLJn/8EyY+7I5UyaPoN2bVuzaM5sDh46zBkDjcCGzVtJSq64//MPk1hLnprpv+qRGojFixfj5uZGzZo1dabAwEBz51eqZg3ccHSw4qs1yaRlFPDNDym8+py30bgX21bhp21pXIzLJTUtn9UbUmnZxF273sZGytihtVixPplcZbElS9D6++RJlAoFg4eOwMfHl379B7Nvr/EvokPRB3D38KRn7zfx9fOnZ+839eLSrl1l65ZNeFetZqn0dZw+9RdKhYIBQ0ZRzcePvv2HcmDvb0bjjkbvw83Dk+69++Pr50/33v05eDfOxsYWRydn7bTr1y107NwdR0cni9byj7aW0VTz8aNP/2Ec3LvThFr24ubhSbfe/fHxq0633v05cDfuyqULNGzcHD//AHx8/WnV9nnSrlnuQOTkqb9RKJSMGDIIXx8fBr31Jr
v37S8z5uChw3h6uPNmrx74+/nyZu+e7N6rG3P1Who/b/mFalWN/58TKq9HaiAmT57MF198QX5+Pmq1WjsVF1fMFyp
},
"metadata": {},
"output_type": "display_data"
}
],
"execution_count": 23
},
{
"metadata": {},
"cell_type": "markdown",
"source": [
"## 题目3\n",
"令2022年11月1日至2023年9月30日的空气质量数据为训练集,剩余数据为测试集。基于训练集,尝试使用两种不同的方法构建空气质量指数预测模型,并在测试集上测试。比较所选模型的预测效果。"
],
"id": "3f89fa62a897a3e3"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-24T00:39:23.908760Z",
"start_time": "2025-03-24T00:39:23.432226Z"
}
},
"cell_type": "code",
"source": [
"# Re-read the workbook: from this cell on, `data` is replaced by an AQI-only, time-indexed frame\n",
"data=pd.read_excel('北京市空气质量指数与气象数据.xlsx')\n",
"# Combine the calendar date and the hour-of-day into a single timestamp column\n",
"data['data_hour'] = pd.to_datetime(data['date']) + pd.to_timedelta(data['hour'], unit='h')\n",
"# Keep only the timestamp and AQI, index by timestamp, and sort chronologically:\n",
"# the later shift()-based lag features and date-string slicing both rely on time order\n",
"data = data[['data_hour', 'AQI']].set_index('data_hour').sort_index()"
],
"id": "d1bdac1e4e1562f2",
"outputs": [],
"execution_count": 24
},
{
"metadata": {},
"cell_type": "markdown",
"source": "### (1)SARIMA模型",
"id": "75bc1cfcc85f60a7"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-24T00:39:23.920602Z",
"start_time": "2025-03-24T00:39:23.914779Z"
}
},
"cell_type": "code",
"source": [
"\"\"\"\n",
"该模型在假设不知道测试集其他指标的情况下仅使用AQI历史数据预测未来AQI\n",
"\"\"\"\n",
"\n",
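"# TODO(not implemented): fit the SARIMA model\n",
"\n",
"# TODO(not implemented): plot predicted vs. actual AQI\n",
"\n",
"# TODO(not implemented): compute the goodness of fit\n"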
],
"id": "24996a0c06820cdc",
"outputs": [
{
"data": {
"text/plain": [
"'\\n该模型在假设不知道测试集其他指标的情况下仅使用AQI历史数据预测未来AQI\\n'"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 25
},
{
"metadata": {},
"cell_type": "markdown",
"source": "### (2)XGBOOST模型",
"id": "ebe88094b6c13e0c"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-24T00:39:24.054353Z",
"start_time": "2025-03-24T00:39:24.027621Z"
}
},
"cell_type": "code",
"source": [
"\"\"\"\n",
"该模型在假设不考虑测试集其他指标的情况下仅使用AQI数据对未来AQI进行<单步预测>即每次预测都是根据之前时间点的真实AQI值进行的。\n",
"整体运行时间约为20s请耐心等待。\n",
"\"\"\"\n",
"#特征工程\n",
"data_processed = data.copy()\n",
"\n",
"#时间分解特征\n",
"# 基础特征\n",
"data_processed['hour'] = data_processed.index.hour\n",
"data_processed['day_of_week'] = data_processed.index.dayofweek\n",
"data_processed['month'] = data_processed.index.month\n",
"\n",
"# 周期性编码\n",
"data_processed['hour_sin'] = np.sin(2 * np.pi * data_processed['hour'] / 24)\n",
"data_processed['hour_cos'] = np.cos(2 * np.pi * data_processed['hour'] / 24)\n",
"data_processed['week_sin'] = np.sin(2 * np.pi * data_processed['day_of_week'] / 7)\n",
"data_processed['week_cos'] = np.cos(2 * np.pi * data_processed['day_of_week'] / 7)\n",
"\n",
"#滞后特征\n",
"# Build lag features at the data's 3-hour step, reaching back at most 7 days\n",
"lags = [i for i in range(1, 7 * 8 + 1)] # 7 days x 8 samples per day (3-hour spacing)\n",
"for lag in lags:\n",
" data_processed[f'AQI_lag_{lag}'] = data_processed['AQI'].shift(lag)\n",
"\n",
"# 划分数据集\n",
"train_data = data_processed.loc['2022-11-01':'2023-09-30']\n",
"test_data = data_processed.loc['2023-10-01':]\n",
"\n",
"# 特征选择\n",
"features = [col for col in train_data.columns if col != 'AQI']\n",
"X_train, y_train = train_data[features], train_data['AQI']\n",
"X_test, y_test = test_data[features], test_data['AQI']"
],
"id": "66f104e110aba36",
"outputs": [],
"execution_count": 26
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-24T00:39:44.748314Z",
"start_time": "2025-03-24T00:39:24.076578Z"
}
},
"cell_type": "code",
"source": [
"# Hyper-parameter tuning by randomized search (slow: the recorded run of this cell took about 20 s)\n",
"param_dist = {\n",
"    'n_estimators': [100, 200, 300],\n",
"    'max_depth': randint(5, 10),  # integers 5..9 (upper bound is exclusive)\n",
"    'learning_rate': uniform(0.01, 0.2),  # scipy uniform(loc, scale): range [0.01, 0.21]\n",
"    'subsample': uniform(0.7, 0.3),  # range [0.7, 1.0]\n",
"    'colsample_bytree': uniform(0.7, 0.3),  # range [0.7, 1.0]\n",
"    'gamma': uniform(0, 0.3)  # range [0.0, 0.3]\n",
"}\n",
"\n",
"search = RandomizedSearchCV(\n",
"    XGBRegressor(n_jobs=-1, random_state=42),\n",
"    param_distributions=param_dist,\n",
"    n_iter=10,\n",
"    # Expanding-window folds: each validation block comes after its training block, so a\n",
"    # candidate is never scored on observations older than the ones it was fitted on\n",
"    cv=TimeSeriesSplit(n_splits=3),\n",
"    scoring='neg_mean_absolute_error',\n",
"    # Seed the candidate sampling; without it every run draws different hyper-parameters\n",
"    random_state=42,\n",
"    verbose=1\n",
")\n",
"search.fit(X_train, y_train)"
],
"id": "199aa487e826c1ac",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Fitting 3 folds for each of 10 candidates, totalling 30 fits\n"
]
},
{
"data": {
"text/plain": [
"RandomizedSearchCV(cv=3,\n",
" estimator=XGBRegressor(base_score=None, booster=None,\n",
" callbacks=None,\n",
" colsample_bylevel=None,\n",
" colsample_bynode=None,\n",
" colsample_bytree=None, device=None,\n",
" early_stopping_rounds=None,\n",
" enable_categorical=False,\n",
" eval_metric=None, feature_types=None,\n",
" gamma=None, grow_policy=None,\n",
" importance_type=None,\n",
" interaction_constraints=None,\n",
" learning_rate=...\n",
" 'learning_rate': <scipy.stats._distn_infrastructure.rv_continuous_frozen object at 0x000001EB86B7EC90>,\n",
" 'max_depth': <scipy.stats._distn_infrastructure.rv_discrete_frozen object at 0x000001EBFF5165D0>,\n",
" 'n_estimators': [100, 200, 300],\n",
" 'subsample': <scipy.stats._distn_infrastructure.rv_continuous_frozen object at 0x000001EB86A1C5F0>},\n",
" scoring='neg_mean_absolute_error', verbose=1)"
],
"text/html": [
"<style>#sk-container-id-4 {\n",
" /* Definition of color scheme common for light and dark mode */\n",
" --sklearn-color-text: black;\n",
" --sklearn-color-line: gray;\n",
" /* Definition of color scheme for unfitted estimators */\n",
" --sklearn-color-unfitted-level-0: #fff5e6;\n",
" --sklearn-color-unfitted-level-1: #f6e4d2;\n",
" --sklearn-color-unfitted-level-2: #ffe0b3;\n",
" --sklearn-color-unfitted-level-3: chocolate;\n",
" /* Definition of color scheme for fitted estimators */\n",
" --sklearn-color-fitted-level-0: #f0f8ff;\n",
" --sklearn-color-fitted-level-1: #d4ebff;\n",
" --sklearn-color-fitted-level-2: #b3dbfd;\n",
" --sklearn-color-fitted-level-3: cornflowerblue;\n",
"\n",
" /* Specific color for light theme */\n",
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
" --sklearn-color-icon: #696969;\n",
"\n",
" @media (prefers-color-scheme: dark) {\n",
" /* Redefinition of color scheme for dark theme */\n",
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
" --sklearn-color-icon: #878787;\n",
" }\n",
"}\n",
"\n",
"#sk-container-id-4 {\n",
" color: var(--sklearn-color-text);\n",
"}\n",
"\n",
"#sk-container-id-4 pre {\n",
" padding: 0;\n",
"}\n",
"\n",
"#sk-container-id-4 input.sk-hidden--visually {\n",
" border: 0;\n",
" clip: rect(1px 1px 1px 1px);\n",
" clip: rect(1px, 1px, 1px, 1px);\n",
" height: 1px;\n",
" margin: -1px;\n",
" overflow: hidden;\n",
" padding: 0;\n",
" position: absolute;\n",
" width: 1px;\n",
"}\n",
"\n",
"#sk-container-id-4 div.sk-dashed-wrapped {\n",
" border: 1px dashed var(--sklearn-color-line);\n",
" margin: 0 0.4em 0.5em 0.4em;\n",
" box-sizing: border-box;\n",
" padding-bottom: 0.4em;\n",
" background-color: var(--sklearn-color-background);\n",
"}\n",
"\n",
"#sk-container-id-4 div.sk-container {\n",
" /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
" but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
" so we also need the `!important` here to be able to override the\n",
" default hidden behavior on the sphinx rendered scikit-learn.org.\n",
" See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
" display: inline-block !important;\n",
" position: relative;\n",
"}\n",
"\n",
"#sk-container-id-4 div.sk-text-repr-fallback {\n",
" display: none;\n",
"}\n",
"\n",
"div.sk-parallel-item,\n",
"div.sk-serial,\n",
"div.sk-item {\n",
" /* draw centered vertical line to link estimators */\n",
" background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
" background-size: 2px 100%;\n",
" background-repeat: no-repeat;\n",
" background-position: center center;\n",
"}\n",
"\n",
"/* Parallel-specific style estimator block */\n",
"\n",
"#sk-container-id-4 div.sk-parallel-item::after {\n",
" content: \"\";\n",
" width: 100%;\n",
" border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
" flex-grow: 1;\n",
"}\n",
"\n",
"#sk-container-id-4 div.sk-parallel {\n",
" display: flex;\n",
" align-items: stretch;\n",
" justify-content: center;\n",
" background-color: var(--sklearn-color-background);\n",
" position: relative;\n",
"}\n",
"\n",
"#sk-container-id-4 div.sk-parallel-item {\n",
" display: flex;\n",
" flex-direction: column;\n",
"}\n",
"\n",
"#sk-container-id-4 div.sk-parallel-item:first-child::after {\n",
" align-self: flex-end;\n",
" width: 50%;\n",
"}\n",
"\n",
"#sk-container-id-4 div.sk-parallel-item:last-child::after {\n",
" align-self: flex-start;\n",
" width: 50%;\n",
"}\n",
"\n",
"#sk-container-id-4 div.sk-parallel-item:only-child::after {\n",
" width: 0;\n",
"}\n",
"\n",
"/* Serial-specific style estimator block */\n",
"\n",
"#sk-container-id-4 div.sk-serial {\n",
" display: flex;\n",
" flex-direction: column;\n",
" align-items: center;\n",
" background-color: var(--sklearn-color-background);\n",
" padding-right: 1em;\n",
" padding-left: 1em;\n",
"}\n",
"\n",
"\n",
"/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
"clickable and can be expanded/collapsed.\n",
"- Pipeline and ColumnTransformer use this feature and define the default style\n",
"- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
"*/\n",
"\n",
"/* Pipeline and ColumnTransformer style (default) */\n",
"\n",
"#sk-container-id-4 div.sk-toggleable {\n",
" /* Default theme specific background. It is overwritten whether we have a\n",
" specific estimator or a Pipeline/ColumnTransformer */\n",
" background-color: var(--sklearn-color-background);\n",
"}\n",
"\n",
"/* Toggleable label */\n",
"#sk-container-id-4 label.sk-toggleable__label {\n",
" cursor: pointer;\n",
" display: block;\n",
" width: 100%;\n",
" margin-bottom: 0;\n",
" padding: 0.5em;\n",
" box-sizing: border-box;\n",
" text-align: center;\n",
"}\n",
"\n",
"#sk-container-id-4 label.sk-toggleable__label-arrow:before {\n",
" /* Arrow on the left of the label */\n",
" content: \"▸\";\n",
" float: left;\n",
" margin-right: 0.25em;\n",
" color: var(--sklearn-color-icon);\n",
"}\n",
"\n",
"#sk-container-id-4 label.sk-toggleable__label-arrow:hover:before {\n",
" color: var(--sklearn-color-text);\n",
"}\n",
"\n",
"/* Toggleable content - dropdown */\n",
"\n",
"#sk-container-id-4 div.sk-toggleable__content {\n",
" max-height: 0;\n",
" max-width: 0;\n",
" overflow: hidden;\n",
" text-align: left;\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-0);\n",
"}\n",
"\n",
"#sk-container-id-4 div.sk-toggleable__content.fitted {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-0);\n",
"}\n",
"\n",
"#sk-container-id-4 div.sk-toggleable__content pre {\n",
" margin: 0.2em;\n",
" border-radius: 0.25em;\n",
" color: var(--sklearn-color-text);\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-0);\n",
"}\n",
"\n",
"#sk-container-id-4 div.sk-toggleable__content.fitted pre {\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-fitted-level-0);\n",
"}\n",
"\n",
"#sk-container-id-4 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
" /* Expand drop-down */\n",
" max-height: 200px;\n",
" max-width: 100%;\n",
" overflow: auto;\n",
"}\n",
"\n",
"#sk-container-id-4 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
" content: \"▾\";\n",
"}\n",
"\n",
"/* Pipeline/ColumnTransformer-specific style */\n",
"\n",
"#sk-container-id-4 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
" color: var(--sklearn-color-text);\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
"#sk-container-id-4 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
"/* Estimator-specific style */\n",
"\n",
"/* Colorize estimator box */\n",
"#sk-container-id-4 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
"#sk-container-id-4 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
"#sk-container-id-4 div.sk-label label.sk-toggleable__label,\n",
"#sk-container-id-4 div.sk-label label {\n",
" /* The background is the default theme color */\n",
" color: var(--sklearn-color-text-on-default-background);\n",
"}\n",
"\n",
"/* On hover, darken the color of the background */\n",
"#sk-container-id-4 div.sk-label:hover label.sk-toggleable__label {\n",
" color: var(--sklearn-color-text);\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
"/* Label box, darken color on hover, fitted */\n",
"#sk-container-id-4 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
" color: var(--sklearn-color-text);\n",
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
"/* Estimator label */\n",
"\n",
"#sk-container-id-4 div.sk-label label {\n",
" font-family: monospace;\n",
" font-weight: bold;\n",
" display: inline-block;\n",
" line-height: 1.2em;\n",
"}\n",
"\n",
"#sk-container-id-4 div.sk-label-container {\n",
" text-align: center;\n",
"}\n",
"\n",
"/* Estimator-specific */\n",
"#sk-container-id-4 div.sk-estimator {\n",
" font-family: monospace;\n",
" border: 1px dotted var(--sklearn-color-border-box);\n",
" border-radius: 0.25em;\n",
" box-sizing: border-box;\n",
" margin-bottom: 0.5em;\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-0);\n",
"}\n",
"\n",
"#sk-container-id-4 div.sk-estimator.fitted {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-0);\n",
"}\n",
"\n",
"/* on hover */\n",
"#sk-container-id-4 div.sk-estimator:hover {\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
"#sk-container-id-4 div.sk-estimator.fitted:hover {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
"/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
"\n",
"/* Common style for \"i\" and \"?\" */\n",
"\n",
".sk-estimator-doc-link,\n",
"a:link.sk-estimator-doc-link,\n",
"a:visited.sk-estimator-doc-link {\n",
" float: right;\n",
" font-size: smaller;\n",
" line-height: 1em;\n",
" font-family: monospace;\n",
" background-color: var(--sklearn-color-background);\n",
" border-radius: 1em;\n",
" height: 1em;\n",
" width: 1em;\n",
" text-decoration: none !important;\n",
" margin-left: 1ex;\n",
" /* unfitted */\n",
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
" color: var(--sklearn-color-unfitted-level-1);\n",
"}\n",
"\n",
".sk-estimator-doc-link.fitted,\n",
"a:link.sk-estimator-doc-link.fitted,\n",
"a:visited.sk-estimator-doc-link.fitted {\n",
" /* fitted */\n",
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
" color: var(--sklearn-color-fitted-level-1);\n",
"}\n",
"\n",
"/* On hover */\n",
"div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
".sk-estimator-doc-link:hover,\n",
"div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
".sk-estimator-doc-link:hover {\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-3);\n",
" color: var(--sklearn-color-background);\n",
" text-decoration: none;\n",
"}\n",
"\n",
"div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
".sk-estimator-doc-link.fitted:hover,\n",
"div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
".sk-estimator-doc-link.fitted:hover {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-3);\n",
" color: var(--sklearn-color-background);\n",
" text-decoration: none;\n",
"}\n",
"\n",
"/* Span, style for the box shown on hovering the info icon */\n",
".sk-estimator-doc-link span {\n",
" display: none;\n",
" z-index: 9999;\n",
" position: relative;\n",
" font-weight: normal;\n",
" right: .2ex;\n",
" padding: .5ex;\n",
" margin: .5ex;\n",
" width: min-content;\n",
" min-width: 20ex;\n",
" max-width: 50ex;\n",
" color: var(--sklearn-color-text);\n",
" box-shadow: 2pt 2pt 4pt #999;\n",
" /* unfitted */\n",
" background: var(--sklearn-color-unfitted-level-0);\n",
" border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
"}\n",
"\n",
".sk-estimator-doc-link.fitted span {\n",
" /* fitted */\n",
" background: var(--sklearn-color-fitted-level-0);\n",
" border: var(--sklearn-color-fitted-level-3);\n",
"}\n",
"\n",
".sk-estimator-doc-link:hover span {\n",
" display: block;\n",
"}\n",
"\n",
"/* \"?\"-specific style due to the `<a>` HTML tag */\n",
"\n",
"#sk-container-id-4 a.estimator_doc_link {\n",
" float: right;\n",
" font-size: 1rem;\n",
" line-height: 1em;\n",
" font-family: monospace;\n",
" background-color: var(--sklearn-color-background);\n",
" border-radius: 1rem;\n",
" height: 1rem;\n",
" width: 1rem;\n",
" text-decoration: none;\n",
" /* unfitted */\n",
" color: var(--sklearn-color-unfitted-level-1);\n",
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
"}\n",
"\n",
"#sk-container-id-4 a.estimator_doc_link.fitted {\n",
" /* fitted */\n",
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
" color: var(--sklearn-color-fitted-level-1);\n",
"}\n",
"\n",
"/* On hover */\n",
"#sk-container-id-4 a.estimator_doc_link:hover {\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-3);\n",
" color: var(--sklearn-color-background);\n",
" text-decoration: none;\n",
"}\n",
"\n",
"#sk-container-id-4 a.estimator_doc_link.fitted:hover {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-3);\n",
"}\n",
"</style><div id=\"sk-container-id-4\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>RandomizedSearchCV(cv=3,\n",
" estimator=XGBRegressor(base_score=None, booster=None,\n",
" callbacks=None,\n",
" colsample_bylevel=None,\n",
" colsample_bynode=None,\n",
" colsample_bytree=None, device=None,\n",
" early_stopping_rounds=None,\n",
" enable_categorical=False,\n",
" eval_metric=None, feature_types=None,\n",
" gamma=None, grow_policy=None,\n",
" importance_type=None,\n",
" interaction_constraints=None,\n",
" learning_rate=...\n",
" &#x27;learning_rate&#x27;: &lt;scipy.stats._distn_infrastructure.rv_continuous_frozen object at 0x000001EB86B7EC90&gt;,\n",
" &#x27;max_depth&#x27;: &lt;scipy.stats._distn_infrastructure.rv_discrete_frozen object at 0x000001EBFF5165D0&gt;,\n",
" &#x27;n_estimators&#x27;: [100, 200, 300],\n",
" &#x27;subsample&#x27;: &lt;scipy.stats._distn_infrastructure.rv_continuous_frozen object at 0x000001EB86A1C5F0&gt;},\n",
" scoring=&#x27;neg_mean_absolute_error&#x27;, verbose=1)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-10\" type=\"checkbox\" ><label for=\"sk-estimator-id-10\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;&nbsp;RandomizedSearchCV<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.5/modules/generated/sklearn.model_selection.RandomizedSearchCV.html\">?<span>Documentation for RandomizedSearchCV</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>RandomizedSearchCV(cv=3,\n",
" estimator=XGBRegressor(base_score=None, booster=None,\n",
" callbacks=None,\n",
" colsample_bylevel=None,\n",
" colsample_bynode=None,\n",
" colsample_bytree=None, device=None,\n",
" early_stopping_rounds=None,\n",
" enable_categorical=False,\n",
" eval_metric=None, feature_types=None,\n",
" gamma=None, grow_policy=None,\n",
" importance_type=None,\n",
" interaction_constraints=None,\n",
" learning_rate=...\n",
" &#x27;learning_rate&#x27;: &lt;scipy.stats._distn_infrastructure.rv_continuous_frozen object at 0x000001EB86B7EC90&gt;,\n",
" &#x27;max_depth&#x27;: &lt;scipy.stats._distn_infrastructure.rv_discrete_frozen object at 0x000001EBFF5165D0&gt;,\n",
" &#x27;n_estimators&#x27;: [100, 200, 300],\n",
" &#x27;subsample&#x27;: &lt;scipy.stats._distn_infrastructure.rv_continuous_frozen object at 0x000001EB86A1C5F0&gt;},\n",
" scoring=&#x27;neg_mean_absolute_error&#x27;, verbose=1)</pre></div> </div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-11\" type=\"checkbox\" ><label for=\"sk-estimator-id-11\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">best_estimator_: XGBRegressor</label><div class=\"sk-toggleable__content fitted\"><pre>XGBRegressor(base_score=None, booster=None, callbacks=None,\n",
" colsample_bylevel=None, colsample_bynode=None,\n",
" colsample_bytree=0.7508184716426058, device=None,\n",
" early_stopping_rounds=None, enable_categorical=False,\n",
" eval_metric=None, feature_types=None, gamma=0.020833743645897518,\n",
" grow_policy=None, importance_type=None,\n",
" interaction_constraints=None, learning_rate=0.05075327204554973,\n",
" max_bin=None, max_cat_threshold=None, max_cat_to_onehot=None,\n",
" max_delta_step=None, max_depth=5, max_leaves=None,\n",
" min_child_weight=None, missing=nan, monotone_constraints=None,\n",
" multi_strategy=None, n_estimators=300, n_jobs=-1,\n",
" num_parallel_tree=None, random_state=42, ...)</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-12\" type=\"checkbox\" ><label for=\"sk-estimator-id-12\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">XGBRegressor</label><div class=\"sk-toggleable__content fitted\"><pre>XGBRegressor(base_score=None, booster=None, callbacks=None,\n",
" colsample_bylevel=None, colsample_bynode=None,\n",
" colsample_bytree=0.7508184716426058, device=None,\n",
" early_stopping_rounds=None, enable_categorical=False,\n",
" eval_metric=None, feature_types=None, gamma=0.020833743645897518,\n",
" grow_policy=None, importance_type=None,\n",
" interaction_constraints=None, learning_rate=0.05075327204554973,\n",
" max_bin=None, max_cat_threshold=None, max_cat_to_onehot=None,\n",
" max_delta_step=None, max_depth=5, max_leaves=None,\n",
" min_child_weight=None, missing=nan, monotone_constraints=None,\n",
" multi_strategy=None, n_estimators=300, n_jobs=-1,\n",
" num_parallel_tree=None, random_state=42, ...)</pre></div> </div></div></div></div></div></div></div></div></div>"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 27
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-24T00:39:44.858277Z",
"start_time": "2025-03-24T00:39:44.835402Z"
}
},
"cell_type": "code",
"source": [
"#模型预测\n",
"best_model = search.best_estimator_\n",
"y_pred = best_model.predict(X_test)\n",
"#评估指标\n",
"metrics=cal_metrics(y_pred, y_test)\n",
"#输出结果\n",
"print(\"最佳参数组合:\", search.best_params_)\n",
"print(\"评估指标:\")\n",
"for k, v in metrics.items():\n",
" print(f\"{k}: {v:.2f}\")"
],
"id": "fe076794bae89ccb",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"最佳参数组合: {'colsample_bytree': 0.7508184716426058, 'gamma': 0.020833743645897518, 'learning_rate': 0.05075327204554973, 'max_depth': 5, 'n_estimators': 300, 'subsample': 0.8259294864645319}\n",
"评估指标:\n",
"RMSE: 12.30\n",
"R-squared: 0.92\n",
"MAE: 7.87\n"
]
}
],
"execution_count": 28
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-03-24T00:39:45.901545Z",
"start_time": "2025-03-24T00:39:44.946371Z"
}
},
"cell_type": "code",
"source": [
"# Visualize predictions against the ground truth\n",
"def plot_results(y_true, y_pred, timestamps, scores=None):\n",
"    \"\"\"Plot observed vs. predicted AQI over time and show MAE / R-squared in the title.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    y_true, y_pred : array-like\n",
"        Observed and predicted values, plotted against ``timestamps``.\n",
"    timestamps : array-like\n",
"        X positions; hour-based date locators and a date formatter are applied\n",
"        to the x-axis, so datetime-like values are expected.\n",
"    scores : mapping, optional\n",
"        Must provide the keys \"MAE\" and \"R-squared\". When omitted, the\n",
"        notebook-level ``metrics`` dict is used, which keeps existing calls working.\n",
"    \"\"\"\n",
"    if scores is None:\n",
"        # Backward-compatible fallback: the original read the global directly\n",
"        scores = metrics\n",
"\n",
"    # Explicit figure/axes instead of the implicit pyplot state machine\n",
"    fig, ax = plt.subplots(figsize=(18, 8))\n",
"\n",
"    # Draw the observed and the predicted series\n",
"    ax.plot(timestamps, y_true, label='真实值',\n",
"            marker='o', markersize=4, linewidth=1, alpha=0.8)\n",
"    ax.plot(timestamps, y_pred, label='预测值',\n",
"            linestyle='--', marker='x', markersize=5, alpha=0.9)\n",
"\n",
"    # Time axis: major tick every 12 hours, minor tick every 3 hours\n",
"    ax.xaxis.set_major_locator(HourLocator(interval=12))\n",
"    ax.xaxis.set_minor_locator(HourLocator(interval=3))\n",
"    ax.xaxis.set_major_formatter(DateFormatter(\"%m-%d %H:%M\"))\n",
"\n",
"    # Title, axis labels, grid and legend\n",
"    ax.set_title(f'AQI预测效果对比MAE={scores[\"MAE\"]:.2f}, R-squared={scores[\"R-squared\"]:.2f}',\n",
"                 fontsize=14, pad=20)\n",
"    ax.set_xlabel('时间', fontsize=12)\n",
"    ax.set_ylabel('AQI', fontsize=12)\n",
"    ax.grid(True, which='both', linestyle='--', alpha=0.5)\n",
"    ax.legend()\n",
"\n",
"    # Rotate the tick labels so neighbouring timestamps do not overlap\n",
"    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')\n",
"    fig.tight_layout()\n",
"    plt.show()\n",
"\n",
"plot_results(y_test, y_pred, test_data.index, scores=metrics)\n",
"\n",
"# Visualize feature importances\n",
"def plot_importance(model, features, top_n=20):\n",
"    \"\"\"Draw a horizontal bar chart of the model's largest feature importances.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    model : object\n",
"        Fitted estimator exposing ``feature_importances_``.\n",
"    features : sequence\n",
"        Labels used as the index of the importance values (one per value).\n",
"    top_n : int, default 20\n",
"        Maximum number of features to display; fewer are shown when the\n",
"        model has fewer features, and the title reports the number drawn.\n",
"    \"\"\"\n",
"    importance = pd.Series(model.feature_importances_, index=features)\n",
"    # Keep the top_n largest values, then sort ascending so that the most\n",
"    # important feature is drawn last, i.e. at the top of the barh chart\n",
"    top_features = importance.sort_values(ascending=False).iloc[:top_n].sort_values()\n",
"\n",
"    fig, ax = plt.subplots(figsize=(12, 8))\n",
"    top_features.plot.barh(ax=ax)\n",
"\n",
"    # Value labels: bar_label centres them on each bar and pads in points,\n",
"    # so placement does not depend on the scale of the importances\n",
"    ax.bar_label(ax.containers[0], fmt='%.2f', padding=3,\n",
"                 fontsize=10, color='dimgrey')\n",
"\n",
"    # Report the number of bars actually drawn, not the requested maximum\n",
"    ax.set_title('Top {} 重要特征'.format(len(top_features)), fontsize=14)\n",
"    ax.set_xlabel('特征重要性', fontsize=12)\n",
"    fig.tight_layout()\n",
"    plt.show()\n",
"\n",
"\n",
"plot_importance(best_model, features)"
],
"id": "2551eec52baeb4cb",
"outputs": [
{
"data": {
"text/plain": [
"<Figure size 1800x800 with 1 Axes>"
],
"image/png": "iVBORw0KGgoAAAANSUhEUgAABv0AAAMVCAYAAABUfzjNAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd5hTVf4G8PcmmSRTM41eh6aCCEhHFCuWtYCyNmyoq4gF7F1sP9FFV0VZxF0VRRZdRUUXQcWCShUFBaTMMDP0ganJtLR7z++PkMtkUiYJSSYJ7+d55oGce+/5vjk3cyI53htJCCFARERERERERERERERERAlL09oBiIiIiIiIiIiIiIiIiOjocNGPiIiIiIiIiIiIiIiIKMFx0Y+IiIiIiIiIiIiIiIgowXHRj4iIiIiIiIiIiIiIiCjBcdGPiIiIiIiIiIiIiIiIKMFx0Y+IiIiIiIiIiIiIiIgowXHRj4iIiIiIiIiIiIiIiCjBcdGPiIiIiIiIiIiIiIiIKMFx0Y+IiIiIiIiIiIiIiIgowXHRj4iIiIiIiIiIiIiIiCjBcdGPiIiIiOgYYTabUVVVFfJxNpsNr7/+OtatW+dz+0cffYSrrrrK7/FPPvkkLr74Yvz2228t1tq1axeKioqC/tmzZ0/Iz8eXDRs24Mknn8Sff/7ptU1RFNTU1MBqtfo8dsWKFXjyySdRXl4ekSz+rFy5Mqr9E1F08HeXiIiIiGJF19oBiIiIiIjIt8bGRsiyDIPBgJSUFI9tFosF27Ztg9FohF6v9zrW6XSisbER/fr1Q1paGgDg5ptvxsqVK/Huu+/inHPOwdq1a1FYWOh1bE5ODv7yl794tN1555144YUXMGzYMK/9q6qq8MEHH+C6667D+eef77X9hx9+wKZNm1BQUNDicz7nnHN8ZvLn5JNPxq+//hr0/v4UFRXhqaeewrBhw9C3b1+PbWVlZejUqRMeeeQR/N///Z/XsStXrsRTTz2FiRMnok2bNkedxZeZM2fi4YcfxsqVKzF8+PCo1CCiyNu0aRNOP/10PProo3jyySdbOw4RERERJTku+hERERERxakXXngBTz31lFd7eno6lixZgrPPPhtGoxEGgwFmsxkAYDKZAAAOhwONjY3YsGED+vXrBwB4/PHHMWHCBJx33nmYN28e1q5di7fffhtdu3ZV+z5w4AD69u3rsejnXlT0tbgIANdddx1mzJiBkpISr201NTVYtWoVHnroIeTk5LT4nFNTU3Huuedi2bJlatv//vc/XHTRRSgsLESvXr3U9nHjxkXs6rrc3FwA8FpcdWdq+mdz7nExGAwRydLc7Nmz8cQTT+DTTz/lgh9Rgunfvz++/PJLjBs3DmlpaXjggQdaOxIRERERJTHe3pOIiIiIKAR79uyBJEkYN26cz+2KouCll15Cnz59YDAYUFBQgGeffRZOp9Pn/jfccAMkSUJdXZ3XtqlTp6K0tBT79+9HeXk5Xn31Vej1erz88ssYM2YMHA4HamtrUVFRgZNOOglXXnklKioqUFFRAbPZDLvdri74AcBJJ52E9evX45ZbbsEFF1wAg8GAYcOGYdu2berP+PHj1UWsIUOG4IorroAkSR65HA4Hxo4di0suuQQTJkzAtddei0GDBuG7777DhAkTcNFFF+Huu+8GAHz++edwOBw4/fTTUVRUhMLCQmzbtg1btmzxeStNrVYb1Hlw0+m8/z/G7777DieffDJGjRqF0aNHe/zMnDnTZz/uus2fa9NtvmoB/hdFLRYLFi1aFPyT8WHlypW455578OGHH+Kiiy7y2Hb66adDkiSMHz/eoz0nJweSJPm8qujMM8+EVqtFZWWl17bu3btDkiSfP6+//npIud3Z/P3MmzcvpP7KyspwxRVXoEOHDkhNTUWvXr3w+OOPw263q/u4f5f8/UTjKqsnn3zSo0ZqaipOOOEEPPnkk7BYLBGvl+zCPU+KouCFF15Az5491f
9xoLi4OKwMdrsdzz77LE444QQYDAb06tULzz//vNccfvDgQVx33XVo06YN9Ho9unTpgpdfftlnn+eccw4+++wzPP7441i+fHlYuYiIiIiIgsEr/YiIiIiIQrBkyRIAwPLly2Gz2byu7rrnnnvw6quvYsyYMbjzzjvxv//9D48//jj27t2LN954I6RaOTk5sFgsmDp1KsaOHYuHHnoI//3vf3HJJZd47CeEQGFhISZNmtRin1lZWZgzZw6AIwtcVqsVr7zyCs4++2yP9tTUVBiNRq8+JEnC4MGDkZqa6vPqP6vVivbt2wMA/v3vfwMAzjrrLK/9+vXrh82bN3u0aTQafPXVVz4X33r37u3VNmbMGK+2jh074tJLL4Ver4dGc+T/c3zooYdw2WWXYceOHUhNTfW4qs/9XYdVVVXYtWsXAKBbt25qpkB8bd+6dSuuuOIKbNmyBf/73/983va0JbIs429/+xvuuusuXHzxxX7327Bhg/r3kpIS1NTU+NyvtrYWP//8MxRFwbJlyzBx4kSvfQoKCvD00097tfu6rWsgjz76KG6++Wav9h9++AFvvfWWz3Ppj3uRuaamBvfeey+ysrKwatUqPPvss9izZ4+6gHjrrbeqr+GmNm/ejBdeeAF9+vQJ6TmE4umnn0ZBQQEqKyvx7bff4qmnnsLXX3+NlStX+nwtU2TdcccdmDNnDi688EJMnTpVvYXxH3/8gfT09KD7URQFF110Eb799ltMnjwZ999/P77//ns8/PDDWLNmDT777DMArjnu/PPPx44dO3DrrbfihBNOwPz583HPPfcAgPo/PTR1zjnn4OGHH8Ytt9yC7du3+7yqmIiIiIjoqAkiIiIiIgrahRdeKAAIAOKrr77y2FZYWCg0Go0YOXKksNvtQgghnE6nOO2004QkSWLbtm1e/V1//fUCgKitrfVZr7GxUdx5550CgJg4caLHthkzZgij0SjS09OFVqsVqampIj09XaSnpwuj0Shmzpyp7ut0OsVzzz0n6urq1LZ7771XjBkzRjgcDgFATJ8+XVx//fVizJgxQgghxowZI66//nohhBAAxMsvv+yV76abbhJvv/22EEKIPXv2iCuvvFLU1NQIIYTYuHGjAKBuF0KI008/XYwdO1ZYrVZhsVi8+hs4cKA49dRTRWFhofrz5ptvCgDi22+/9Wg/++yzxamnnupz3JorLi4WAMTXX3+tnj+NRiO0Wq3QarVCo9EIAEKSJAFADB8+XD22trZWABAzZszw2fdrr70mAIgDBw6IxsZG8dRTT4nU1FRhMBjEgw8+6PN5BuOjjz4S2dnZfo8fM2aM+lyqqqqEEEJ88sknatv06dM99v/444/VbVdddZVXf926dRODBw8OmKmqqkqUl5f7/XGfe1+cTqfo16+fuPjii1t45p6++uorAUB88803Hu3XXnut0Ol0wmazBTz+3HPPFQMHDhSyLIdUNxjTp08XAMQvv/zi0e7+vV69enXEayazpq/bmpqagK8192t+3bp1AoCYMGGCUBRFCCFEZWWlyM7OFk899VRI9T/44AMBQLzyyise7VOmTPF4Db722mtCr9d7nHer1SoKCgpEz549/fbf2Ngo2rVrJ959992QchERERERBYu39yQiIiIiCpLVasV3332HSZMmISUlRb3qz+3zzz+Hoii48cYb1as4tFotrr/+eggh8MUXX4RUr6GhAVqtFrNmzcLChQtxwgkneGzXarUwGAyoq6tDdXU1Vq5ciX379qGurg5ardbjCrS1a9di+vTpGDFihHolm5tOp4Ner0d2dnZI+TZs2IC3334bK1asAOC6BeOiRYtw6623AnBdWde+fXtceeWV6jGNjY3Iy8uDwWBAZmamV5+yLCMtLQ29evVSfzp06AAA6Nq1q0d7enq6z9umCiHwyCOP4Ndff1Xbfv31V2i1WowcORL79u1DY2MjZFmG0+mE0+nEt99+CwD4+uuv4XQ68f3333v1+/XXX+
Oxxx7Do48+ivvvvx9Tp0712G/u3Lno1asXnnrqKVx22WXYtm0bnn/+eZ/PMxgff/wxJkyYENTxGzduBOB51V9zS5Y
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<Figure size 1200x800 with 1 Axes>"
],
"image/png": "iVBORw0KGgoAAAANSUhEUgAABKcAAAMVCAYAAACiJZeNAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAADR9klEQVR4nOzde1SU5733/88MR4VhEBBMDAcJBzWRQBofUbsTNLqCMVvFralb4zI7Wm0SbarWNPkl1fpEt9ldtdEnqdA2LWytYozW3WCa6mCaFCRUYyPioRKNGgHloKMwwiCH+f3BcrYERdSBMfp+rXWv5X247ut7u7QNH6+DweFwOAQAAAAAAAC4gdHdBQAAAAAAAODuRTgFAAAAAAAAtyGcAgAAAAAAgNsQTgEAAAAAAMBtCKcAAAAAAADgNoRTAAAAAAAAcBvCKQAAAAAAALgN4RQAAAAAAADchnAKAAAAAAAAbuPp7gIAAADQeS0tLTp16pQOHTqkv//975oxY4b69evX5hm73a79+/fLx8dHPj4+N/R+h8OhxsZGeXt7q3///u3uHz58WNu3b9fUqVMVGhra7v6OHTt0/PhxzZkz57p9NTY26rHHHtO///u/68UXX5TRyL+bAgBwNyKcAgAAuE2cO3dO7777rurr652H1WrV2bNndfbsWVVWVqqsrEyXLl2SJBkMBv3jH//Q1q1b5eHh4XxPWVmZhg8f7gynDAaDpNbQ6uLFiwoODpbUGg7V1NQoKCjI+YzD4dClS5f06KOP6sMPP2xXY1FRkebPn68xY8ZcNZzavn27Nm7c2Klw6u2339Znn32mBx98UEajUcXFxfL19ZWnp6eznis5HA41NTWpubn5qsEZAAD4diKcAgAAuE306tVLf/3rX3XhwgUFBwcrKChIvXr1UkREhJYvX64JEybonXfe0T333KOwsDCFhobK07P9f87df//9amxsbHd91apVmj9/vqqrqyVJubm5Gj16tA4ePKg+ffp0qsYePXo4a73swIEDMhgM8vDwUH19vSTpn//8pySpoaFBRqNRgwYNavOe/fv367XXXtPQoUP1zjvvSJKSkpLU3Nx83Rruv/9+HT16tFP1AgCA2x/hFAAA6HYnTpxoNxXtSo899pg++eST7ivoGxobG7V8+XJlZmaqrKxMAQEBmjx5sn75y1/Kz8+vzbO/+93vtHLlSh0/flyJiYn61a9+pYcffviG+7x48aK8vLz05z//ud2ooaamJi1fvlwPPfSQnnzyyXZtm5ubZbfb5e3tLS8vL+f1hoaGNiHV5RFXNptNUutIKkmqq6tzXpPkHL102ZkzZ/Twww/r5z//uQIDAyVJ3t7ezvtTpkzRwYMH29Q0YMAA568feugh7du3z3n+9ddf68knn1SvXr20efNm57s++OADeXt7y2g0ymg06i9/+Yv+67/+S7/61a80cOBAORwONTc3txklBgAAvv0IpwAAQLfr3bu31q1bJ6k1QJk1a5b+5V/+RbNnz5YkhYWFubM8/fCHP9Svf/1rPf3003r88cf197//Xb/5zW905swZ/elPf3I+9/Of/1w/+clP9N3vflc/+MEP9Kc//UmjRo3SwYMHdc8999xQn9/73veuOo3uSkuXLtXSpUuvef+jjz5Samqq8/zFF1/U7373u3bPmUymNuf3339/m/ONGzfqe9/7nvP8q6++0unTpxUdHa1z5861e9/f//53+fj4yNPTUz/72c+UlZWlEydOSGoNzhoaGpzP7t+/X2PHjlV1dbU++eQTeXt76+zZswoODm4XvF1+x8MPP6zk5ORrfjcAAPh2I5wCAADdzs/PT88884yk1lE8s2bNUnR0tPOaOx04cEAZGRlavXq1fvjDH0qSvv/976u+vl4bNmxQaWmp7rvvPp06dUqLFy/W0KFDtXPnTnl7e2vOnDkaNGiQXnvtNf3+97+/oX5/85vfqLm5+arrLTU3N+u+++7TwoUL9e
Mf/7hd25aWFtnt9nZT83x9fdWnTx/t2bNHUusor5/97Gc6deqUJCk/P1///u//rn/84x/q3bu3jhw5olGjRsnX17fNe4qKiiRJmZmZOn36tCRpwYIFampq0m9/+9t2o8mu5OHhoZ49ezrPAwIC5Onpqf/+7/9WcnKyXnrpJW3cuFF//vOf9Z3vfKezv10AAOAOQjgFAABwhZaWFq1YsUJz585tcz0xMVEbNmxQVVWV7rvvPn3wwQdqaGjQj3/8Y+e0NB8fHz333HP6+c9/rt/+9rc3NP3s3nvvldQ6he+b6yldXofJbrfr/Pnzbe6ZTCb17dv3qu/09vaWh4eH7rvvPkmS2WyWJOd5SEiIJOmee+5Rnz59nO++csqeJBUUFOiee+5RVVWVzp49K0k6ffq0mpqaVFFRocrKSvXo0UMeHh6qrq5WY2Ojc80pqe26U1FRUTpw4ID8/Px09OhR/frXv1ZcXJwGDhzY6d8rAABwZ2G/XgAAcNtbu3atBg0aJB8fH0VHR+vNN99US0uL8/6zzz6rwMBAvf/++4qJiZGPj48SExOvO03uahISEvTKK6/IaGz7n0mXp67Fx8dLkoqLiyVJKSkpbZ77zne+I6vVquPHj99w35JUWVmpAQMGtDkefPBBSdKvfvWrdvd+9KMfXfNdly5dUnNzs06cOKETJ044p+RdPq+oqJAknTp1SidOnFB5ebmz3ZU++eQTPffcc/qf//kfvfbaa5Kk7OxsWSwWffzxxxo8eLAefPBBDRgwQL/61a9UXl7epsbExERNnz7d+T4/Pz+1tLRoxowZ8vb21ubNm9WjRw/l5eVpypQpzpFa39TU1KSLFy86F10HAAB3BsIpAABwW1u+fLlmzJihsLAw/fKXv9SQIUP06quvasaMGW2es9lsmjFjhv7t3/5Nb775pux2u8aNG6fc3NxbrqGkpER/+tOfNHXqVOcUNavVqsDAQAUFBbV59vJ6WZfXS7pRl3fD+/Wvf63GxkY1NjY6Fy7/6U9/6rzW2Niohx56qN0UvCvZ7XadOXNG/fr1U79+/fTGG29IkvP88jTK//N//o/69eunJ554wtnuMpvNpjFjxmjs2LFX7WPatGlqaGhQc3OzHA6H88jLy5MkHT9+XA0NDSooKGjT7oc//KEKCgo0ZcoUxcXFSZLKy8v13nvvOUdnXTZ06FAZDAZ5eXnJ399fP/3pTzv3mwkAAL4VCKcAAMBt69SpU/rZz36m0aNHy2Kx6MUXX1R2drbmzZunP/zhD9q5c6fz2ebmZq1YsUL/9V//pfnz5+uTTz6Rl5eXli1bdks1tLS0aNasWfL19XWGO5ev+/v7t3v+cnj1zel3nXV5l7w5c+bIy8tLXl5ezgDqjTfecF7z8vJSUVFRh1MH33333TaBkcPhUE5OjgYNGqQDBw60u3f5uHIxdH9/f/3mN7/R0KFD27z7vffe0/jx41VXVyeHw9FunawrGY3GNnW+8cYb+tWvfuW8d9nl7/xm4PbWW2/po48+0gcffKD3339f06ZN6/D3EAAAfLuw5hQAALht/eUvf1FTU5NefPHFNuHHvHnz9Pbbb2vbtm16/PHHndefffZZ56/79Omjf/mXf3GO4LlZixcvVl5entauXdtmbacrF/m+ksPhkNR29NGNaGpqkiStXLlSU6dOlXTtBdEffvhh53pUVzpy5IguXbokLy+vNtftdrvmzJmjxsZGVVZWXjPYstlsslqteuCBB5xrYZ05c0Y5OTnKysqSJP3iF7/Q1KlTdejQIQ0bNuya39OvXz9J0owZM5SVlaU//OEPWrx4saZNm6a//OUvbZ69XM83p1QmJyezWx8AAHcwwikAAHDbqqyslKR2C35fXtD78ppJUutom8sLfl8WEhKihoYGNTQ0yMfH54b7/5//+R/953/+p55//vk2ayZdrun06dNqbGxsEwJVVVVJat2V7mZcXk9p4cKFWrhwYZt7K1eu1MqVK6/6/JWef/55ff
bZZ+rRo4cz6HE4HDp37py8vLwUEBCgyZMnS2oNvs6fPy8/Pz/niKXLv2fZ2dn6t3/7N40ZM0Z/+ctf1KNHDyUkJEi
},
"metadata": {},
"output_type": "display_data"
}
],
"execution_count": 29
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}