From 7c7137380c4d2a4bc39750376f1abf660ec501a2 Mon Sep 17 00:00:00 2001 From: Bairly <2652270566@qq.com> Date: Mon, 24 Mar 2025 17:31:14 +0800 Subject: [PATCH] forth commit --- air_quality_prediction.ipynb | 411 ++++++++++++++++++++++++++--------- 1 file changed, 306 insertions(+), 105 deletions(-) diff --git a/air_quality_prediction.ipynb b/air_quality_prediction.ipynb index 066257a..f3cc11e 100644 --- a/air_quality_prediction.ipynb +++ b/air_quality_prediction.ipynb @@ -16,8 +16,8 @@ "metadata": { "collapsed": true, "ExecuteTime": { - "end_time": "2025-03-24T08:52:34.979118Z", - "start_time": "2025-03-24T08:52:34.974080Z" + "end_time": "2025-03-24T09:29:32.889415Z", + "start_time": "2025-03-24T09:29:32.882312Z" } }, "cell_type": "code", @@ -53,13 +53,13 @@ ], "id": "initial_id", "outputs": [], - "execution_count": 54 + "execution_count": 48 }, { "metadata": { "ExecuteTime": { - "end_time": "2025-03-24T08:52:35.333501Z", - "start_time": "2025-03-24T08:52:34.979118Z" + "end_time": "2025-03-24T09:29:33.256188Z", + "start_time": "2025-03-24T09:29:32.892428Z" } }, "cell_type": "code", @@ -80,18 +80,218 @@ " plt.rcParams['font.family'] = font_prop.get_name()\n", " except:\n", " print(f\"警告:{font_path} 字体加载失败,请检查路径有效性\")\n", - "# 读取数据\n", - "data=pd.read_excel('北京市空气质量指数与气象数据.xlsx')\n", - "data.head()\n", "\n", "try:\n", " os.mkdir('./images')\n", "except FileExistsError:\n", - " pass" + " pass\n", + "#读取数据\n", + "data=pd.read_excel('北京市空气质量指数与气象数据.xlsx')\n", + "data.head()" ], "id": "92ea7ba1218799cd", - "outputs": [], - "execution_count": 55 + "outputs": [ + { + "data": { + "text/plain": [ + " date hour AQI CO NO2 O3 PM10 \\\n", + "0 2022-11-01 2 18.371429 0.211429 23.771429 29.057143 13.257143 \n", + "1 2022-11-01 5 21.914286 0.180000 26.571429 20.142857 18.914286 \n", + "2 2022-11-01 8 28.628571 0.311429 30.028571 14.285714 27.942857 \n", + "3 2022-11-01 11 19.000000 0.237143 17.971429 40.529412 17.852941 \n", + "4 2022-11-01 14 21.742857 0.252941 15.588235 53.617647 20.941176 \n", + "\n", + " PM2.5 SO2 T ... P Pa U Ff Tn Tx VV Td \\\n", + "0 3.057143 2.628571 6.7 ... 770.5 0.1 36.0 1.0 5.3 17.3 30.0 -7.3 \n", + "1 3.771429 2.542857 2.0 ... 770.8 0.3 62.0 0.0 1.9 17.3 7.0 -4.5 \n", + "2 6.857143 2.400000 6.6 ... 771.7 0.9 56.0 0.0 0.9 17.3 10.0 -7.1 \n", + "3 5.914286 2.176471 13.5 ... 771.3 -0.4 19.0 2.0 0.9 17.3 30.0 -9.7 \n", + "4 6.742857 2.000000 15.7 ... 768.6 -2.7 19.0 2.0 0.9 17.3 30.0 -7.9 \n", + "\n", + " RRR tR \n", + "0 0.0 12 \n", + "1 0.0 12 \n", + "2 0.0 12 \n", + "3 0.0 12 \n", + "4 0.0 12 \n", + "\n", + "[5 rows x 21 columns]" + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
datehourAQICONO2O3PM10PM2.5SO2T...PPaUFfTnTxVVTdRRRtR
02022-11-01218.3714290.21142923.77142929.05714313.2571433.0571432.6285716.7...770.50.136.01.05.317.330.0-7.30.012
12022-11-01521.9142860.18000026.57142920.14285718.9142863.7714292.5428572.0...770.80.362.00.01.917.37.0-4.50.012
22022-11-01828.6285710.31142930.02857114.28571427.9428576.8571432.4000006.6...771.70.956.00.00.917.310.0-7.10.012
32022-11-011119.0000000.23714317.97142940.52941217.8529415.9142862.17647113.5...771.3-0.419.02.00.917.330.0-9.70.012
42022-11-011421.7428570.25294115.58823553.61764720.9411766.7428572.00000015.7...768.6-2.719.02.00.917.330.0-7.90.012
\n", + "

5 rows × 21 columns

\n", + "
" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 49 }, { "metadata": {}, @@ -105,8 +305,8 @@ { "metadata": { "ExecuteTime": { - "end_time": "2025-03-24T08:52:35.402673Z", - "start_time": "2025-03-24T08:52:35.388099Z" + "end_time": "2025-03-24T09:29:33.295495Z", + "start_time": "2025-03-24T09:29:33.282885Z" } }, "cell_type": "code", @@ -120,7 +320,7 @@ "indicators = ['AQI', 'PM2.5', 'PM10', 'CO', 'NO2', 'O3','SO2']\n", "colors = ['#2d87bb', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf', '#1f77b4', '#ffbb78', '#98df8a', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf', '#1f77b4', '#ffbb78', '#98df8a', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf', '#1f77b4', '#ffbb78', '#98df8a', '#d62728',]\n", "\n", - "normalized = (hourly_data[indicators] - hourly_data[indicators].mean(axis=0)) / hourly_data[indicators].std(axis=0)\n" + "normalized = (hourly_data[indicators] - hourly_data[indicators].mean(axis=0)) / hourly_data[indicators].std(axis=0)" ], "id": "118b1b48e798a7ba", "outputs": [ @@ -134,13 +334,13 @@ "output_type": "display_data" } ], - "execution_count": 56 + "execution_count": 50 }, { "metadata": { "ExecuteTime": { - "end_time": "2025-03-24T08:52:37.573757Z", - "start_time": "2025-03-24T08:52:35.462160Z" + "end_time": "2025-03-24T09:29:35.233918Z", + "start_time": "2025-03-24T09:29:33.325526Z" } }, "cell_type": "code", @@ -189,13 +389,13 @@ "output_type": "display_data" } ], - "execution_count": 57 + "execution_count": 51 }, { "metadata": { "ExecuteTime": { - "end_time": "2025-03-24T08:52:42.599194Z", - "start_time": "2025-03-24T08:52:37.631367Z" + "end_time": "2025-03-24T09:29:40.302614Z", + "start_time": "2025-03-24T09:29:35.234940Z" } }, "cell_type": "code", @@ -310,7 +510,7 @@ "output_type": "display_data" } ], - "execution_count": 58 + "execution_count": 52 }, { "metadata": {}, @@ -326,8 +526,8 @@ { "metadata": { "ExecuteTime": { - "end_time": "2025-03-24T08:52:43.683653Z", - "start_time": "2025-03-24T08:52:42.659596Z" + "end_time": "2025-03-24T09:29:41.397475Z", + "start_time": "2025-03-24T09:29:40.304595Z" } }, "cell_type": "code", @@ -350,13 +550,13 @@ "output_type": "display_data" } ], - "execution_count": 59 + "execution_count": 53 }, { "metadata": { "ExecuteTime": { - "end_time": "2025-03-24T08:52:44.613944Z", - "start_time": "2025-03-24T08:52:43.739907Z" + "end_time": "2025-03-24T09:29:42.305851Z", + "start_time": "2025-03-24T09:29:41.400536Z" } }, "cell_type": "code", @@ -485,7 +685,7 @@ "output_type": "display_data" } ], - "execution_count": 60 + "execution_count": 54 }, { "metadata": {}, @@ -499,22 +699,22 @@ { "metadata": { "ExecuteTime": { - "end_time": "2025-03-24T08:52:45.132946Z", - "start_time": "2025-03-24T08:52:44.775009Z" + "end_time": "2025-03-24T09:29:42.661368Z", + "start_time": "2025-03-24T09:29:42.305851Z" } }, "cell_type": "code", "source": [ "#重新读取数据\n", "data=pd.read_excel('北京市空气质量指数与气象数据.xlsx')\n", - "# 合并 date 和 hour 为新的 data_hour 列\n", - "data['data_hour'] = pd.to_datetime(data['date']) + pd.to_timedelta(data['hour'], unit='h')\n", + "# 合并 date 和 hour 为新的 date_hour 列\n", + "data['date_hour'] = pd.to_datetime(data['date']) + pd.to_timedelta(data['hour'], unit='h')\n", "# 设置 data_hour 为索引列\n", - "data = data[['data_hour', 'AQI']].set_index('data_hour') # 仅保留时间和AQI" + "data = data[['date_hour','date','hour', 'AQI']].set_index('date_hour') # 仅保留时间和AQI" ], "id": "d1bdac1e4e1562f2", "outputs": [], - "execution_count": 61 + "execution_count": 55 }, { "metadata": {}, @@ -525,8 +725,8 @@ { "metadata": { "ExecuteTime": { - "end_time": "2025-03-24T08:52:46.248860Z", - "start_time": "2025-03-24T08:52:45.190173Z" + "end_time": "2025-03-24T09:29:43.715506Z", + "start_time": "2025-03-24T09:29:42.661368Z" } }, "cell_type": "code", @@ -599,7 +799,7 @@ ] } ], - "execution_count": 62 + "execution_count": 56 }, { "metadata": {}, @@ -610,17 +810,18 @@ { "metadata": { "ExecuteTime": { - "end_time": "2025-03-24T08:52:46.324908Z", - "start_time": "2025-03-24T08:52:46.305955Z" + "end_time": "2025-03-24T09:29:43.741321Z", + "start_time": "2025-03-24T09:29:43.717328Z" } }, "cell_type": "code", "source": [ "\"\"\"\n", "该模型在假设不考虑测试集其他指标的情况下,仅使用AQI数据对未来AQI进行<单步预测>,即每次预测都是根据之前时间点的真实AQI值进行的。\n", - "整体运行时间约为20s,请耐心等待。\n", + "整体运行时间约为25s,请耐心等待。\n", "\"\"\"\n", "#特征工程\n", + "data=data[['AQI']]\n", "data_processed = data.copy()\n", "\n", "#时间分解特征\n", @@ -652,13 +853,13 @@ ], "id": "66f104e110aba36", "outputs": [], - "execution_count": 63 + "execution_count": 57 }, { "metadata": { "ExecuteTime": { - "end_time": "2025-03-24T08:53:09.696324Z", - "start_time": "2025-03-24T08:52:46.375978Z" + "end_time": "2025-03-24T09:30:07.406880Z", + "start_time": "2025-03-24T09:29:43.754467Z" } }, "cell_type": "code", @@ -709,15 +910,15 @@ " importance_type=None,\n", " interaction_constraints=None,\n", " learning_rate=...\n", - " 'learning_rate': ,\n", - " 'max_depth': ,\n", + " 'learning_rate': ,\n", + " 'max_depth': ,\n", " 'n_estimators': [100, 200, 300],\n", - " 'subsample': },\n", + " 'subsample': },\n", " random_state=42, scoring='neg_mean_absolute_error',\n", " verbose=1)" ], "text/html": [ - "
RandomizedSearchCV(cv=3,\n",
+       "
RandomizedSearchCV(cv=3,\n",
        "                   estimator=XGBRegressor(base_score=None, booster=None,\n",
        "                                          callbacks=None,\n",
        "                                          colsample_bylevel=None,\n",
@@ -1134,12 +1335,12 @@
        "                                          importance_type=None,\n",
        "                                          interaction_constraints=None,\n",
        "                                          learning_rate=...\n",
-       "                                        'learning_rate': <scipy.stats._distn_infrastructure.rv_continuous_frozen object at 0x000002D1B4343E00>,\n",
-       "                                        'max_depth': <scipy.stats._distn_infrastructure.rv_discrete_frozen object at 0x000002D1ACAE9E20>,\n",
+       "                                        'learning_rate': <scipy.stats._distn_infrastructure.rv_continuous_frozen object at 0x000001E8657920C0>,\n",
+       "                                        'max_depth': <scipy.stats._distn_infrastructure.rv_discrete_frozen object at 0x000001E86E630FE0>,\n",
        "                                        'n_estimators': [100, 200, 300],\n",
-       "                                        'subsample': <scipy.stats._distn_infrastructure.rv_continuous_frozen object at 0x000002D1B224B620>},\n",
+       "                                        'subsample': <scipy.stats._distn_infrastructure.rv_continuous_frozen object at 0x000001E86E037200>},\n",
        "                   random_state=42, scoring='neg_mean_absolute_error',\n",
-       "                   verbose=1)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.