diff --git a/air_quality_prediction.ipynb b/air_quality_prediction.ipynb index 066257a..f3cc11e 100644 --- a/air_quality_prediction.ipynb +++ b/air_quality_prediction.ipynb @@ -16,8 +16,8 @@ "metadata": { "collapsed": true, "ExecuteTime": { - "end_time": "2025-03-24T08:52:34.979118Z", - "start_time": "2025-03-24T08:52:34.974080Z" + "end_time": "2025-03-24T09:29:32.889415Z", + "start_time": "2025-03-24T09:29:32.882312Z" } }, "cell_type": "code", @@ -53,13 +53,13 @@ ], "id": "initial_id", "outputs": [], - "execution_count": 54 + "execution_count": 48 }, { "metadata": { "ExecuteTime": { - "end_time": "2025-03-24T08:52:35.333501Z", - "start_time": "2025-03-24T08:52:34.979118Z" + "end_time": "2025-03-24T09:29:33.256188Z", + "start_time": "2025-03-24T09:29:32.892428Z" } }, "cell_type": "code", @@ -80,18 +80,218 @@ " plt.rcParams['font.family'] = font_prop.get_name()\n", " except:\n", " print(f\"警告:{font_path} 字体加载失败,请检查路径有效性\")\n", - "# 读取数据\n", - "data=pd.read_excel('北京市空气质量指数与气象数据.xlsx')\n", - "data.head()\n", "\n", "try:\n", " os.mkdir('./images')\n", "except FileExistsError:\n", - " pass" + " pass\n", + "#读取数据\n", + "data=pd.read_excel('北京市空气质量指数与气象数据.xlsx')\n", + "data.head()" ], "id": "92ea7ba1218799cd", - "outputs": [], - "execution_count": 55 + "outputs": [ + { + "data": { + "text/plain": [ + " date hour AQI CO NO2 O3 PM10 \\\n", + "0 2022-11-01 2 18.371429 0.211429 23.771429 29.057143 13.257143 \n", + "1 2022-11-01 5 21.914286 0.180000 26.571429 20.142857 18.914286 \n", + "2 2022-11-01 8 28.628571 0.311429 30.028571 14.285714 27.942857 \n", + "3 2022-11-01 11 19.000000 0.237143 17.971429 40.529412 17.852941 \n", + "4 2022-11-01 14 21.742857 0.252941 15.588235 53.617647 20.941176 \n", + "\n", + " PM2.5 SO2 T ... P Pa U Ff Tn Tx VV Td \\\n", + "0 3.057143 2.628571 6.7 ... 770.5 0.1 36.0 1.0 5.3 17.3 30.0 -7.3 \n", + "1 3.771429 2.542857 2.0 ... 770.8 0.3 62.0 0.0 1.9 17.3 7.0 -4.5 \n", + "2 6.857143 2.400000 6.6 ... 771.7 0.9 56.0 0.0 0.9 17.3 10.0 -7.1 \n", + "3 5.914286 2.176471 13.5 ... 771.3 -0.4 19.0 2.0 0.9 17.3 30.0 -9.7 \n", + "4 6.742857 2.000000 15.7 ... 768.6 -2.7 19.0 2.0 0.9 17.3 30.0 -7.9 \n", + "\n", + " RRR tR \n", + "0 0.0 12 \n", + "1 0.0 12 \n", + "2 0.0 12 \n", + "3 0.0 12 \n", + "4 0.0 12 \n", + "\n", + "[5 rows x 21 columns]" + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
datehourAQICONO2O3PM10PM2.5SO2T...PPaUFfTnTxVVTdRRRtR
02022-11-01218.3714290.21142923.77142929.05714313.2571433.0571432.6285716.7...770.50.136.01.05.317.330.0-7.30.012
12022-11-01521.9142860.18000026.57142920.14285718.9142863.7714292.5428572.0...770.80.362.00.01.917.37.0-4.50.012
22022-11-01828.6285710.31142930.02857114.28571427.9428576.8571432.4000006.6...771.70.956.00.00.917.310.0-7.10.012
32022-11-011119.0000000.23714317.97142940.52941217.8529415.9142862.17647113.5...771.3-0.419.02.00.917.330.0-9.70.012
42022-11-011421.7428570.25294115.58823553.61764720.9411766.7428572.00000015.7...768.6-2.719.02.00.917.330.0-7.90.012
\n", + "

5 rows × 21 columns

\n", + "
" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 49 }, { "metadata": {}, @@ -105,8 +305,8 @@ { "metadata": { "ExecuteTime": { - "end_time": "2025-03-24T08:52:35.402673Z", - "start_time": "2025-03-24T08:52:35.388099Z" + "end_time": "2025-03-24T09:29:33.295495Z", + "start_time": "2025-03-24T09:29:33.282885Z" } }, "cell_type": "code", @@ -120,7 +320,7 @@ "indicators = ['AQI', 'PM2.5', 'PM10', 'CO', 'NO2', 'O3','SO2']\n", "colors = ['#2d87bb', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf', '#1f77b4', '#ffbb78', '#98df8a', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf', '#1f77b4', '#ffbb78', '#98df8a', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf', '#1f77b4', '#ffbb78', '#98df8a', '#d62728',]\n", "\n", - "normalized = (hourly_data[indicators] - hourly_data[indicators].mean(axis=0)) / hourly_data[indicators].std(axis=0)\n" + "normalized = (hourly_data[indicators] - hourly_data[indicators].mean(axis=0)) / hourly_data[indicators].std(axis=0)" ], "id": "118b1b48e798a7ba", "outputs": [ @@ -134,13 +334,13 @@ "output_type": "display_data" } ], - "execution_count": 56 + "execution_count": 50 }, { "metadata": { "ExecuteTime": { - "end_time": "2025-03-24T08:52:37.573757Z", - "start_time": "2025-03-24T08:52:35.462160Z" + "end_time": "2025-03-24T09:29:35.233918Z", + "start_time": "2025-03-24T09:29:33.325526Z" } }, "cell_type": "code", @@ -189,13 +389,13 @@ "output_type": "display_data" } ], - "execution_count": 57 + "execution_count": 51 }, { "metadata": { "ExecuteTime": { - "end_time": "2025-03-24T08:52:42.599194Z", - "start_time": "2025-03-24T08:52:37.631367Z" + "end_time": "2025-03-24T09:29:40.302614Z", + "start_time": "2025-03-24T09:29:35.234940Z" } }, "cell_type": "code", @@ -310,7 +510,7 @@ "output_type": "display_data" } ], - "execution_count": 58 + "execution_count": 52 }, { "metadata": {}, @@ -326,8 +526,8 @@ { "metadata": { "ExecuteTime": { - "end_time": "2025-03-24T08:52:43.683653Z", - "start_time": "2025-03-24T08:52:42.659596Z" + "end_time": "2025-03-24T09:29:41.397475Z", + "start_time": "2025-03-24T09:29:40.304595Z" } }, "cell_type": "code", @@ -350,13 +550,13 @@ "output_type": "display_data" } ], - "execution_count": 59 + "execution_count": 53 }, { "metadata": { "ExecuteTime": { - "end_time": "2025-03-24T08:52:44.613944Z", - "start_time": "2025-03-24T08:52:43.739907Z" + "end_time": "2025-03-24T09:29:42.305851Z", + "start_time": "2025-03-24T09:29:41.400536Z" } }, "cell_type": "code", @@ -485,7 +685,7 @@ "output_type": "display_data" } ], - "execution_count": 60 + "execution_count": 54 }, { "metadata": {}, @@ -499,22 +699,22 @@ { "metadata": { "ExecuteTime": { - "end_time": "2025-03-24T08:52:45.132946Z", - "start_time": "2025-03-24T08:52:44.775009Z" + "end_time": "2025-03-24T09:29:42.661368Z", + "start_time": "2025-03-24T09:29:42.305851Z" } }, "cell_type": "code", "source": [ "#重新读取数据\n", "data=pd.read_excel('北京市空气质量指数与气象数据.xlsx')\n", - "# 合并 date 和 hour 为新的 data_hour 列\n", - "data['data_hour'] = pd.to_datetime(data['date']) + pd.to_timedelta(data['hour'], unit='h')\n", + "# 合并 date 和 hour 为新的 date_hour 列\n", + "data['date_hour'] = pd.to_datetime(data['date']) + pd.to_timedelta(data['hour'], unit='h')\n", "# 设置 data_hour 为索引列\n", - "data = data[['data_hour', 'AQI']].set_index('data_hour') # 仅保留时间和AQI" + "data = data[['date_hour','date','hour', 'AQI']].set_index('date_hour') # 仅保留时间和AQI" ], "id": "d1bdac1e4e1562f2", "outputs": [], - "execution_count": 61 + "execution_count": 55 }, { "metadata": {}, @@ -525,8 +725,8 @@ { "metadata": { "ExecuteTime": { - "end_time": "2025-03-24T08:52:46.248860Z", - "start_time": "2025-03-24T08:52:45.190173Z" + "end_time": "2025-03-24T09:29:43.715506Z", + "start_time": "2025-03-24T09:29:42.661368Z" } }, "cell_type": "code", @@ -599,7 +799,7 @@ ] } ], - "execution_count": 62 + "execution_count": 56 }, { "metadata": {}, @@ -610,17 +810,18 @@ { "metadata": { "ExecuteTime": { - "end_time": "2025-03-24T08:52:46.324908Z", - "start_time": "2025-03-24T08:52:46.305955Z" + "end_time": "2025-03-24T09:29:43.741321Z", + "start_time": "2025-03-24T09:29:43.717328Z" } }, "cell_type": "code", "source": [ "\"\"\"\n", "该模型在假设不考虑测试集其他指标的情况下,仅使用AQI数据对未来AQI进行<单步预测>,即每次预测都是根据之前时间点的真实AQI值进行的。\n", - "整体运行时间约为20s,请耐心等待。\n", + "整体运行时间约为25s,请耐心等待。\n", "\"\"\"\n", "#特征工程\n", + "data=data[['AQI']]\n", "data_processed = data.copy()\n", "\n", "#时间分解特征\n", @@ -652,13 +853,13 @@ ], "id": "66f104e110aba36", "outputs": [], - "execution_count": 63 + "execution_count": 57 }, { "metadata": { "ExecuteTime": { - "end_time": "2025-03-24T08:53:09.696324Z", - "start_time": "2025-03-24T08:52:46.375978Z" + "end_time": "2025-03-24T09:30:07.406880Z", + "start_time": "2025-03-24T09:29:43.754467Z" } }, "cell_type": "code", @@ -709,15 +910,15 @@ " importance_type=None,\n", " interaction_constraints=None,\n", " learning_rate=...\n", - " 'learning_rate': ,\n", - " 'max_depth': ,\n", + " 'learning_rate': ,\n", + " 'max_depth': ,\n", " 'n_estimators': [100, 200, 300],\n", - " 'subsample': },\n", + " 'subsample': },\n", " random_state=42, scoring='neg_mean_absolute_error',\n", " verbose=1)" ], "text/html": [ - "
RandomizedSearchCV(cv=3,\n",
+       "
RandomizedSearchCV(cv=3,\n",
        "                   estimator=XGBRegressor(base_score=None, booster=None,\n",
        "                                          callbacks=None,\n",
        "                                          colsample_bylevel=None,\n",
@@ -1134,12 +1335,12 @@
        "                                          importance_type=None,\n",
        "                                          interaction_constraints=None,\n",
        "                                          learning_rate=...\n",
-       "                                        'learning_rate': <scipy.stats._distn_infrastructure.rv_continuous_frozen object at 0x000002D1B4343E00>,\n",
-       "                                        'max_depth': <scipy.stats._distn_infrastructure.rv_discrete_frozen object at 0x000002D1ACAE9E20>,\n",
+       "                                        'learning_rate': <scipy.stats._distn_infrastructure.rv_continuous_frozen object at 0x000001E8657920C0>,\n",
+       "                                        'max_depth': <scipy.stats._distn_infrastructure.rv_discrete_frozen object at 0x000001E86E630FE0>,\n",
        "                                        'n_estimators': [100, 200, 300],\n",
-       "                                        'subsample': <scipy.stats._distn_infrastructure.rv_continuous_frozen object at 0x000002D1B224B620>},\n",
+       "                                        'subsample': <scipy.stats._distn_infrastructure.rv_continuous_frozen object at 0x000001E86E037200>},\n",
        "                   random_state=42, scoring='neg_mean_absolute_error',\n",
-       "                   verbose=1)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.