commit 6ae57ad9f221e6b65bfd58d6cded114549c64d61 Author: Bairly <2652270566@qq.com> Date: Mon Mar 24 09:57:14 2025 +0800 first commit diff --git a/.ipynb_checkpoints/air_quality_prediction-checkpoint.ipynb b/.ipynb_checkpoints/air_quality_prediction-checkpoint.ipynb new file mode 100644 index 0000000..8bb5f9b --- /dev/null +++ b/.ipynb_checkpoints/air_quality_prediction-checkpoint.ipynb @@ -0,0 +1,467 @@ +{ + "cells": [ + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "# 预测建模\n", + "北京市空气质量指数预测(推荐难度系数10)\n", + "\n", + "这个数据集是北京市2022年11月1日至2023年10月31日期间空气质量相关数据。\n", + "根据这个数据集,回答以下问题" + ], + "id": "b610f839dca4877" + }, + { + "cell_type": "code", + "id": "initial_id", + "metadata": { + "collapsed": true, + "ExecuteTime": { + "end_time": "2025-03-22T07:55:04.926730Z", + "start_time": "2025-03-22T07:55:03.071940Z" + } + }, + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "from calculate import *\n", + "from heatmap import *" + ], + "outputs": [], + "execution_count": 1 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-03-22T07:55:05.632142Z", + "start_time": "2025-03-22T07:55:04.941177Z" + } + }, + "cell_type": "code", + "source": [ + "#读取数据\n", + "data=pd.read_excel('北京市空气质量指数与气象数据.xlsx')\n", + "data.head()" + ], + "id": "92ea7ba1218799cd", + "outputs": [ + { + "data": { + "text/plain": [ + " date hour AQI CO NO2 O3 PM10 \\\n", + "0 2022-11-01 2 18.371429 0.211429 23.771429 29.057143 13.257143 \n", + "1 2022-11-01 5 21.914286 0.180000 26.571429 20.142857 18.914286 \n", + "2 2022-11-01 8 28.628571 0.311429 30.028571 14.285714 27.942857 \n", + "3 2022-11-01 11 19.000000 0.237143 17.971429 40.529412 17.852941 \n", + "4 2022-11-01 14 21.742857 0.252941 15.588235 53.617647 20.941176 \n", + "\n", + " PM2.5 SO2 T ... P Pa U Ff Tn Tx VV Td \\\n", + "0 3.057143 2.628571 6.7 ... 770.5 0.1 36.0 1.0 5.3 17.3 30.0 -7.3 \n", + "1 3.771429 2.542857 2.0 ... 770.8 0.3 62.0 0.0 1.9 17.3 7.0 -4.5 \n", + "2 6.857143 2.400000 6.6 ... 771.7 0.9 56.0 0.0 0.9 17.3 10.0 -7.1 \n", + "3 5.914286 2.176471 13.5 ... 771.3 -0.4 19.0 2.0 0.9 17.3 30.0 -9.7 \n", + "4 6.742857 2.000000 15.7 ... 768.6 -2.7 19.0 2.0 0.9 17.3 30.0 -7.9 \n", + "\n", + " RRR tR \n", + "0 0.0 12 \n", + "1 0.0 12 \n", + "2 0.0 12 \n", + "3 0.0 12 \n", + "4 0.0 12 \n", + "\n", + "[5 rows x 21 columns]" + ], + "text/html": [ + "
\n", + " | date | \n", + "hour | \n", + "AQI | \n", + "CO | \n", + "NO2 | \n", + "O3 | \n", + "PM10 | \n", + "PM2.5 | \n", + "SO2 | \n", + "T | \n", + "... | \n", + "P | \n", + "Pa | \n", + "U | \n", + "Ff | \n", + "Tn | \n", + "Tx | \n", + "VV | \n", + "Td | \n", + "RRR | \n", + "tR | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "2022-11-01 | \n", + "2 | \n", + "18.371429 | \n", + "0.211429 | \n", + "23.771429 | \n", + "29.057143 | \n", + "13.257143 | \n", + "3.057143 | \n", + "2.628571 | \n", + "6.7 | \n", + "... | \n", + "770.5 | \n", + "0.1 | \n", + "36.0 | \n", + "1.0 | \n", + "5.3 | \n", + "17.3 | \n", + "30.0 | \n", + "-7.3 | \n", + "0.0 | \n", + "12 | \n", + "
1 | \n", + "2022-11-01 | \n", + "5 | \n", + "21.914286 | \n", + "0.180000 | \n", + "26.571429 | \n", + "20.142857 | \n", + "18.914286 | \n", + "3.771429 | \n", + "2.542857 | \n", + "2.0 | \n", + "... | \n", + "770.8 | \n", + "0.3 | \n", + "62.0 | \n", + "0.0 | \n", + "1.9 | \n", + "17.3 | \n", + "7.0 | \n", + "-4.5 | \n", + "0.0 | \n", + "12 | \n", + "
2 | \n", + "2022-11-01 | \n", + "8 | \n", + "28.628571 | \n", + "0.311429 | \n", + "30.028571 | \n", + "14.285714 | \n", + "27.942857 | \n", + "6.857143 | \n", + "2.400000 | \n", + "6.6 | \n", + "... | \n", + "771.7 | \n", + "0.9 | \n", + "56.0 | \n", + "0.0 | \n", + "0.9 | \n", + "17.3 | \n", + "10.0 | \n", + "-7.1 | \n", + "0.0 | \n", + "12 | \n", + "
3 | \n", + "2022-11-01 | \n", + "11 | \n", + "19.000000 | \n", + "0.237143 | \n", + "17.971429 | \n", + "40.529412 | \n", + "17.852941 | \n", + "5.914286 | \n", + "2.176471 | \n", + "13.5 | \n", + "... | \n", + "771.3 | \n", + "-0.4 | \n", + "19.0 | \n", + "2.0 | \n", + "0.9 | \n", + "17.3 | \n", + "30.0 | \n", + "-9.7 | \n", + "0.0 | \n", + "12 | \n", + "
4 | \n", + "2022-11-01 | \n", + "14 | \n", + "21.742857 | \n", + "0.252941 | \n", + "15.588235 | \n", + "53.617647 | \n", + "20.941176 | \n", + "6.742857 | \n", + "2.000000 | \n", + "15.7 | \n", + "... | \n", + "768.6 | \n", + "-2.7 | \n", + "19.0 | \n", + "2.0 | \n", + "0.9 | \n", + "17.3 | \n", + "30.0 | \n", + "-7.9 | \n", + "0.0 | \n", + "12 | \n", + "
5 rows × 21 columns
\n", + "\n", + " | date | \n", + "hour | \n", + "AQI | \n", + "CO | \n", + "NO2 | \n", + "O3 | \n", + "PM10 | \n", + "PM2.5 | \n", + "SO2 | \n", + "T | \n", + "... | \n", + "P | \n", + "Pa | \n", + "U | \n", + "Ff | \n", + "Tn | \n", + "Tx | \n", + "VV | \n", + "Td | \n", + "RRR | \n", + "tR | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "2022-11-01 | \n", + "2 | \n", + "18.371429 | \n", + "0.211429 | \n", + "23.771429 | \n", + "29.057143 | \n", + "13.257143 | \n", + "3.057143 | \n", + "2.628571 | \n", + "6.7 | \n", + "... | \n", + "770.5 | \n", + "0.1 | \n", + "36.0 | \n", + "1.0 | \n", + "5.3 | \n", + "17.3 | \n", + "30.0 | \n", + "-7.3 | \n", + "0.0 | \n", + "12 | \n", + "
1 | \n", + "2022-11-01 | \n", + "5 | \n", + "21.914286 | \n", + "0.180000 | \n", + "26.571429 | \n", + "20.142857 | \n", + "18.914286 | \n", + "3.771429 | \n", + "2.542857 | \n", + "2.0 | \n", + "... | \n", + "770.8 | \n", + "0.3 | \n", + "62.0 | \n", + "0.0 | \n", + "1.9 | \n", + "17.3 | \n", + "7.0 | \n", + "-4.5 | \n", + "0.0 | \n", + "12 | \n", + "
2 | \n", + "2022-11-01 | \n", + "8 | \n", + "28.628571 | \n", + "0.311429 | \n", + "30.028571 | \n", + "14.285714 | \n", + "27.942857 | \n", + "6.857143 | \n", + "2.400000 | \n", + "6.6 | \n", + "... | \n", + "771.7 | \n", + "0.9 | \n", + "56.0 | \n", + "0.0 | \n", + "0.9 | \n", + "17.3 | \n", + "10.0 | \n", + "-7.1 | \n", + "0.0 | \n", + "12 | \n", + "
3 | \n", + "2022-11-01 | \n", + "11 | \n", + "19.000000 | \n", + "0.237143 | \n", + "17.971429 | \n", + "40.529412 | \n", + "17.852941 | \n", + "5.914286 | \n", + "2.176471 | \n", + "13.5 | \n", + "... | \n", + "771.3 | \n", + "-0.4 | \n", + "19.0 | \n", + "2.0 | \n", + "0.9 | \n", + "17.3 | \n", + "30.0 | \n", + "-9.7 | \n", + "0.0 | \n", + "12 | \n", + "
4 | \n", + "2022-11-01 | \n", + "14 | \n", + "21.742857 | \n", + "0.252941 | \n", + "15.588235 | \n", + "53.617647 | \n", + "20.941176 | \n", + "6.742857 | \n", + "2.000000 | \n", + "15.7 | \n", + "... | \n", + "768.6 | \n", + "-2.7 | \n", + "19.0 | \n", + "2.0 | \n", + "0.9 | \n", + "17.3 | \n", + "30.0 | \n", + "-7.9 | \n", + "0.0 | \n", + "12 | \n", + "
5 rows × 21 columns
\n", + "RandomizedSearchCV(cv=3,\n", + " estimator=XGBRegressor(base_score=None, booster=None,\n", + " callbacks=None,\n", + " colsample_bylevel=None,\n", + " colsample_bynode=None,\n", + " colsample_bytree=None, device=None,\n", + " early_stopping_rounds=None,\n", + " enable_categorical=False,\n", + " eval_metric=None, feature_types=None,\n", + " gamma=None, grow_policy=None,\n", + " importance_type=None,\n", + " interaction_constraints=None,\n", + " learning_rate=...\n", + " 'learning_rate': <scipy.stats._distn_infrastructure.rv_continuous_frozen object at 0x000001EB86B7EC90>,\n", + " 'max_depth': <scipy.stats._distn_infrastructure.rv_discrete_frozen object at 0x000001EBFF5165D0>,\n", + " 'n_estimators': [100, 200, 300],\n", + " 'subsample': <scipy.stats._distn_infrastructure.rv_continuous_frozen object at 0x000001EB86A1C5F0>},\n", + " scoring='neg_mean_absolute_error', verbose=1)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
RandomizedSearchCV(cv=3,\n", + " estimator=XGBRegressor(base_score=None, booster=None,\n", + " callbacks=None,\n", + " colsample_bylevel=None,\n", + " colsample_bynode=None,\n", + " colsample_bytree=None, device=None,\n", + " early_stopping_rounds=None,\n", + " enable_categorical=False,\n", + " eval_metric=None, feature_types=None,\n", + " gamma=None, grow_policy=None,\n", + " importance_type=None,\n", + " interaction_constraints=None,\n", + " learning_rate=...\n", + " 'learning_rate': <scipy.stats._distn_infrastructure.rv_continuous_frozen object at 0x000001EB86B7EC90>,\n", + " 'max_depth': <scipy.stats._distn_infrastructure.rv_discrete_frozen object at 0x000001EBFF5165D0>,\n", + " 'n_estimators': [100, 200, 300],\n", + " 'subsample': <scipy.stats._distn_infrastructure.rv_continuous_frozen object at 0x000001EB86A1C5F0>},\n", + " scoring='neg_mean_absolute_error', verbose=1)
XGBRegressor(base_score=None, booster=None, callbacks=None,\n", + " colsample_bylevel=None, colsample_bynode=None,\n", + " colsample_bytree=0.7508184716426058, device=None,\n", + " early_stopping_rounds=None, enable_categorical=False,\n", + " eval_metric=None, feature_types=None, gamma=0.020833743645897518,\n", + " grow_policy=None, importance_type=None,\n", + " interaction_constraints=None, learning_rate=0.05075327204554973,\n", + " max_bin=None, max_cat_threshold=None, max_cat_to_onehot=None,\n", + " max_delta_step=None, max_depth=5, max_leaves=None,\n", + " min_child_weight=None, missing=nan, monotone_constraints=None,\n", + " multi_strategy=None, n_estimators=300, n_jobs=-1,\n", + " num_parallel_tree=None, random_state=42, ...)
XGBRegressor(base_score=None, booster=None, callbacks=None,\n", + " colsample_bylevel=None, colsample_bynode=None,\n", + " colsample_bytree=0.7508184716426058, device=None,\n", + " early_stopping_rounds=None, enable_categorical=False,\n", + " eval_metric=None, feature_types=None, gamma=0.020833743645897518,\n", + " grow_policy=None, importance_type=None,\n", + " interaction_constraints=None, learning_rate=0.05075327204554973,\n", + " max_bin=None, max_cat_threshold=None, max_cat_to_onehot=None,\n", + " max_delta_step=None, max_depth=5, max_leaves=None,\n", + " min_child_weight=None, missing=nan, monotone_constraints=None,\n", + " multi_strategy=None, n_estimators=300, n_jobs=-1,\n", + " num_parallel_tree=None, random_state=42, ...)