Skip to content

Instantly share code, notes, and snippets.

@tado
Created December 27, 2025 06:37
Show Gist options
  • Select an option

  • Save tado/cfcfddd0eab0f1687718e2bd9d88d75e to your computer and use it in GitHub Desktop.

Select an option

Save tado/cfcfddd0eab0f1687718e2bd9d88d75e to your computer and use it in GitHub Desktop.
日本の気温の変動のデータから100年後の気温を予測
import requests
import pandas as pd
import matplotlib.pyplot as plt
import json
import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error
from scipy.interpolate import UnivariateSpline
from bs4 import BeautifulSoup
def download_japan_temperature_data():
"""気象庁から日本の気温偏差データをダウンロード"""
try:
# 気象庁の年平均気温偏差データ
url = "https://www.data.jma.go.jp/cpdinfo/temp/list/an_jpn.html"
response = requests.get(url, timeout=15)
response.encoding = 'shift_jis'
response.raise_for_status()
# HTMLを解析
soup = BeautifulSoup(response.text, 'html.parser')
pre_tag = soup.find('pre')
if pre_tag:
lines = pre_tag.text.strip().split('\n')
data = []
for line in lines:
parts = line.split()
if len(parts) >= 2:
try:
year = int(parts[0])
if 1898 <= year <= 2030: # 妥当な年の範囲
temp_anomaly = float(parts[1])
data.append({'Year': year, 'Temperature': temp_anomaly})
except (ValueError, IndexError):
continue
if data:
df = pd.DataFrame(data)
df = df.sort_values('Year')
return df
return create_japan_sample_data()
except Exception as e:
print(f"気象庁データ取得エラー: {e}")
return create_japan_sample_data()
def create_japan_sample_data():
"""日本のサンプル偏差データを作成(デモ用)"""
print("サンプルデータを使用します...")
years = list(range(1898, 2024))
# 日本の気温偏差に近いサンプルデータ
temps = []
for year in years:
if year < 1950:
temp = -0.5 + (year - 1898) * 0.005 + np.random.normal(0, 0.3)
elif year < 1980:
temp = -0.3 + (year - 1950) * 0.008 + np.random.normal(0, 0.3)
else:
temp = 0.2 + (year - 1980) * 0.025 + np.random.normal(0, 0.3)
temps.append(temp)
df = pd.DataFrame({'Year': years, 'Temperature': temps})
return df
def perform_polynomial_regression(df, degree):
"""多項式回帰分析"""
X = df['Year'].values.reshape(-1, 1)
y = df['Temperature'].values
# 多項式特徴量を作成
poly_features = PolynomialFeatures(degree=degree)
X_poly = poly_features.fit_transform(X)
# 線形回帰モデルを適用
model = LinearRegression()
model.fit(X_poly, y)
# 予測値を計算
y_pred = model.predict(X_poly)
# 決定係数(R²)を計算
r2_score = model.score(X_poly, y)
return y_pred, r2_score
def perform_spline_regression(df, smoothing=0.1):
"""スプライン回帰(平滑化スプライン)"""
X = df['Year'].values
y = df['Temperature'].values
# データ数に応じて平滑化パラメータを調整
s_value = len(X) * smoothing
# UnivariateSplineを使用
spline = UnivariateSpline(X, y, s=s_value)
y_pred = spline(X)
# R²を計算
ss_res = np.sum((y - y_pred) ** 2)
ss_tot = np.sum((y - np.mean(y)) ** 2)
r2_score = 1 - (ss_res / ss_tot)
return y_pred, r2_score
def perform_loess_smoothing(df, frac=0.2):
"""LOESSによる局所加重回帰"""
from scipy.signal import savgol_filter
y = df['Temperature'].values
# Savitzky-Golayフィルタを使用(LOESSの代替)
window_length = max(5, int(len(y) * frac))
if window_length % 2 == 0:
window_length += 1
y_smooth = savgol_filter(y, window_length, 3)
# R²を計算
ss_res = np.sum((y - y_smooth) ** 2)
ss_tot = np.sum((y - np.mean(y)) ** 2)
r2_score = 1 - (ss_res / ss_tot)
return y_smooth, r2_score
def analyze_regression_accuracy(df, predictions_dict):
"""各回帰手法の精度を詳細に分析"""
y_true = df['Temperature'].values
print("\n" + "="*70)
print("回帰分析精度の詳細評価")
print("="*70)
results = []
for method_name, y_pred in predictions_dict.items():
# 各種評価指標を計算
mse = mean_squared_error(y_true, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_true, y_pred)
# R²スコア
ss_res = np.sum((y_true - y_pred) ** 2)
ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)
r2 = 1 - (ss_res / ss_tot)
# 調整済みR²(自由度調整)
n = len(y_true)
p = 2 if '多項式' in method_name else 1 # 簡易的な自由度
adj_r2 = 1 - (1 - r2) * (n - 1) / (n - p - 1)
# 最大誤差
max_error = np.max(np.abs(y_true - y_pred))
# 残差の標準偏差
residual_std = np.std(y_true - y_pred)
results.append({
'method': method_name,
'R²': r2,
'Adj_R²': adj_r2,
'RMSE': rmse,
'MAE': mae,
'Max_Error': max_error,
'Residual_Std': residual_std
})
print(f"\n【{method_name}】")
print(f" 決定係数 (R²): {r2:.6f}")
print(f" 調整済みR²: {adj_r2:.6f}")
print(f" 平均二乗誤差 (MSE): {mse:.6f}")
print(f" 平均平方根誤差 (RMSE): {rmse:.6f}°C")
print(f" 平均絶対誤差 (MAE): {mae:.6f}°C")
print(f" 最大誤差: {max_error:.6f}°C")
print(f" 残差標準偏差: {residual_std:.6f}°C")
# 総合評価
print("\n" + "="*70)
print("総合評価")
print("="*70)
results_df = pd.DataFrame(results)
# 各指標で最良の手法を特定
best_r2 = results_df.loc[results_df['R²'].idxmax(), 'method']
best_rmse = results_df.loc[results_df['RMSE'].idxmin(), 'method']
best_mae = results_df.loc[results_df['MAE'].idxmin(), 'method']
print(f" 最高R²スコア: {best_r2}")
print(f" 最小RMSE: {best_rmse}")
print(f" 最小MAE: {best_mae}")
# 過学習の可能性を評価
print("\n【過学習リスク評価】")
for _, row in results_df.iterrows():
r2_adj_diff = row['R²'] - row['Adj_R²']
if r2_adj_diff > 0.02:
print(f" {row['method']}: 過学習の可能性あり (R²とAdj_R²の差: {r2_adj_diff:.4f})")
else:
print(f" {row['method']}: 良好 (R²とAdj_R²の差: {r2_adj_diff:.4f})")
return results_df
def predict_future_temperature(df, years_ahead=100):
"""各回帰手法で将来の気温を予測"""
current_year = df['Year'].max()
future_years = np.arange(current_year + 1, current_year + years_ahead + 1)
predictions = {}
# 2次多項式での予測
X_train = df['Year'].values.reshape(-1, 1)
y_train = df['Temperature'].values
poly_2 = PolynomialFeatures(degree=2)
X_poly_2 = poly_2.fit_transform(X_train)
model_2 = LinearRegression()
model_2.fit(X_poly_2, y_train)
X_future_2 = poly_2.transform(future_years.reshape(-1, 1))
predictions['2次多項式'] = model_2.predict(X_future_2)
# 3次多項式での予測
poly_3 = PolynomialFeatures(degree=3)
X_poly_3 = poly_3.fit_transform(X_train)
model_3 = LinearRegression()
model_3.fit(X_poly_3, y_train)
X_future_3 = poly_3.transform(future_years.reshape(-1, 1))
predictions['3次多項式'] = model_3.predict(X_future_3)
# 4次多項式での予測
poly_4 = PolynomialFeatures(degree=4)
X_poly_4 = poly_4.fit_transform(X_train)
model_4 = LinearRegression()
model_4.fit(X_poly_4, y_train)
X_future_4 = poly_4.transform(future_years.reshape(-1, 1))
predictions['4次多項式'] = model_4.predict(X_future_4)
# スプライン予測(外挿)
X_all = np.concatenate([df['Year'].values, future_years])
spline = UnivariateSpline(df['Year'].values, df['Temperature'].values, s=len(df) * 0.15)
predictions['スプライン平滑化'] = spline(future_years)
# 線形トレンドでの予測(LOESSの代替として)
from scipy.stats import linregress
slope, intercept, _, _, _ = linregress(df['Year'].values[-50:], df['Temperature'].values[-50:])
predictions['線形トレンド(直近50年)'] = slope * future_years + intercept
return future_years, predictions
def plot_temperature_data(df):
"""気温偏差データをグラフ化(複数の回帰手法を比較 + 100年予測)"""
# 日本語フォントの設定
plt.rcParams['font.sans-serif'] = ['Yu Gothic', 'MS Gothic', 'Meiryo']
plt.rcParams['axes.unicode_minus'] = False
# 各種回帰分析を実行
y_pred_2, r2_2 = perform_polynomial_regression(df, degree=2)
y_pred_3, r2_3 = perform_polynomial_regression(df, degree=3)
y_pred_4, r2_4 = perform_polynomial_regression(df, degree=4)
y_spline, r2_spline = perform_spline_regression(df, smoothing=0.15)
y_loess, r2_loess = perform_loess_smoothing(df, frac=0.15)
# 予測結果を辞書にまとめる
predictions = {
'2次多項式': y_pred_2,
'3次多項式': y_pred_3,
'4次多項式': y_pred_4,
'スプライン平滑化': y_spline,
'LOESS平滑化': y_loess
}
# 精度分析を実行
accuracy_results = analyze_regression_accuracy(df, predictions)
# 将来予測を実行
future_years, future_predictions = predict_future_temperature(df, years_ahead=100)
# 予測結果を表示
print("\n" + "="*70)
print("100年後(2124年頃)の気温偏差予測")
print("="*70)
for method, pred_values in future_predictions.items():
print(f"{method:20s}: {pred_values[-1]:+.2f}°C")
print("="*70)
# 3つの図を作成
# 図1: 全ての回帰曲線を1つのグラフに(予測含む)
fig1 = plt.figure(figsize=(16, 8))
# 実測データ
plt.scatter(df['Year'], df['Temperature'], s=15, color='#8a7ca8',
label='実測データ', alpha=0.5, zorder=5)
# 現在までの回帰曲線
plt.plot(df['Year'], y_pred_2, linewidth=1.5, color='#b2947f',
label=f'2次多項式 (R²={r2_2:.4f})', linestyle='--', alpha=0.7)
plt.plot(df['Year'], y_pred_3, linewidth=1.5, color='#7f9c7f',
label=f'3次多項式 (R²={r2_3:.4f})', linestyle='--', alpha=0.7)
plt.plot(df['Year'], y_pred_4, linewidth=1.5, color='#6b7a8f',
label=f'4次多項式 (R²={r2_4:.4f})', linestyle=':', alpha=0.7)
plt.plot(df['Year'], y_spline, linewidth=2, color='#d1a38f',
label=f'スプライン平滑化 (R²={r2_spline:.4f})', alpha=0.9)
# 将来予測(実線で表示)
plt.plot(future_years, future_predictions['2次多項式'],
linewidth=1.5, color='#b2947f', linestyle='-', alpha=0.6,
label='2次多項式予測')
plt.plot(future_years, future_predictions['3次多項式'],
linewidth=1.5, color='#7f9c7f', linestyle='-', alpha=0.6,
label='3次多項式予測')
plt.plot(future_years, future_predictions['4次多項式'],
linewidth=1.5, color='#6b7a8f', linestyle='-', alpha=0.6,
label='4次多項式予測')
plt.plot(future_years, future_predictions['スプライン平滑化'],
linewidth=2, color='#d1a38f', linestyle='-', alpha=0.6,
label='スプライン予測')
plt.plot(future_years, future_predictions['線形トレンド(直近50年)'],
linewidth=1.5, color='#e74c3c', linestyle='-', alpha=0.8,
label='線形トレンド予測')
# 現在と予測の境界線
current_year = df['Year'].max()
plt.axvline(x=current_year, color='red', linestyle='--', linewidth=1.5,
alpha=0.7, label=f'現在({current_year}年)')
plt.title('日本の年平均気温偏差の変化と100年予測', fontsize=14, pad=15)
plt.xlabel('年', fontsize=10)
plt.ylabel('気温偏差 (°C)', fontsize=10)
plt.axhline(y=0, color='gray', linestyle='-', linewidth=0.8)
plt.grid(True, alpha=0.3, linestyle='--')
plt.legend(loc='upper left', fontsize=7, framealpha=0.9, ncol=2)
plt.gca().set_facecolor('#f5f5f5')
plt.tight_layout()
# 図2: 各回帰手法を個別のサブプロットで表示(予測含む)
fig2, axes = plt.subplots(3, 2, figsize=(18, 14))
fig2.suptitle('各回帰分析の詳細結果と100年予測(気温偏差)', fontsize=16, fontweight='bold', y=0.995)
regression_data = [
('2次多項式回帰', y_pred_2, r2_2, '#b2947f', future_predictions['2次多項式']),
('3次多項式回帰', y_pred_3, r2_3, '#7f9c7f', future_predictions['3次多項式']),
('4次多項式回帰', y_pred_4, r2_4, '#6b7a8f', future_predictions['4次多項式']),
('スプライン平滑化', y_spline, r2_spline, '#d1a38f', future_predictions['スプライン平滑化']),
('線形トレンド', y_loess, r2_loess, '#e74c3c', future_predictions['線形トレンド(直近50年)']),
]
for idx, (name, y_pred, r2, color, y_future) in enumerate(regression_data):
row = idx // 2
col = idx % 2
ax = axes[row, col]
# 実測データ
ax.scatter(df['Year'], df['Temperature'], s=12, color='#8a7ca8',
label='実測データ', alpha=0.4, zorder=5)
# 現在までの回帰曲線
ax.plot(df['Year'], y_pred, linewidth=2, color=color,
label=f'{name}\nR²={r2:.4f}', alpha=0.9)
# 将来予測(実線で表示)
ax.plot(future_years, y_future, linewidth=2, color=color,
linestyle='-', alpha=0.6, label=f'予測({future_years[-1]:.0f}年まで)')
# 現在の境界線
ax.axvline(x=current_year, color='red', linestyle='--',
linewidth=1, alpha=0.5)
# 予測終了時の値を表示
ax.text(future_years[-1], y_future[-1],
f'{y_future[-1]:+.1f}°C',
fontsize=7, fontweight='bold',
bbox=dict(boxstyle='round,pad=0.3', facecolor=color, alpha=0.3))
ax.set_title(f'{name} - 2124年予測: {y_future[-1]:+.2f}°C',
fontsize=10, fontweight='bold', pad=8)
ax.set_xlabel('年', fontsize=8)
ax.set_ylabel('気温偏差 (°C)', fontsize=8)
ax.tick_params(labelsize=7)
ax.axhline(y=0, color='gray', linestyle='-', linewidth=0.8)
ax.grid(True, alpha=0.3, linestyle='--')
ax.legend(loc='upper left', fontsize=7)
ax.set_facecolor('#f5f5f5')
# 最後のサブプロット(6番目)に予測比較グラフ
ax_last = axes[2, 1]
methods = [name for name, _, _, _, _ in regression_data]
pred_2124 = [y_future[-1] for _, _, _, _, y_future in regression_data]
colors_bar = [color for _, _, _, color, _ in regression_data]
bars = ax_last.barh(methods, pred_2124, color=colors_bar, alpha=0.7)
ax_last.set_xlabel('予測気温偏差 (°C)', fontsize=8)
ax_last.set_title('2124年の気温偏差予測比較', fontsize=10, fontweight='bold', pad=8)
ax_last.tick_params(labelsize=7)
ax_last.axvline(x=0, color='black', linestyle='-', linewidth=0.8)
ax_last.grid(True, alpha=0.3, axis='x')
ax_last.set_facecolor('#f5f5f5')
# 棒グラフに値を表示
for i, (bar, pred) in enumerate(zip(bars, pred_2124)):
x_pos = pred + (0.1 if pred > 0 else -0.1)
ha = 'left' if pred > 0 else 'right'
ax_last.text(x_pos, i, f'{pred:+.2f}°C',
va='center', ha=ha, fontsize=7, fontweight='bold')
plt.tight_layout()
# 図3: 予測の不確実性を示すグラフ
fig3 = plt.figure(figsize=(16, 8))
plt.scatter(df['Year'], df['Temperature'], s=15, color='#8a7ca8',
label='実測データ', alpha=0.5, zorder=5)
# 全ての予測を半透明で表示
all_predictions = []
for method, pred_values in future_predictions.items():
plt.plot(future_years, pred_values, linewidth=1.5, alpha=0.4)
all_predictions.append(pred_values)
# 予測の平均と範囲を計算
all_predictions = np.array(all_predictions)
pred_mean = np.mean(all_predictions, axis=0)
pred_std = np.std(all_predictions, axis=0)
pred_min = np.min(all_predictions, axis=0)
pred_max = np.max(all_predictions, axis=0)
# 平均予測を太線で表示
plt.plot(future_years, pred_mean, linewidth=2.5, color='#e74c3c',
label=f'予測平均 (2124年: {pred_mean[-1]:+.2f}°C)', zorder=10)
# 不確実性の範囲を塗りつぶし
plt.fill_between(future_years, pred_min, pred_max,
color='#e74c3c', alpha=0.2, label='予測範囲')
plt.axvline(x=current_year, color='red', linestyle='--', linewidth=1.5,
alpha=0.7, label=f'現在({current_year}年)')
plt.title('気温偏差予測の不確実性(全モデルの比較)', fontsize=14, pad=15)
plt.xlabel('年', fontsize=10)
plt.ylabel('気温偏差 (°C)', fontsize=10)
plt.axhline(y=0, color='gray', linestyle='-', linewidth=0.8)
plt.grid(True, alpha=0.3, linestyle='--')
plt.legend(loc='upper left', fontsize=8, framealpha=0.9)
plt.gca().set_facecolor('#f5f5f5')
# 予測範囲の情報を追加
plt.text(0.02, 0.98,
f'2124年予測範囲:\n最小: {pred_min[-1]:+.2f}°C\n最大: {pred_max[-1]:+.2f}°C\n平均: {pred_mean[-1]:+.2f}°C\n標準偏差: {pred_std[-1]:.2f}°C',
transform=plt.gca().transAxes, fontsize=8,
verticalalignment='top',
bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.8))
plt.tight_layout()
plt.show()
def main():
print("日本の気温偏差データをダウンロード中...")
df = download_japan_temperature_data()
if df is not None and not df.empty:
print(f"{len(df)}件のデータを取得しました")
print(f"期間: {df['Year'].min()}年 - {df['Year'].max()}年")
print(f"最新の気温偏差: {df['Temperature'].iloc[-1]:.2f}°C")
print(f"平均偏差: {df['Temperature'].mean():.2f}°C")
plot_temperature_data(df)
else:
print("データの取得に失敗しました")
if __name__ == "__main__":
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment