Created
February 11, 2026 10:36
-
-
Save pgtwitter/edc23cfcf5ddc40ddd55fbefe1fe1ab8 to your computer and use it in GitHub Desktop.
複数のCSVファイルを「キー(1列目+2列目)」を基準に重複を除去しながら結合(マージ)するBashスクリプト
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# merge.sh - merge several CSV files into one, dropping duplicate rows.
#
# Usage:
#   ./merge.sh OUTPUT.csv FILE1.csv [FILE2.csv ...]
#   ./merge.sh OUTPUT.csv *.csv
#
# A row is identified by its key: column 1 and column 2 joined by a comma.
# Inputs are processed in descending file-name order, so when the same key
# occurs in several files, the row from the file that sorts last by name is
# the one that is kept. The first file processed is copied as-is (header
# included); from every other file the header line is skipped and only rows
# whose key has not been seen yet are appended.
#
# Limitation: fields are split on every comma; quoted fields that contain
# commas or newlines are not understood.

set -euo pipefail

# Print the usage text to stderr and exit with status 1.
usage() {
  echo "ファイルが指定されていません" >&2
  echo "使い方: ./merge.sh 出力.csv file1.csv file2.csv ..." >&2
  echo "または ./merge.sh 出力.csv *.csv" >&2
  exit 1
}

# ${1:-} keeps `set -u` from aborting before the usage text can be shown
# when the script is started without any argument.
OUTPUT="${1:-}"
if [[ $# -lt 2 || "${OUTPUT}" == "-h" || "${OUTPUT}" == "--help" ]]; then
  usage
fi
shift

# Sort the input names in descending order (the names are expected to sort
# chronologically, e.g. by an embedded date). Reading line by line keeps
# names containing spaces or glob characters intact.
files=()
while IFS= read -r name; do
  files+=("${name}")
done < <(printf '%s\n' "$@" | sort -r)

echo "処理順(降順):"
printf '\t%s\n' "${files[@]}"

# Scratch directory, removed on every exit path. The merged result is built
# here and copied to OUTPUT only at the very end, so an OUTPUT that is also
# matched by the input glob is never truncated while it is still being read,
# and a failure half-way through does not leave a partial OUTPUT behind.
WORK_DIR=$(mktemp -d)
trap 'rm -rf -- "${WORK_DIR}"' EXIT
SEEN_KEYS="${WORK_DIR}/seen_keys"
MERGED="${WORK_DIR}/merged.csv"
: > "${SEEN_KEYS}"
: > "${MERGED}"

# First file: emit every line unchanged and record the key of each data row.
# Input is fed on stdin so that awk never interprets the file name as an
# option or a var=value assignment; `print` also guarantees that the last
# line ends with a newline before further rows are appended.
first=${files[0]}
awk -F, -v keys="${SEEN_KEYS}" '
  { print }
  NR > 1 {
    key = $1 FS $2
    if (!(key in seen)) {
      seen[key] = 1
      print key > keys
    }
  }
' < "${first}" > "${MERGED}"

# Remaining files: skip the header and append only rows with a new key.
for f in "${files[@]:1}"; do
  echo "処理中: $f"
  awk -F, -v keys="${SEEN_KEYS}" '
    BEGIN {
      # Load the keys collected so far. An explicit getline loop (instead of
      # the NR==FNR idiom) stays correct when the key file is still empty.
      while ((getline line < keys) > 0) {
        seen[line] = 1
      }
      close(keys)
    }
    NR == 1 { next }  # skip the header line
    {
      key = $1 FS $2
      if (!(key in seen)) {
        seen[key] = 1
        print key >> keys
        print         # emit the row itself into the merged output
      }
    }
  ' < "$f" >> "${MERGED}"
done

# Publish the result; `cat >` (rather than mv) lets OUTPUT be created with
# the caller's normal permissions instead of mktemp's restrictive ones.
cat "${MERGED}" > "${OUTPUT}"

echo "出力完了: $OUTPUT"
wc -l "$OUTPUT"
Author
pgtwitter
commented
Feb 11, 2026
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment