Last active
February 8, 2026 23:43
-
-
Save peterc/7f3d55d46c02f662e5a5e08e070954be to your computer and use it in GitHub Desktop.
How to fetch a user's bookmarks with the X API as of February 2026
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Fetch X bookmarks and output them as CSV. | |
| # | |
| # On first run, opens a browser for OAuth 2.0 authorization and stores | |
| # tokens in tokens-USERNAME.json. Subsequent runs reuse (and auto-refresh) | |
| # those tokens, so no further login is needed. Multiple users are supported | |
| # via separate token files. | |
| # | |
| # Usage: | |
| # ruby bookmarks.rb [--username USER] [--limit N] [--per-call N] | |
| # | |
| # --username X username to load/store tokens for. Without this flag, | |
| # the first available tokens-*.json is used. | |
| # --limit Total bookmarks to retrieve (default: 20) | |
| # --per-call Max results per API call, useful for testing (default: 50) | |
| # | |
| # Environment variables (via .env): | |
| # X_CLIENT_SECRET_ID - OAuth 2.0 client ID | |
| # X_CLIENT_SECRET - OAuth 2.0 client secret | |
| # | |
| # Gems: | |
| # gem install x twitter_oauth2 dotenv | |
| # | |
| # Note: It outputs CSV to stdout, and progress messages to stderr. | |
| # Also note: X API barfs doing batches of 100, so this does it in batches of 50. | |
| require "dotenv/load" | |
| require "csv" | |
| require "json" | |
| require "optparse" | |
| require "uri" | |
| require "x" | |
| require "twitter_oauth2" | |
# Token files live next to this script, one per authorized user.
TOKENS_DIR = __dir__

options = { limit: 20, per_call: 50, username: nil }

parser = OptionParser.new
parser.on("--limit N", Integer, "Total bookmarks to retrieve (default: 20)") { |value| options[:limit] = value }
parser.on("--per-call N", Integer, "Max results per API call (default: 50)") { |value| options[:per_call] = value }
parser.on("--username USER", "X username to load tokens for") { |value| options[:username] = value }
parser.parse!

# The bookmarks endpoint misbehaves above 50 results per call, so clamp.
if options[:per_call] > 50
  $stderr.puts "Warning: --per-call capped to 50 (higher values cause incomplete results)"
  options[:per_call] = 50
end

# OAuth 2.0 app credentials, supplied via .env (see header comment).
client_id = ENV["X_CLIENT_SECRET_ID"]
client_secret = ENV["X_CLIENT_SECRET"]
redirect_uri = "http://127.0.0.1:8765/"
# Path of the JSON token store for the given X username.
def tokens_file_for(username)
  filename = "tokens-#{username}.json"
  File.join(TOKENS_DIR, filename)
end
# Persist OAuth tokens to +file+ as pretty-printed JSON.
# +expires_at+ may be a Time or an Integer epoch; it is always stored
# as an Integer so load_tokens round-trips cleanly.
def save_tokens(file, access_token:, refresh_token:, expires_at:)
  expiry = expires_at
  expiry = expiry.to_i if expiry.is_a?(Time)
  payload = {
    access_token: access_token,
    refresh_token: refresh_token,
    expires_at: expiry
  }
  File.write(file, JSON.pretty_generate(payload))
end
# Read a token file back into a Hash with string keys.
def load_tokens(file)
  contents = File.read(file)
  JSON.parse(contents)
end
# Locate an existing token file: by --username flag, or fall back to the
# first tokens-*.json found next to the script. If none exists, run the
# full interactive OAuth 2.0 (PKCE) authorization flow.
if options[:username]
  tokens_file = tokens_file_for(options[:username])
  tokens_file = nil unless File.exist?(tokens_file)
else
  tokens_file = Dir.glob(File.join(TOKENS_DIR, "tokens-*.json")).first
end

if tokens_file
  tokens = load_tokens(tokens_file)
else
  # First run — full OAuth 2.0 flow via twitter_oauth2 gem
  oauth = TwitterOAuth2::Client.new(
    identifier: client_id,
    secret: client_secret,
    redirect_uri: redirect_uri
  )
  # offline.access is required so a refresh_token is issued for later runs.
  authorization_url = oauth.authorization_uri(
    scope: [:'users.read', :'tweet.read', :'bookmark.read', :'offline.access']
  )
  # Capture the PKCE verifier now; it must be presented at token exchange.
  code_verifier = oauth.code_verifier
  puts "Visit this URL to authorize (opening in browser):"
  puts authorization_url
  # NOTE: `open` is macOS-specific; on other platforms paste the URL manually.
  `open "#{authorization_url}"`
  print "Paste the full redirect URL after authorizing: "
  STDOUT.flush
  # Read explicitly from stdin (bare `gets` would read from files named in
  # ARGV) and tolerate EOF instead of crashing on nil.
  full_url = $stdin.gets.to_s.chomp
  # Parse the `code` query parameter properly. The old split("code=") hack
  # swallowed any parameters that followed the code (e.g. &state=...).
  code = begin
    URI.decode_www_form(URI(full_url).query.to_s).to_h["code"]
  rescue URI::InvalidURIError
    nil
  end
  abort "Could not find an authorization code in the pasted URL" if code.nil? || code.empty?
  oauth.authorization_code = code
  token_response = oauth.access_token!(code_verifier)
  # Fetch the username so the token file can be named tokens-USERNAME.json.
  tmp_client = X::Client.new(
    client_id: client_id,
    client_secret: client_secret,
    access_token: token_response.access_token,
    refresh_token: token_response.refresh_token
  )
  me = tmp_client.get("users/me")
  username = me["data"]["username"]
  tokens_file = tokens_file_for(username)
  save_tokens(tokens_file,
    access_token: token_response.access_token,
    refresh_token: token_response.refresh_token,
    expires_at: Time.now.to_i + token_response.expires_in
  )
  tokens = load_tokens(tokens_file)
end
# Build the x-ruby client from the stored (or freshly obtained) tokens.
x = X::Client.new(
  client_id: client_id,
  client_secret: client_secret,
  access_token: tokens["access_token"],
  refresh_token: tokens["refresh_token"]
)
# Set expires_at on the authenticator directly — the constructor above does
# not take it. NOTE(review): assumes the x gem's authenticator exposes an
# expires_at= writer and a Time-based expiry check; confirm against the
# installed gem version.
x.authenticator.expires_at = Time.at(tokens["expires_at"])
# Auto-refresh if expired, then persist the rotated tokens so the next run
# (and any concurrent tooling reading tokens_file) sees the fresh pair.
if x.authenticator.token_expired?
  x.authenticator.refresh_token!
  save_tokens(tokens_file,
    access_token: x.authenticator.access_token,
    refresh_token: x.authenticator.refresh_token,
    expires_at: x.authenticator.expires_at
  )
end
# Get user ID — the bookmarks endpoint is addressed by numeric user id.
me = x.get("users/me")
user_id = me["data"]["id"]
# Fetch bookmarks with pagination, stopping at options[:limit] total.
all_tweets = []
# author_id -> user object, accumulated from each page's expansions.
users = {}
pagination_token = nil
loop do
  # Shrink the final request so we never fetch more than --limit in total.
  remaining = options[:limit] - all_tweets.size
  per_call = [options[:per_call], remaining].min
  break if per_call <= 0
  query = "users/#{user_id}/bookmarks?max_results=#{per_call}&tweet.fields=created_at,author_id,text,entities,public_metrics,note_tweet&expansions=author_id&user.fields=username,verified"
  # pagination_token comes straight from the API's meta.next_token, which is
  # URL-safe, so it is appended without extra encoding.
  query += "&pagination_token=#{pagination_token}" if pagination_token
  bookmarks = x.get(query)
  # Index the expanded author objects for the CSV join later.
  (bookmarks.dig("includes", "users") || []).each do |u|
    users[u["id"]] = u
  end
  tweets = bookmarks["data"] || []
  break if tweets.empty?
  all_tweets.concat(tweets)
  pagination_token = bookmarks.dig("meta", "next_token")
  # Progress goes to stderr so stdout stays clean CSV.
  $stderr.puts "Fetched #{all_tweets.size} so far | next_token: #{pagination_token || 'none'}"
  # No next_token means the API has no further pages.
  break unless pagination_token
end
# Render the collected bookmarks as CSV on stdout, joining each tweet to
# its author (from the expansions index) and expanding t.co links.
csv = CSV.generate do |rows|
  rows << ["tweet_id", "text", "date", "username", "name", "verified", "url", "likes", "retweets", "replies", "views", "bookmarks"]
  all_tweets.each do |bookmark|
    author = users[bookmark["author_id"]] || {}
    # Prefer note_tweet text (full text for long posts) over the truncated default
    text = bookmark.dig("note_tweet", "text") || bookmark["text"]
    # Swap each shortened t.co URL for its expanded form where available.
    url_entities = bookmark.dig("entities", "urls") || []
    text = url_entities.reduce(text) do |body, entity|
      entity["expanded_url"] ? body.gsub(entity["url"], entity["expanded_url"]) : body
    end
    counts = bookmark["public_metrics"] || {}
    rows << [
      bookmark["id"],
      text,
      bookmark["created_at"],
      author["username"],
      author["name"],
      author["verified"],
      "https://x.com/#{author["username"]}/status/#{bookmark["id"]}",
      counts["like_count"],
      counts["retweet_count"],
      counts["reply_count"],
      counts["impression_count"],
      counts["bookmark_count"]
    ]
  end
end
puts csv
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment