Skip to content

Instantly share code, notes, and snippets.

@ValentinMouret
Last active July 6, 2022 06:39
Show Gist options
  • Select an option

  • Save ValentinMouret/dab8d854c93dd180042346b115479a14 to your computer and use it in GitHub Desktop.

Select an option

Save ValentinMouret/dab8d854c93dd180042346b115479a14 to your computer and use it in GitHub Desktop.
-- By beginning a transaction, you ensure
-- that you won't break anything.
-- You can always rollback.
begin;
create temp table batch_user (
 -- Enforcing a primary key slows down the processing
 -- but it helps you making sure you don't have duplicates
 -- in your batch.
 id text primary key,
 -- By storing the full JSON, we can easily process
 -- the Firestore document.
 document jsonb not null
);
-- This will read a CSV on your local filesystem
-- and insert its records in the table you just created.
copy batch_user (id, document)
from '/Users/valentinmouret/Desktop/users.csv'
with (format 'csv',
 header,
 delimiter ',');
-- Optional
-- This creates a savepoint.
-- Whatever happens after (unless you commit)
-- you will be able to rollback to this savepoint using
-- `rollback to temporary_table_created`.
-- This can be very useful when experimenting,
-- looking around to make sure things are the way they should.
savepoint batch_inserted;
-- E.g., get a glimpse of the emails.
select document->>'email'
from batch_user
limit 10;
-- Here is the actual batch insert.
-- Since its a SQL query, you can leverage the full
-- power of SQL to prepare your insert. `group by`,
-- `join`, function calls…
insert into user
  (id, email)
select batch_user.id,
  -- Access the `email` attribute of our JSON.
  batch_user.document->>'email' as email
  from batch_user
 -- Since there might be data in the table already,
 -- you might want to leave out items that were migrated.
  -- This can be done with a left join.
  left join user
  using (id)
 where user.id is null
  -- We have a `not null` constraint in our table,
  -- so we need to filter out null values.
  and batch_user.document->>'email' is not null;
savepoint done;
-- Look around some more.
select count(*)
 from user;
select id,
email
 from user
 limit 10;
-- Once everything looks good.
commit;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment