Skip to content

Instantly share code, notes, and snippets.

@norberttech
Created January 27, 2025 05:08
Show Gist options
  • Select an option

  • Save norberttech/639576560ef50337a7fe8c21891b3814 to your computer and use it in GitHub Desktop.

Select an option

Save norberttech/639576560ef50337a7fe8c21891b3814 to your computer and use it in GitHub Desktop.
Data Generator for flow-php.com blog post - Processing Data in PHP
<?php
declare(strict_types=1);
use Doctrine\DBAL\DriverManager;
use Doctrine\DBAL\Schema\Column;
use Doctrine\DBAL\Schema\ForeignKeyConstraint;
use Doctrine\DBAL\Schema\Index;
use Doctrine\DBAL\Schema\Table;
use Doctrine\DBAL\Schema\UniqueConstraint;
use Doctrine\DBAL\Types\Type;
use Doctrine\DBAL\Types\Types;
use Flow\ETL\Adapter\Doctrine\Order;
use Flow\ETL\Adapter\Doctrine\OrderBy;
use Flow\ETL\Function\ScalarFunction;
use Flow\ETL\Row;
use function Flow\ETL\Adapter\CSV\to_csv;
use function Flow\ETL\Adapter\Doctrine\from_dbal_limit_offset;
use function Flow\ETL\Adapter\Doctrine\to_dbal_table_insert;
use function Flow\ETL\DSL\df;
use function Flow\ETL\DSL\from_array;
use function Flow\ETL\DSL\overwrite;
use function Flow\ETL\DSL\ref;
require_once __DIR__ . '/../../../vendor/autoload.php';
// Connect through PDO SQLite to the database file stored next to this script.
$connection = DriverManager::getConnection([
    'driver' => 'pdo_sqlite',
    'path' => __DIR__ . '/database.db',
]);
$schemaManager = $connection->createSchemaManager();

// Start every run from a clean schema. The child table (user_addresses) is
// dropped before its parent (users): when SQLite enforces foreign keys,
// dropping a parent that is still referenced by child rows fails.
foreach (['user_addresses', 'users'] as $staleTable) {
    if ($schemaManager->tablesExist([$staleTable])) {
        $schemaManager->dropTable($staleTable);
    }
}

// users: one row per generated user; both id and email are declared unique.
$schemaManager->createTable(new Table(
    'users',
    // Columns
    [
        new Column('id', Type::getType(Types::GUID), ['notnull' => true]),
        new Column('first_name', Type::getType(Types::STRING), ['notnull' => true, 'length' => 256]),
        new Column('last_name', Type::getType(Types::STRING), ['notnull' => true, 'length' => 256]),
        new Column('email', Type::getType(Types::STRING), ['notnull' => true, 'length' => 256]),
    ],
    // Indexes
    [
        new Index('users_email_idx', ['email']),
    ],
    // Unique constraints
    [
        new UniqueConstraint('users_id', ['id']),
        new UniqueConstraint('users_email', ['email']),
    ],
));

// user_addresses: addresses that reference users.id through user_id.
// address_2 and address_3 are the only nullable columns.
$schemaManager->createTable(new Table(
    'user_addresses',
    // Columns
    [
        new Column('id', Type::getType(Types::GUID), ['notnull' => true]),
        new Column('user_id', Type::getType(Types::GUID), ['notnull' => true]),
        new Column('country', Type::getType(Types::STRING), ['notnull' => true, 'length' => 2]),
        new Column('state', Type::getType(Types::STRING), ['notnull' => true, 'length' => 2]),
        new Column('zip', Type::getType(Types::STRING), ['notnull' => true, 'length' => 12]),
        new Column('city', Type::getType(Types::STRING), ['notnull' => true, 'length' => 256]),
        new Column('address_1', Type::getType(Types::STRING), ['notnull' => true, 'length' => 256]),
        new Column('address_2', Type::getType(Types::STRING), ['notnull' => false, 'length' => 256]),
        new Column('address_3', Type::getType(Types::STRING), ['notnull' => false, 'length' => 256]),
    ],
    // Indexes (none)
    [],
    // Unique constraints
    [
        new UniqueConstraint('user_addresses_id', ['id']),
    ],
    // Foreign keys
    [
        new ForeignKeyConstraint(['user_id'], 'users', ['id'], 'user_addresses_users_id_fk'),
    ],
));
$faker = Faker\Factory::create();

/**
 * Lazily yields 10 000 fake user rows shaped like the `users` table.
 *
 * Roughly 1 row in 11 gets an empty email (random_int(0, 10) === 1).
 * NOTE(review): this looks like deliberately dirty data for the blog post;
 * confirm before "fixing" it.
 *
 * Faker formatters are invoked as methods because property-style access
 * ($faker->uuid) is deprecated in fakerphp/faker.
 *
 * @return \Generator<int, array{id: string, first_name: string, last_name: string, email: string}>
 */
$generateUsers = static function (\Faker\Generator $faker): \Generator {
    for ($i = 0; $i < 10_000; $i++) {
        yield [
            'id' => $faker->uuid(),
            'first_name' => $faker->firstName(),
            'last_name' => $faker->lastName(),
            'email' => \random_int(0, 10) === 1 ? '' : $faker->email(),
        ];
    }
};
// Source: the lazily generated fake users.
$usersExtractor = from_array($generateUsers($faker));

// Sink: inserts into the `users` table, with `email` passed as the conflict column.
// NOTE(review): presumably this makes rows that collide on email (e.g. the
// repeated empty ones) resolve via ON CONFLICT instead of aborting - confirm
// against the Flow Doctrine adapter documentation.
$usersLoader = to_dbal_table_insert(
    $connection,
    'users',
    [
        'conflict_columns' => ['email'],
    ],
);

// Run the pipeline: generator -> users table.
df()
    ->read($usersExtractor)
    ->write($usersLoader)
    ->run();
// Second pipeline: read the stored users back, attach 1-5 fake addresses to
// each of them, flatten those into one row per address and write import.csv
// next to this script.
df()
    ->read(from_dbal_limit_offset(
        $connection,
        'users',
        new OrderBy('id', Order::ASC),
    ))
    // Only the email is carried over; each address row refers to its user by email.
    ->select('email')
    ->withEntry('addresses', new class ($faker) implements ScalarFunction {
        public function __construct(private \Faker\Generator $faker)
        {
        }

        /**
         * Builds between 1 and 5 fake US addresses for the user in $row.
         *
         * Roughly 1 address in 11 gets a null id and about 5 in 6 get a null
         * address_3.
         * NOTE(review): the null ids look like deliberately dirty data for the
         * blog post; confirm before changing.
         *
         * @return list<array<string, string|null>>
         */
        public function eval(Row $row): mixed
        {
            // Draw the count once. Calling random_int() inside the loop
            // condition would re-roll the bound on every iteration and skew
            // the number of addresses toward small values.
            $count = \random_int(1, 5);

            $addresses = [];
            for ($i = 0; $i < $count; $i++) {
                $addresses[] = [
                    'id' => \random_int(0, 10) === 1 ? null : $this->faker->uuid(),
                    'user_email' => $row->valueOf('email'),
                    'country' => 'US',
                    'state' => $this->faker->stateAbbr(),
                    'zip' => $this->faker->postcode(),
                    'city' => $this->faker->city(),
                    'address_1' => $this->faker->streetName(),
                    'address_2' => $this->faker->buildingNumber(),
                    'address_3' => \random_int(0, 5) === 1 ? $this->faker->streetSuffix() : null,
                ];
            }

            return $addresses;
        }
    })
    ->drop('email')
    // expand(): presumably turns the list in `addresses` into one row per
    // element - see the Flow DSL documentation.
    ->withEntry('address', ref('addresses')->expand())
    ->drop('addresses')
    // unpack(): presumably spreads the address structure into separate
    // "address."-prefixed entries; the prefix is stripped by renameAll() below.
    ->withEntry('address', ref('address')->unpack())
    ->drop('address')
    ->renameAll('address.', '')
    // Overwrite save mode, so an import.csv left by a previous run is replaced.
    ->saveMode(overwrite())
    ->write(to_csv(__DIR__ . '/import.csv'))
    ->run();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment