Last active
November 30, 2017 22:41
-
-
Save acmiyaguchi/be491d00c1ede75abba9c5b4f42eac2e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| == Parsed Logical Plan == | |
| Repartition 1, true | |
| +- Project [channel#1422, geo#1421, is_funnelcake#1419, acquisition_period#1417, start_version#1583, sync_usage#1420, current_version#1418, current_week#1355L, source#1414, medium#1416, campaign#1409, content#1413, distribution_id#1410, default_search_engine#1415, locale#1411, is_active#1412, n_profiles#1636L, usage_hours#1638, sum_squared_usage_hours#1640, total_uri_count#1642L, unique_domains_count_per_profile#1645] | |
| +- Aggregate [medium#1416, campaign#1409, source#1414, locale#1411, geo#1421, current_week#1355L, is_active#1412, content#1413, distribution_id#1410, default_search_engine#1415, acquisition_period#1417, current_version#1418, is_funnelcake#1419, sync_usage#1420, start_version#1583, channel#1422], [medium#1416, campaign#1409, source#1414, locale#1411, geo#1421, current_week#1355L, is_active#1412, content#1413, distribution_id#1410, default_search_engine#1415, acquisition_period#1417, current_version#1418, is_funnelcake#1419, sync_usage#1420, start_version#1583, channel#1422, sum(cast(n_profiles#1356 as bigint)) AS n_profiles#1636L, sum(total_uri_count#1357L) AS total_uri_count#1642L, sum(sum_squared_usage_hours#1358) AS sum_squared_usage_hours#1640, sum(usage_hours#1359) AS usage_hours#1638, (sum(unique_domains_count_per_profile#1360) / cast(sum(cast(n_profiles#1356 as bigint)) as double)) AS unique_domains_count_per_profile#1645] | |
| +- Project [unique_domains_count_per_profile#1360, medium#1416, geo#1421, campaign#1409, locale#1411, is_active#1412, subsession_start#1408, acquisition_period#1417, current_version#1418, is_funnelcake#1419, sum_squared_usage_hours#1358, usage_hours#1359, source#1414, n_profiles#1356, current_week#1355L, total_uri_count#1357L, content#1413, distribution_id#1410, default_search_engine#1415, profile_creation#1407, sync_usage#1420, start_version#1583, channel#1422] | |
| +- Project [profile_creation#1407, subsession_start#1408, campaign#1409, distribution_id#1410, locale#1411, current_week#1355L, is_active#1412, content#1413, source#1414, default_search_engine#1415, medium#1416, acquisition_period#1417, current_version#1418, is_funnelcake#1419, sync_usage#1420, geo#1421, channel#1422, n_profiles#1356, total_uri_count#1357L, sum_squared_usage_hours#1358, usage_hours#1359, unique_domains_count_per_profile#1360, effective_version#1447, _major#1475, ... 2 more fields] | |
| +- Project [profile_creation#1407, subsession_start#1408, campaign#1409, distribution_id#1410, locale#1411, current_week#1355L, is_active#1412, content#1413, source#1414, default_search_engine#1415, medium#1416, acquisition_period#1417, current_version#1418, is_funnelcake#1419, sync_usage#1420, geo#1421, channel#1422, n_profiles#1356, total_uri_count#1357L, sum_squared_usage_hours#1358, usage_hours#1359, unique_domains_count_per_profile#1360, effective_version#1447, _major#1475, ... 2 more fields] | |
| +- Project [profile_creation#1407, subsession_start#1408, campaign#1409, distribution_id#1410, locale#1411, current_week#1355L, is_active#1412, content#1413, source#1414, default_search_engine#1415, medium#1416, acquisition_period#1417, current_version#1418, is_funnelcake#1419, sync_usage#1420, geo#1421, channel#1422, n_profiles#1356, total_uri_count#1357L, sum_squared_usage_hours#1358, usage_hours#1359, unique_domains_count_per_profile#1360, effective_version#1447, _major#1475, CASE WHEN StartsWith(channel#1422, beta) THEN 1 ELSE CASE WHEN StartsWith(channel#1422, aurora) THEN 2 ELSE CASE WHEN StartsWith(channel#1422, nightly) THEN 3 ELSE 0 END END END AS _offset#1501] | |
| +- Project [profile_creation#1407, subsession_start#1408, campaign#1409, distribution_id#1410, locale#1411, current_week#1355L, is_active#1412, content#1413, source#1414, default_search_engine#1415, medium#1416, acquisition_period#1417, current_version#1418, is_funnelcake#1419, sync_usage#1420, geo#1421, channel#1422, n_profiles#1356, total_uri_count#1357L, sum_squared_usage_hours#1358, usage_hours#1359, unique_domains_count_per_profile#1360, effective_version#1447, cast(split(effective_version#1447, \.)[0] as int) AS _major#1475] | |
| +- Project [profile_creation#1407, subsession_start#1408, campaign#1409, distribution_id#1410, locale#1411, current_week#1355L, is_active#1412, content#1413, source#1414, default_search_engine#1415, medium#1416, acquisition_period#1417, current_version#1418, is_funnelcake#1419, sync_usage#1420, geo#1421, channel#1422, n_profiles#1356, total_uri_count#1357L, sum_squared_usage_hours#1358, usage_hours#1359, unique_domains_count_per_profile#1360, effective_version#1447] | |
| +- Join LeftOuter, (profile_creation#1407 = profile_creation#1446) | |
| :- Project [coalesce(profile_creation#1155, cast(unknown as string)) AS profile_creation#1407, coalesce(subsession_start#1129, cast(unknown as string)) AS subsession_start#1408, coalesce(campaign#1130, cast(unknown as string)) AS campaign#1409, coalesce(distribution_id#1131, cast(unknown as string)) AS distribution_id#1410, coalesce(locale#1132, cast(unknown as string)) AS locale#1411, current_week#1355L, coalesce(is_active#1134, cast(unknown as string)) AS is_active#1412, coalesce(content#1135, cast(unknown as string)) AS content#1413, coalesce(source#1136, cast(unknown as string)) AS source#1414, coalesce(default_search_engine#1137, cast(unknown as string)) AS default_search_engine#1415, coalesce(medium#1138, cast(unknown as string)) AS medium#1416, coalesce(acquisition_period#1255, cast(unknown as string)) AS acquisition_period#1417, coalesce(current_version#1140, cast(unknown as string)) AS current_version#1418, coalesce(is_funnelcake#1256, cast(unknown as string)) AS is_funnelcake#1419, coalesce(sync_usage#1142, cast(unknown as string)) AS sync_usage#1420, coalesce(geo#1143, cast(unknown as string)) AS geo#1421, coalesce(channel#1144, cast(unknown as string)) AS channel#1422, n_profiles#1356, total_uri_count#1357L, sum_squared_usage_hours#1358, usage_hours#1359, unique_domains_count_per_profile#1360] | |
| : +- Project [profile_creation#1155, subsession_start#1129, campaign#1130, distribution_id#1131, locale#1132, coalesce(current_week#1303L, cast(0.0 as bigint)) AS current_week#1355L, is_active#1134, content#1135, source#1136, default_search_engine#1137, medium#1138, acquisition_period#1255, current_version#1140, is_funnelcake#1256, sync_usage#1142, geo#1143, channel#1144, coalesce(n_profiles#1304, cast(0.0 as int)) AS n_profiles#1356, coalesce(total_uri_count#1305L, cast(0.0 as bigint)) AS total_uri_count#1357L, coalesce(nanvl(sum_squared_usage_hours#1306, cast(null as double)), cast(0.0 as double)) AS sum_squared_usage_hours#1358, coalesce(nanvl(usage_hours#1307, cast(null as double)), cast(0.0 as double)) AS usage_hours#1359, coalesce(nanvl(unique_domains_count_per_profile#1308, cast(null as double)), cast(0.0 as double)) AS unique_domains_count_per_profile#1360] | |
| : +- Project [profile_creation#1155, subsession_start#1129, campaign#1130, distribution_id#1131, locale#1132, coalesce(current_week#1254L, cast(0.0 as bigint)) AS current_week#1303L, is_active#1134, content#1135, source#1136, default_search_engine#1137, medium#1138, acquisition_period#1255, current_version#1140, is_funnelcake#1256, sync_usage#1142, geo#1143, channel#1144, coalesce(n_profiles#1150, cast(0.0 as int)) AS n_profiles#1304, coalesce(total_uri_count#1151L, cast(0.0 as bigint)) AS total_uri_count#1305L, coalesce(nanvl(sum_squared_usage_hours#1152, cast(null as double)), cast(0.0 as double)) AS sum_squared_usage_hours#1306, coalesce(nanvl(usage_hours#1153, cast(null as double)), cast(0.0 as double)) AS usage_hours#1307, coalesce(nanvl(unique_domains_count_per_profile#1154, cast(null as double)), cast(0.0 as double)) AS unique_domains_count_per_profile#1308] | |
| : +- Project [profile_creation#1155, subsession_start#1129, campaign#1130, distribution_id#1131, locale#1132, coalesce(current_week#1133L, cast(-1 as bigint)) AS current_week#1254L, is_active#1134, content#1135, source#1136, default_search_engine#1137, medium#1138, coalesce(acquisition_period#1139, cast(2000-01-01 as string)) AS acquisition_period#1255, current_version#1140, coalesce(is_funnelcake#1141, cast(no as string)) AS is_funnelcake#1256, sync_usage#1142, geo#1143, channel#1144, n_profiles#1150, total_uri_count#1151L, sum_squared_usage_hours#1152, usage_hours#1153, unique_domains_count_per_profile#1154] | |
| : +- Project [profile_creation#1155, CASE WHEN _is_valid#1179 THEN from_unixtime(unix_timestamp(subsession_start_date#14, yyyy-MM-dd, Some(UTC)), yyyy-MM-dd HH:mm:ss, Some(UTC)) ELSE cast(null as string) END AS subsession_start#1129, CASE WHEN _is_valid#1179 THEN attribution#909.campaign ELSE cast(null as string) END AS campaign#1130, CASE WHEN _is_valid#1179 THEN distribution_id#16 ELSE cast(null as string) END AS distribution_id#1131, CASE WHEN _is_valid#1179 THEN locale#31 ELSE cast(null as string) END AS locale#1132, CASE WHEN _is_valid#1179 THEN cast(CASE WHEN (datediff(cast(from_unixtime(unix_timestamp(subsession_start_date#14, yyyy-MM-dd, Some(UTC)), yyyy-MM-dd HH:mm:ss, Some(UTC)) as date), cast(from_unixtime((profile_creation_date#13L * cast(86400 as bigint)), yyyy-MM-dd HH:mm:ss, Some(UTC)) as date)) < 0) THEN cast(-1 as bigint) ELSE FLOOR((cast(datediff(cast(from_unixtime(unix_timestamp(subsession_start_date#14, yyyy-MM-dd, Some(UTC)), yyyy-MM-dd HH:mm:ss, Some(UTC)) as date), cast(from_unixtime((profile_creation_date#13L * cast(86400 as bigint)), yyyy-MM-dd HH:mm:ss, Some(UTC)) as date)) as double) / cast(7 as double))) END as bigint) ELSE cast(null as bigint) END AS current_week#1133L, CASE WHEN _is_valid#1179 THEN CASE WHEN (from_unixtime(unix_timestamp(subsession_start_date#14, yyyy-MM-dd, Some(UTC)), yyyy-MM-dd HH:mm:ss, Some(UTC)) < from_unixtime(unix_timestamp(20171105, yyyyMMdd, Some(UTC)), yyyy-MM-dd HH:mm:ss, Some(UTC))) THEN no ELSE yes END ELSE cast(null as string) END AS is_active#1134, CASE WHEN _is_valid#1179 THEN attribution#909.content ELSE cast(null as string) END AS content#1135, CASE WHEN _is_valid#1179 THEN attribution#909.source ELSE cast(null as string) END AS source#1136, CASE WHEN _is_valid#1179 THEN default_search_engine#54 ELSE cast(null as string) END AS default_search_engine#1137, CASE WHEN _is_valid#1179 THEN attribution#909.medium ELSE cast(null as string) END AS medium#1138, CASE WHEN _is_valid#1179 THEN date_format(cast(date_sub(next_day(cast(from_unixtime((profile_creation_date#13L * cast(86400 as bigint)), yyyy-MM-dd HH:mm:ss, Some(UTC)) as date), Sun), 7) as timestamp), yyyy-MM-dd, Some(UTC)) ELSE cast(null as string) END AS acquisition_period#1139, CASE WHEN _is_valid#1179 THEN app_version#24 ELSE cast(null as string) END AS current_version#1140, CASE WHEN _is_valid#1179 THEN CASE WHEN distribution_id#16 RLIKE ^mozilla[0-9]+.*$ THEN yes ELSE no END ELSE cast(null as string) END AS is_funnelcake#1141, CASE WHEN _is_valid#1179 THEN CASE WHEN ((coalesce(sync_count_desktop#19, 0) + coalesce(sync_count_mobile#20, 0)) > 1) THEN multiple ELSE CASE WHEN (((coalesce(sync_count_desktop#19, 0) + coalesce(sync_count_mobile#20, 0)) = 1) || sync_configured#18) THEN single ELSE CASE WHEN isnotnull(sync_configured#18) THEN no ELSE cast(null as string) END END END ELSE cast(null as string) END AS sync_usage#1142, CASE WHEN _is_valid#1179 THEN CASE WHEN country#4 IN (BE,FR,DE,JP,HU,HK,DZ,BR,FI,GR,RU,NL,TW,TR,PH,RO,CA,PL,CH,VE,CN,EG,IR,IT,VN,CZ,AR,AU,AT,IN,ID,ES,UA,US,MY,MX,SE,GB) THEN country#4 ELSE ROW END ELSE cast(null as string) END AS geo#1143, CASE WHEN _is_valid#1179 THEN CASE WHEN distribution_id#16 RLIKE ^mozilla[0-9]+.*$ THEN concat(normalized_channel#3, -cck-, distribution_id#16) ELSE normalized_channel#3 END ELSE cast(null as string) END AS channel#1144, 1 AS n_profiles#1150, total_uri_count#1042L AS total_uri_count#1151L, POWER((cast(usage_seconds#1040L as double) / cast(3600.0 as double)), 2.0) AS sum_squared_usage_hours#1152, (cast(usage_seconds#1040L as double) / cast(3600.0 as double)) AS usage_hours#1153, unique_domains_count_per_profile#1044 AS unique_domains_count_per_profile#1154] | |
| : +- Project [unique_domains_count_per_profile#1044, attribution#909, locale#31, timestamp#25L, usage_seconds#1040L, sync_count_mobile#20, submission_date_s3#379, subsession_length#15L, client_id#1, scalar_parent_browser_engagement_unique_domains_count#108, normalized_channel#3, scalar_parent_browser_engagement_total_uri_count#106, sync_configured#18, subsession_start_date#14, country#4, total_uri_count#1042L, distribution_id#16, default_search_engine#54, profile_creation_date#13L, app_version#24, sync_count_desktop#19, profile_creation#1155, ((isnotnull(profile_creation#1155) && (profile_creation#1155 > 2000-01-01)) && (from_unixtime((profile_creation_date#13L * cast(86400 as bigint)), yyyy-MM-dd HH:mm:ss, Some(UTC)) <= from_unixtime(unix_timestamp(subsession_start_date#14, yyyy-MM-dd, Some(UTC)), yyyy-MM-dd HH:mm:ss, Some(UTC)))) AS _is_valid#1179] | |
| : +- Project [unique_domains_count_per_profile#1044, attribution#909, locale#31, timestamp#25L, usage_seconds#1040L, sync_count_mobile#20, submission_date_s3#379, subsession_length#15L, client_id#1, scalar_parent_browser_engagement_unique_domains_count#108, normalized_channel#3, scalar_parent_browser_engagement_total_uri_count#106, sync_configured#18, subsession_start_date#14, country#4, total_uri_count#1042L, distribution_id#16, default_search_engine#54, profile_creation_date#13L, app_version#24, sync_count_desktop#19, date_format(cast(from_unixtime((profile_creation_date#13L * cast(86400 as bigint)), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC)) AS profile_creation#1155] | |
| : +- Project [unique_domains_count_per_profile#1044, attribution#909, locale#31, timestamp#25L, usage_seconds#1040L, sync_count_mobile#20, submission_date_s3#379, subsession_length#15L, client_id#1, scalar_parent_browser_engagement_unique_domains_count#108, normalized_channel#3, scalar_parent_browser_engagement_total_uri_count#106, sync_configured#18, subsession_start_date#14, country#4, total_uri_count#1042L, distribution_id#16, default_search_engine#54, profile_creation_date#13L, app_version#24, sync_count_desktop#19] | |
| : +- Project [client_id#1, app_version#24, attribution#909, country#4, default_search_engine#54, distribution_id#16, locale#31, normalized_channel#3, profile_creation_date#13L, submission_date_s3#379, subsession_length#15L, subsession_start_date#14, sync_configured#18, sync_count_desktop#19, sync_count_mobile#20, timestamp#25L, scalar_parent_browser_engagement_total_uri_count#106, scalar_parent_browser_engagement_unique_domains_count#108, client_rank#999, usage_seconds#1040L, total_uri_count#1042L, unique_domains_count_per_profile#1044] | |
| : +- Join Inner, (client_id#1 = client_id#1028) | |
| : :- Filter (client_rank#999 = 1) | |
| : : +- Project [app_version#24, attribution#909, client_id#1, country#4, default_search_engine#54, distribution_id#16, locale#31, normalized_channel#3, profile_creation_date#13L, submission_date_s3#379, subsession_length#15L, subsession_start_date#14, sync_configured#18, sync_count_desktop#19, sync_count_mobile#20, timestamp#25L, scalar_parent_browser_engagement_total_uri_count#106, scalar_parent_browser_engagement_unique_domains_count#108, client_rank#999] | |
| : : +- Project [app_version#24, attribution#909, client_id#1, country#4, default_search_engine#54, distribution_id#16, locale#31, normalized_channel#3, profile_creation_date#13L, submission_date_s3#379, subsession_length#15L, subsession_start_date#14, sync_configured#18, sync_count_desktop#19, sync_count_mobile#20, timestamp#25L, scalar_parent_browser_engagement_total_uri_count#106, scalar_parent_browser_engagement_unique_domains_count#108, client_rank#999, client_rank#999] | |
| : : +- Window [row_number() windowspecdefinition(client_id#1, timestamp#25L DESC NULLS LAST, ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS client_rank#999], [client_id#1], [timestamp#25L DESC NULLS LAST] | |
| : : +- Project [app_version#24, attribution#909, client_id#1, country#4, default_search_engine#54, distribution_id#16, locale#31, normalized_channel#3, profile_creation_date#13L, submission_date_s3#379, subsession_length#15L, subsession_start_date#14, sync_configured#18, sync_count_desktop#19, sync_count_mobile#20, timestamp#25L, scalar_parent_browser_engagement_total_uri_count#106, scalar_parent_browser_engagement_unique_domains_count#108] | |
| : : +- Union | |
| : : :- Project [app_version#24, attribution#909, client_id#1, country#4, default_search_engine#54, distribution_id#16, locale#31, normalized_channel#3, profile_creation_date#13L, submission_date_s3#379, subsession_length#15L, subsession_start_date#14, sync_configured#18, sync_count_desktop#19, sync_count_mobile#20, timestamp#25L, scalar_parent_browser_engagement_total_uri_count#106, scalar_parent_browser_engagement_unique_domains_count#108] | |
| : : : +- Project [client_id#1, app_version#24, coalesce(_rhs_column_name#885, attribution#69) AS attribution#909, country#4, default_search_engine#54, distribution_id#16, locale#31, normalized_channel#3, profile_creation_date#13L, submission_date_s3#379, subsession_length#15L, subsession_start_date#14, sync_configured#18, sync_count_desktop#19, sync_count_mobile#20, timestamp#25L, scalar_parent_browser_engagement_total_uri_count#106, scalar_parent_browser_engagement_unique_domains_count#108, _rhs_column_name#885] | |
| : : : +- Project [client_id#1, app_version#24, attribution#69, country#4, default_search_engine#54, distribution_id#16, locale#31, normalized_channel#3, profile_creation_date#13L, submission_date_s3#379, subsession_length#15L, subsession_start_date#14, sync_configured#18, sync_count_desktop#19, sync_count_mobile#20, timestamp#25L, scalar_parent_browser_engagement_total_uri_count#106, scalar_parent_browser_engagement_unique_domains_count#108, _rhs_column_name#885] | |
| : : : +- Join LeftOuter, (client_id#1 = client_id#828) | |
| : : : :- Project [app_version#24, attribution#69, client_id#1, country#4, default_search_engine#54, distribution_id#16, locale#31, normalized_channel#3, profile_creation_date#13L, submission_date_s3#379, subsession_length#15L, subsession_start_date#14, sync_configured#18, sync_count_desktop#19, sync_count_mobile#20, timestamp#25L, scalar_parent_browser_engagement_total_uri_count#106, scalar_parent_browser_engagement_unique_domains_count#108] | |
| : : : : +- Filter ((((subsession_start_date#14 >= 2017-11-05) && (subsession_start_date#14 < 2017-11-12)) && (submission_date_s3#379 >= 20171105)) && (submission_date_s3#379 < 20171122)) | |
| : : : : +- Relation[document_id#0,client_id#1,channel#2,normalized_channel#3,country#4,city#5,os#6,os_version#7,os_service_pack_major#8L,os_service_pack_minor#9L,windows_build_number#10L,windows_ubr#11L,install_year#12L,profile_creation_date#13L,subsession_start_date#14,subsession_length#15L,distribution_id#16,submission_date#17,sync_configured#18,sync_count_desktop#19,sync_count_mobile#20,app_build_id#21,app_display_version#22,app_name#23,... 357 more fields] parquet | |
| : : : +- Project [client_id#828, attribution#818 AS _rhs_column_name#885] | |
| : : : +- Filter isnotnull(attribution#818) | |
| : : : +- Project [cast(null as int) AS scalar_parent_browser_engagement_total_uri_count#815, cast(null as bigint) AS subsession_length#816L, cast(null as boolean) AS sync_configured#817, environment#766.settings.attribution AS attribution#818, from_unixtime(cast((cast(metadata#765.creation_timestamp as double) / cast(1000000000 as double)) as bigint), yyyy-MM-dd'T'HH:mm:ss.S'+00:00', Some(UTC)) AS subsession_start_date#819, environment#766.settings.locale AS locale#820, metadata#765.timestamp AS timestamp#821L, cast(null as int) AS sync_count_mobile#822, metadata#765.normalized_channel AS normalized_channel#823, submission#768 AS submission_date_s3#824, cast((crc32(encode(client_id#764, UTF-8)) % cast(100 as bigint)) as string) AS sample_id#825, environment#766.partner.distribution_id AS distribution_id#826, environment#766.settings.default_search_engine AS default_search_engine#827, client_id#764 AS client_id#828, metadata#765.geo_country AS country#829, environment#766.profile.creation_date AS profile_creation_date#830L, environment#766.build.version AS app_version#831, cast(null as int) AS scalar_parent_browser_engagement_unique_domains_count#832, cast(null as int) AS sync_count_desktop#833] | |
| : : : +- Relation[id#763,client_id#764,metadata#765,environment#766,payload#767,submission#768] parquet | |
| : : +- Project [app_version#831, attribution#818, client_id#828, country#829, default_search_engine#827, distribution_id#826, locale#820, normalized_channel#823, profile_creation_date#830L, submission_date_s3#824, subsession_length#816L, subsession_start_date#819, sync_configured#817, sync_count_desktop#833, sync_count_mobile#822, timestamp#821L, scalar_parent_browser_engagement_total_uri_count#815, scalar_parent_browser_engagement_unique_domains_count#832] | |
| : : +- Filter ((((subsession_start_date#819 >= 2017-11-05) && (subsession_start_date#819 < 2017-11-12)) && (submission_date_s3#824 >= 20171105)) && (submission_date_s3#824 < 20171122)) | |
| : : +- Project [cast(null as int) AS scalar_parent_browser_engagement_total_uri_count#815, cast(null as bigint) AS subsession_length#816L, cast(null as boolean) AS sync_configured#817, environment#766.settings.attribution AS attribution#818, from_unixtime(cast((cast(metadata#765.creation_timestamp as double) / cast(1000000000 as double)) as bigint), yyyy-MM-dd'T'HH:mm:ss.S'+00:00', Some(UTC)) AS subsession_start_date#819, environment#766.settings.locale AS locale#820, metadata#765.timestamp AS timestamp#821L, cast(null as int) AS sync_count_mobile#822, metadata#765.normalized_channel AS normalized_channel#823, submission#768 AS submission_date_s3#824, cast((crc32(encode(client_id#764, UTF-8)) % cast(100 as bigint)) as string) AS sample_id#825, environment#766.partner.distribution_id AS distribution_id#826, environment#766.settings.default_search_engine AS default_search_engine#827, client_id#764 AS client_id#828, metadata#765.geo_country AS country#829, environment#766.profile.creation_date AS profile_creation_date#830L, environment#766.build.version AS app_version#831, cast(null as int) AS scalar_parent_browser_engagement_unique_domains_count#832, cast(null as int) AS sync_count_desktop#833] | |
| : : +- Relation[id#763,client_id#764,metadata#765,environment#766,payload#767,submission#768] parquet | |
| : +- Aggregate [client_id#1028], [client_id#1028, sum(subsession_length#1025L) AS usage_seconds#1040L, sum(cast(total_uri_count#1026 as bigint)) AS total_uri_count#1042L, avg(cast(unique_domains_count#1027 as bigint)) AS unique_domains_count_per_profile#1044] | |
| : +- Project [CASE WHEN (subsession_length#15L > cast(172800 as bigint)) THEN cast(172800 as bigint) ELSE CASE WHEN (subsession_length#15L < cast(0 as bigint)) THEN cast(0 as bigint) ELSE subsession_length#15L END END AS subsession_length#1025L, coalesce(scalar_parent_browser_engagement_total_uri_count#106, 0) AS total_uri_count#1026, coalesce(scalar_parent_browser_engagement_unique_domains_count#108, 0) AS unique_domains_count#1027, client_id#1 AS client_id#1028] | |
| : +- Union | |
| : :- Project [app_version#24, attribution#909, client_id#1, country#4, default_search_engine#54, distribution_id#16, locale#31, normalized_channel#3, profile_creation_date#13L, submission_date_s3#379, subsession_length#15L, subsession_start_date#14, sync_configured#18, sync_count_desktop#19, sync_count_mobile#20, timestamp#25L, scalar_parent_browser_engagement_total_uri_count#106, scalar_parent_browser_engagement_unique_domains_count#108] | |
| : : +- Project [client_id#1, app_version#24, coalesce(_rhs_column_name#885, attribution#69) AS attribution#909, country#4, default_search_engine#54, distribution_id#16, locale#31, normalized_channel#3, profile_creation_date#13L, submission_date_s3#379, subsession_length#15L, subsession_start_date#14, sync_configured#18, sync_count_desktop#19, sync_count_mobile#20, timestamp#25L, scalar_parent_browser_engagement_total_uri_count#106, scalar_parent_browser_engagement_unique_domains_count#108, _rhs_column_name#885] | |
| : : +- Project [client_id#1, app_version#24, attribution#69, country#4, default_search_engine#54, distribution_id#16, locale#31, normalized_channel#3, profile_creation_date#13L, submission_date_s3#379, subsession_length#15L, subsession_start_date#14, sync_configured#18, sync_count_desktop#19, sync_count_mobile#20, timestamp#25L, scalar_parent_browser_engagement_total_uri_count#106, scalar_parent_browser_engagement_unique_domains_count#108, _rhs_column_name#885] | |
| : : +- Join LeftOuter, (client_id#1 = client_id#828) | |
| : : :- Project [app_version#24, attribution#69, client_id#1, country#4, default_search_engine#54, distribution_id#16, locale#31, normalized_channel#3, profile_creation_date#13L, submission_date_s3#379, subsession_length#15L, subsession_start_date#14, sync_configured#18, sync_count_desktop#19, sync_count_mobile#20, timestamp#25L, scalar_parent_browser_engagement_total_uri_count#106, scalar_parent_browser_engagement_unique_domains_count#108] | |
| : : : +- Filter ((((subsession_start_date#14 >= 2017-11-05) && (subsession_start_date#14 < 2017-11-12)) && (submission_date_s3#379 >= 20171105)) && (submission_date_s3#379 < 20171122)) | |
| : : : +- Relation[document_id#0,client_id#1,channel#2,normalized_channel#3,country#4,city#5,os#6,os_version#7,os_service_pack_major#8L,os_service_pack_minor#9L,windows_build_number#10L,windows_ubr#11L,install_year#12L,profile_creation_date#13L,subsession_start_date#14,subsession_length#15L,distribution_id#16,submission_date#17,sync_configured#18,sync_count_desktop#19,sync_count_mobile#20,app_build_id#21,app_display_version#22,app_name#23,... 357 more fields] parquet | |
| : : +- Project [client_id#828, attribution#818 AS _rhs_column_name#885] | |
| : : +- Filter isnotnull(attribution#818) | |
| : : +- Project [cast(null as int) AS scalar_parent_browser_engagement_total_uri_count#815, cast(null as bigint) AS subsession_length#816L, cast(null as boolean) AS sync_configured#817, environment#766.settings.attribution AS attribution#818, from_unixtime(cast((cast(metadata#765.creation_timestamp as double) / cast(1000000000 as double)) as bigint), yyyy-MM-dd'T'HH:mm:ss.S'+00:00', Some(UTC)) AS subsession_start_date#819, environment#766.settings.locale AS locale#820, metadata#765.timestamp AS timestamp#821L, cast(null as int) AS sync_count_mobile#822, metadata#765.normalized_channel AS normalized_channel#823, submission#768 AS submission_date_s3#824, cast((crc32(encode(client_id#764, UTF-8)) % cast(100 as bigint)) as string) AS sample_id#825, environment#766.partner.distribution_id AS distribution_id#826, environment#766.settings.default_search_engine AS default_search_engine#827, client_id#764 AS client_id#828, metadata#765.geo_country AS country#829, environment#766.profile.creation_date AS profile_creation_date#830L, environment#766.build.version AS app_version#831, cast(null as int) AS scalar_parent_browser_engagement_unique_domains_count#832, cast(null as int) AS sync_count_desktop#833] | |
| : : +- Relation[id#763,client_id#764,metadata#765,environment#766,payload#767,submission#768] parquet | |
| : +- Project [app_version#831, attribution#818, client_id#828, country#829, default_search_engine#827, distribution_id#826, locale#820, normalized_channel#823, profile_creation_date#830L, submission_date_s3#824, subsession_length#816L, subsession_start_date#819, sync_configured#817, sync_count_desktop#833, sync_count_mobile#822, timestamp#821L, scalar_parent_browser_engagement_total_uri_count#815, scalar_parent_browser_engagement_unique_domains_count#832] | |
| : +- Filter ((((subsession_start_date#819 >= 2017-11-05) && (subsession_start_date#819 < 2017-11-12)) && (submission_date_s3#824 >= 20171105)) && (submission_date_s3#824 < 20171122)) | |
| : +- Project [cast(null as int) AS scalar_parent_browser_engagement_total_uri_count#815, cast(null as bigint) AS subsession_length#816L, cast(null as boolean) AS sync_configured#817, environment#766.settings.attribution AS attribution#818, from_unixtime(cast((cast(metadata#765.creation_timestamp as double) / cast(1000000000 as double)) as bigint), yyyy-MM-dd'T'HH:mm:ss.S'+00:00', Some(UTC)) AS subsession_start_date#819, environment#766.settings.locale AS locale#820, metadata#765.timestamp AS timestamp#821L, cast(null as int) AS sync_count_mobile#822, metadata#765.normalized_channel AS normalized_channel#823, submission#768 AS submission_date_s3#824, cast((crc32(encode(client_id#764, UTF-8)) % cast(100 as bigint)) as string) AS sample_id#825, environment#766.partner.distribution_id AS distribution_id#826, environment#766.settings.default_search_engine AS default_search_engine#827, client_id#764 AS client_id#828, metadata#765.geo_country AS country#829, environment#766.profile.creation_date AS profile_creation_date#830L, environment#766.build.version AS app_version#831, cast(null as int) AS scalar_parent_browser_engagement_unique_domains_count#832, cast(null as int) AS sync_count_desktop#833] | |
| : +- Relation[id#763,client_id#764,metadata#765,environment#766,payload#767,submission#768] parquet | |
| +- Project [date#993 AS profile_creation#1446, effective_version#994 AS effective_version#1447] | |
| +- LogicalRDD [date#993, effective_version#994] | |
| == Analyzed Logical Plan == | |
| channel: string, geo: string, is_funnelcake: string, acquisition_period: string, start_version: string, sync_usage: string, current_version: string, current_week: bigint, source: string, medium: string, campaign: string, content: string, distribution_id: string, default_search_engine: string, locale: string, is_active: string, n_profiles: bigint, usage_hours: double, sum_squared_usage_hours: double, total_uri_count: bigint, unique_domains_count_per_profile: double | |
| Repartition 1, true | |
| +- Project [channel#1422, geo#1421, is_funnelcake#1419, acquisition_period#1417, start_version#1583, sync_usage#1420, current_version#1418, current_week#1355L, source#1414, medium#1416, campaign#1409, content#1413, distribution_id#1410, default_search_engine#1415, locale#1411, is_active#1412, n_profiles#1636L, usage_hours#1638, sum_squared_usage_hours#1640, total_uri_count#1642L, unique_domains_count_per_profile#1645] | |
| +- Aggregate [medium#1416, campaign#1409, source#1414, locale#1411, geo#1421, current_week#1355L, is_active#1412, content#1413, distribution_id#1410, default_search_engine#1415, acquisition_period#1417, current_version#1418, is_funnelcake#1419, sync_usage#1420, start_version#1583, channel#1422], [medium#1416, campaign#1409, source#1414, locale#1411, geo#1421, current_week#1355L, is_active#1412, content#1413, distribution_id#1410, default_search_engine#1415, acquisition_period#1417, current_version#1418, is_funnelcake#1419, sync_usage#1420, start_version#1583, channel#1422, sum(cast(n_profiles#1356 as bigint)) AS n_profiles#1636L, sum(total_uri_count#1357L) AS total_uri_count#1642L, sum(sum_squared_usage_hours#1358) AS sum_squared_usage_hours#1640, sum(usage_hours#1359) AS usage_hours#1638, (sum(unique_domains_count_per_profile#1360) / cast(sum(cast(n_profiles#1356 as bigint)) as double)) AS unique_domains_count_per_profile#1645] | |
| +- Project [unique_domains_count_per_profile#1360, medium#1416, geo#1421, campaign#1409, locale#1411, is_active#1412, subsession_start#1408, acquisition_period#1417, current_version#1418, is_funnelcake#1419, sum_squared_usage_hours#1358, usage_hours#1359, source#1414, n_profiles#1356, current_week#1355L, total_uri_count#1357L, content#1413, distribution_id#1410, default_search_engine#1415, profile_creation#1407, sync_usage#1420, start_version#1583, channel#1422] | |
| +- Project [profile_creation#1407, subsession_start#1408, campaign#1409, distribution_id#1410, locale#1411, current_week#1355L, is_active#1412, content#1413, source#1414, default_search_engine#1415, medium#1416, acquisition_period#1417, current_version#1418, is_funnelcake#1419, sync_usage#1420, geo#1421, channel#1422, n_profiles#1356, total_uri_count#1357L, sum_squared_usage_hours#1358, usage_hours#1359, unique_domains_count_per_profile#1360, effective_version#1447, _major#1475, ... 2 more fields] | |
| +- Project [profile_creation#1407, subsession_start#1408, campaign#1409, distribution_id#1410, locale#1411, current_week#1355L, is_active#1412, content#1413, source#1414, default_search_engine#1415, medium#1416, acquisition_period#1417, current_version#1418, is_funnelcake#1419, sync_usage#1420, geo#1421, channel#1422, n_profiles#1356, total_uri_count#1357L, sum_squared_usage_hours#1358, usage_hours#1359, unique_domains_count_per_profile#1360, effective_version#1447, _major#1475, ... 2 more fields] | |
| +- Project [profile_creation#1407, subsession_start#1408, campaign#1409, distribution_id#1410, locale#1411, current_week#1355L, is_active#1412, content#1413, source#1414, default_search_engine#1415, medium#1416, acquisition_period#1417, current_version#1418, is_funnelcake#1419, sync_usage#1420, geo#1421, channel#1422, n_profiles#1356, total_uri_count#1357L, sum_squared_usage_hours#1358, usage_hours#1359, unique_domains_count_per_profile#1360, effective_version#1447, _major#1475, CASE WHEN StartsWith(channel#1422, beta) THEN 1 ELSE CASE WHEN StartsWith(channel#1422, aurora) THEN 2 ELSE CASE WHEN StartsWith(channel#1422, nightly) THEN 3 ELSE 0 END END END AS _offset#1501] | |
| +- Project [profile_creation#1407, subsession_start#1408, campaign#1409, distribution_id#1410, locale#1411, current_week#1355L, is_active#1412, content#1413, source#1414, default_search_engine#1415, medium#1416, acquisition_period#1417, current_version#1418, is_funnelcake#1419, sync_usage#1420, geo#1421, channel#1422, n_profiles#1356, total_uri_count#1357L, sum_squared_usage_hours#1358, usage_hours#1359, unique_domains_count_per_profile#1360, effective_version#1447, cast(split(effective_version#1447, \.)[0] as int) AS _major#1475] | |
| +- Project [profile_creation#1407, subsession_start#1408, campaign#1409, distribution_id#1410, locale#1411, current_week#1355L, is_active#1412, content#1413, source#1414, default_search_engine#1415, medium#1416, acquisition_period#1417, current_version#1418, is_funnelcake#1419, sync_usage#1420, geo#1421, channel#1422, n_profiles#1356, total_uri_count#1357L, sum_squared_usage_hours#1358, usage_hours#1359, unique_domains_count_per_profile#1360, effective_version#1447] | |
| +- Join LeftOuter, (profile_creation#1407 = profile_creation#1446) | |
| :- Project [coalesce(profile_creation#1155, cast(unknown as string)) AS profile_creation#1407, coalesce(subsession_start#1129, cast(unknown as string)) AS subsession_start#1408, coalesce(campaign#1130, cast(unknown as string)) AS campaign#1409, coalesce(distribution_id#1131, cast(unknown as string)) AS distribution_id#1410, coalesce(locale#1132, cast(unknown as string)) AS locale#1411, current_week#1355L, coalesce(is_active#1134, cast(unknown as string)) AS is_active#1412, coalesce(content#1135, cast(unknown as string)) AS content#1413, coalesce(source#1136, cast(unknown as string)) AS source#1414, coalesce(default_search_engine#1137, cast(unknown as string)) AS default_search_engine#1415, coalesce(medium#1138, cast(unknown as string)) AS medium#1416, coalesce(acquisition_period#1255, cast(unknown as string)) AS acquisition_period#1417, coalesce(current_version#1140, cast(unknown as string)) AS current_version#1418, coalesce(is_funnelcake#1256, cast(unknown as string)) AS is_funnelcake#1419, coalesce(sync_usage#1142, cast(unknown as string)) AS sync_usage#1420, coalesce(geo#1143, cast(unknown as string)) AS geo#1421, coalesce(channel#1144, cast(unknown as string)) AS channel#1422, n_profiles#1356, total_uri_count#1357L, sum_squared_usage_hours#1358, usage_hours#1359, unique_domains_count_per_profile#1360] | |
| : +- Project [profile_creation#1155, subsession_start#1129, campaign#1130, distribution_id#1131, locale#1132, coalesce(current_week#1303L, cast(0.0 as bigint)) AS current_week#1355L, is_active#1134, content#1135, source#1136, default_search_engine#1137, medium#1138, acquisition_period#1255, current_version#1140, is_funnelcake#1256, sync_usage#1142, geo#1143, channel#1144, coalesce(n_profiles#1304, cast(0.0 as int)) AS n_profiles#1356, coalesce(total_uri_count#1305L, cast(0.0 as bigint)) AS total_uri_count#1357L, coalesce(nanvl(sum_squared_usage_hours#1306, cast(null as double)), cast(0.0 as double)) AS sum_squared_usage_hours#1358, coalesce(nanvl(usage_hours#1307, cast(null as double)), cast(0.0 as double)) AS usage_hours#1359, coalesce(nanvl(unique_domains_count_per_profile#1308, cast(null as double)), cast(0.0 as double)) AS unique_domains_count_per_profile#1360] | |
| : +- Project [profile_creation#1155, subsession_start#1129, campaign#1130, distribution_id#1131, locale#1132, coalesce(current_week#1254L, cast(0.0 as bigint)) AS current_week#1303L, is_active#1134, content#1135, source#1136, default_search_engine#1137, medium#1138, acquisition_period#1255, current_version#1140, is_funnelcake#1256, sync_usage#1142, geo#1143, channel#1144, coalesce(n_profiles#1150, cast(0.0 as int)) AS n_profiles#1304, coalesce(total_uri_count#1151L, cast(0.0 as bigint)) AS total_uri_count#1305L, coalesce(nanvl(sum_squared_usage_hours#1152, cast(null as double)), cast(0.0 as double)) AS sum_squared_usage_hours#1306, coalesce(nanvl(usage_hours#1153, cast(null as double)), cast(0.0 as double)) AS usage_hours#1307, coalesce(nanvl(unique_domains_count_per_profile#1154, cast(null as double)), cast(0.0 as double)) AS unique_domains_count_per_profile#1308] | |
| : +- Project [profile_creation#1155, subsession_start#1129, campaign#1130, distribution_id#1131, locale#1132, coalesce(current_week#1133L, cast(-1 as bigint)) AS current_week#1254L, is_active#1134, content#1135, source#1136, default_search_engine#1137, medium#1138, coalesce(acquisition_period#1139, cast(2000-01-01 as string)) AS acquisition_period#1255, current_version#1140, coalesce(is_funnelcake#1141, cast(no as string)) AS is_funnelcake#1256, sync_usage#1142, geo#1143, channel#1144, n_profiles#1150, total_uri_count#1151L, sum_squared_usage_hours#1152, usage_hours#1153, unique_domains_count_per_profile#1154] | |
| : +- Project [profile_creation#1155, CASE WHEN _is_valid#1179 THEN from_unixtime(unix_timestamp(subsession_start_date#14, yyyy-MM-dd, Some(UTC)), yyyy-MM-dd HH:mm:ss, Some(UTC)) ELSE cast(null as string) END AS subsession_start#1129, CASE WHEN _is_valid#1179 THEN attribution#909.campaign ELSE cast(null as string) END AS campaign#1130, CASE WHEN _is_valid#1179 THEN distribution_id#16 ELSE cast(null as string) END AS distribution_id#1131, CASE WHEN _is_valid#1179 THEN locale#31 ELSE cast(null as string) END AS locale#1132, CASE WHEN _is_valid#1179 THEN cast(CASE WHEN (datediff(cast(from_unixtime(unix_timestamp(subsession_start_date#14, yyyy-MM-dd, Some(UTC)), yyyy-MM-dd HH:mm:ss, Some(UTC)) as date), cast(from_unixtime((profile_creation_date#13L * cast(86400 as bigint)), yyyy-MM-dd HH:mm:ss, Some(UTC)) as date)) < 0) THEN cast(-1 as bigint) ELSE FLOOR((cast(datediff(cast(from_unixtime(unix_timestamp(subsession_start_date#14, yyyy-MM-dd, Some(UTC)), yyyy-MM-dd HH:mm:ss, Some(UTC)) as date), cast(from_unixtime((profile_creation_date#13L * cast(86400 as bigint)), yyyy-MM-dd HH:mm:ss, Some(UTC)) as date)) as double) / cast(7 as double))) END as bigint) ELSE cast(null as bigint) END AS current_week#1133L, CASE WHEN _is_valid#1179 THEN CASE WHEN (from_unixtime(unix_timestamp(subsession_start_date#14, yyyy-MM-dd, Some(UTC)), yyyy-MM-dd HH:mm:ss, Some(UTC)) < from_unixtime(unix_timestamp(20171105, yyyyMMdd, Some(UTC)), yyyy-MM-dd HH:mm:ss, Some(UTC))) THEN no ELSE yes END ELSE cast(null as string) END AS is_active#1134, CASE WHEN _is_valid#1179 THEN attribution#909.content ELSE cast(null as string) END AS content#1135, CASE WHEN _is_valid#1179 THEN attribution#909.source ELSE cast(null as string) END AS source#1136, CASE WHEN _is_valid#1179 THEN default_search_engine#54 ELSE cast(null as string) END AS default_search_engine#1137, CASE WHEN _is_valid#1179 THEN attribution#909.medium ELSE cast(null as string) END AS medium#1138, CASE WHEN _is_valid#1179 THEN date_format(cast(date_sub(next_day(cast(from_unixtime((profile_creation_date#13L * cast(86400 as bigint)), yyyy-MM-dd HH:mm:ss, Some(UTC)) as date), Sun), 7) as timestamp), yyyy-MM-dd, Some(UTC)) ELSE cast(null as string) END AS acquisition_period#1139, CASE WHEN _is_valid#1179 THEN app_version#24 ELSE cast(null as string) END AS current_version#1140, CASE WHEN _is_valid#1179 THEN CASE WHEN distribution_id#16 RLIKE ^mozilla[0-9]+.*$ THEN yes ELSE no END ELSE cast(null as string) END AS is_funnelcake#1141, CASE WHEN _is_valid#1179 THEN CASE WHEN ((coalesce(sync_count_desktop#19, 0) + coalesce(sync_count_mobile#20, 0)) > 1) THEN multiple ELSE CASE WHEN (((coalesce(sync_count_desktop#19, 0) + coalesce(sync_count_mobile#20, 0)) = 1) || sync_configured#18) THEN single ELSE CASE WHEN isnotnull(sync_configured#18) THEN no ELSE cast(null as string) END END END ELSE cast(null as string) END AS sync_usage#1142, CASE WHEN _is_valid#1179 THEN CASE WHEN country#4 IN (BE,FR,DE,JP,HU,HK,DZ,BR,FI,GR,RU,NL,TW,TR,PH,RO,CA,PL,CH,VE,CN,EG,IR,IT,VN,CZ,AR,AU,AT,IN,ID,ES,UA,US,MY,MX,SE,GB) THEN country#4 ELSE ROW END ELSE cast(null as string) END AS geo#1143, CASE WHEN _is_valid#1179 THEN CASE WHEN distribution_id#16 RLIKE ^mozilla[0-9]+.*$ THEN concat(normalized_channel#3, -cck-, distribution_id#16) ELSE normalized_channel#3 END ELSE cast(null as string) END AS channel#1144, 1 AS n_profiles#1150, total_uri_count#1042L AS total_uri_count#1151L, POWER((cast(usage_seconds#1040L as double) / cast(3600.0 as double)), 2.0) AS sum_squared_usage_hours#1152, (cast(usage_seconds#1040L as double) / cast(3600.0 as double)) AS usage_hours#1153, unique_domains_count_per_profile#1044 AS unique_domains_count_per_profile#1154] | |
| : +- Project [unique_domains_count_per_profile#1044, attribution#909, locale#31, timestamp#25L, usage_seconds#1040L, sync_count_mobile#20, submission_date_s3#379, subsession_length#15L, client_id#1, scalar_parent_browser_engagement_unique_domains_count#108, normalized_channel#3, scalar_parent_browser_engagement_total_uri_count#106, sync_configured#18, subsession_start_date#14, country#4, total_uri_count#1042L, distribution_id#16, default_search_engine#54, profile_creation_date#13L, app_version#24, sync_count_desktop#19, profile_creation#1155, ((isnotnull(profile_creation#1155) && (profile_creation#1155 > 2000-01-01)) && (from_unixtime((profile_creation_date#13L * cast(86400 as bigint)), yyyy-MM-dd HH:mm:ss, Some(UTC)) <= from_unixtime(unix_timestamp(subsession_start_date#14, yyyy-MM-dd, Some(UTC)), yyyy-MM-dd HH:mm:ss, Some(UTC)))) AS _is_valid#1179] | |
| : +- Project [unique_domains_count_per_profile#1044, attribution#909, locale#31, timestamp#25L, usage_seconds#1040L, sync_count_mobile#20, submission_date_s3#379, subsession_length#15L, client_id#1, scalar_parent_browser_engagement_unique_domains_count#108, normalized_channel#3, scalar_parent_browser_engagement_total_uri_count#106, sync_configured#18, subsession_start_date#14, country#4, total_uri_count#1042L, distribution_id#16, default_search_engine#54, profile_creation_date#13L, app_version#24, sync_count_desktop#19, date_format(cast(from_unixtime((profile_creation_date#13L * cast(86400 as bigint)), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC)) AS profile_creation#1155] | |
| : +- Project [unique_domains_count_per_profile#1044, attribution#909, locale#31, timestamp#25L, usage_seconds#1040L, sync_count_mobile#20, submission_date_s3#379, subsession_length#15L, client_id#1, scalar_parent_browser_engagement_unique_domains_count#108, normalized_channel#3, scalar_parent_browser_engagement_total_uri_count#106, sync_configured#18, subsession_start_date#14, country#4, total_uri_count#1042L, distribution_id#16, default_search_engine#54, profile_creation_date#13L, app_version#24, sync_count_desktop#19] | |
| : +- Project [client_id#1, app_version#24, attribution#909, country#4, default_search_engine#54, distribution_id#16, locale#31, normalized_channel#3, profile_creation_date#13L, submission_date_s3#379, subsession_length#15L, subsession_start_date#14, sync_configured#18, sync_count_desktop#19, sync_count_mobile#20, timestamp#25L, scalar_parent_browser_engagement_total_uri_count#106, scalar_parent_browser_engagement_unique_domains_count#108, client_rank#999, usage_seconds#1040L, total_uri_count#1042L, unique_domains_count_per_profile#1044] | |
| : +- Join Inner, (client_id#1 = client_id#1028) | |
| : :- Filter (client_rank#999 = 1) | |
| : : +- Project [app_version#24, attribution#909, client_id#1, country#4, default_search_engine#54, distribution_id#16, locale#31, normalized_channel#3, profile_creation_date#13L, submission_date_s3#379, subsession_length#15L, subsession_start_date#14, sync_configured#18, sync_count_desktop#19, sync_count_mobile#20, timestamp#25L, scalar_parent_browser_engagement_total_uri_count#106, scalar_parent_browser_engagement_unique_domains_count#108, client_rank#999] | |
| : : +- Project [app_version#24, attribution#909, client_id#1, country#4, default_search_engine#54, distribution_id#16, locale#31, normalized_channel#3, profile_creation_date#13L, submission_date_s3#379, subsession_length#15L, subsession_start_date#14, sync_configured#18, sync_count_desktop#19, sync_count_mobile#20, timestamp#25L, scalar_parent_browser_engagement_total_uri_count#106, scalar_parent_browser_engagement_unique_domains_count#108, client_rank#999, client_rank#999] | |
| : : +- Window [row_number() windowspecdefinition(client_id#1, timestamp#25L DESC NULLS LAST, ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS client_rank#999], [client_id#1], [timestamp#25L DESC NULLS LAST] | |
| : : +- Project [app_version#24, attribution#909, client_id#1, country#4, default_search_engine#54, distribution_id#16, locale#31, normalized_channel#3, profile_creation_date#13L, submission_date_s3#379, subsession_length#15L, subsession_start_date#14, sync_configured#18, sync_count_desktop#19, sync_count_mobile#20, timestamp#25L, scalar_parent_browser_engagement_total_uri_count#106, scalar_parent_browser_engagement_unique_domains_count#108] | |
| : : +- Union | |
| : : :- Project [app_version#24, attribution#909, client_id#1, country#4, default_search_engine#54, distribution_id#16, locale#31, normalized_channel#3, profile_creation_date#13L, submission_date_s3#379, subsession_length#15L, subsession_start_date#14, sync_configured#18, sync_count_desktop#19, sync_count_mobile#20, timestamp#25L, scalar_parent_browser_engagement_total_uri_count#106, scalar_parent_browser_engagement_unique_domains_count#108] | |
| : : : +- Project [client_id#1, app_version#24, coalesce(_rhs_column_name#885, attribution#69) AS attribution#909, country#4, default_search_engine#54, distribution_id#16, locale#31, normalized_channel#3, profile_creation_date#13L, submission_date_s3#379, subsession_length#15L, subsession_start_date#14, sync_configured#18, sync_count_desktop#19, sync_count_mobile#20, timestamp#25L, scalar_parent_browser_engagement_total_uri_count#106, scalar_parent_browser_engagement_unique_domains_count#108, _rhs_column_name#885] | |
| : : : +- Project [client_id#1, app_version#24, attribution#69, country#4, default_search_engine#54, distribution_id#16, locale#31, normalized_channel#3, profile_creation_date#13L, submission_date_s3#379, subsession_length#15L, subsession_start_date#14, sync_configured#18, sync_count_desktop#19, sync_count_mobile#20, timestamp#25L, scalar_parent_browser_engagement_total_uri_count#106, scalar_parent_browser_engagement_unique_domains_count#108, _rhs_column_name#885] | |
| : : : +- Join LeftOuter, (client_id#1 = client_id#828) | |
| : : : :- Project [app_version#24, attribution#69, client_id#1, country#4, default_search_engine#54, distribution_id#16, locale#31, normalized_channel#3, profile_creation_date#13L, submission_date_s3#379, subsession_length#15L, subsession_start_date#14, sync_configured#18, sync_count_desktop#19, sync_count_mobile#20, timestamp#25L, scalar_parent_browser_engagement_total_uri_count#106, scalar_parent_browser_engagement_unique_domains_count#108] | |
| : : : : +- Filter ((((subsession_start_date#14 >= 2017-11-05) && (subsession_start_date#14 < 2017-11-12)) && (submission_date_s3#379 >= 20171105)) && (submission_date_s3#379 < 20171122)) | |
| : : : : +- Relation[document_id#0,client_id#1,channel#2,normalized_channel#3,country#4,city#5,os#6,os_version#7,os_service_pack_major#8L,os_service_pack_minor#9L,windows_build_number#10L,windows_ubr#11L,install_year#12L,profile_creation_date#13L,subsession_start_date#14,subsession_length#15L,distribution_id#16,submission_date#17,sync_configured#18,sync_count_desktop#19,sync_count_mobile#20,app_build_id#21,app_display_version#22,app_name#23,... 357 more fields] parquet | |
| : : : +- Project [client_id#828, attribution#818 AS _rhs_column_name#885] | |
| : : : +- Filter isnotnull(attribution#818) | |
| : : : +- Project [cast(null as int) AS scalar_parent_browser_engagement_total_uri_count#815, cast(null as bigint) AS subsession_length#816L, cast(null as boolean) AS sync_configured#817, environment#766.settings.attribution AS attribution#818, from_unixtime(cast((cast(metadata#765.creation_timestamp as double) / cast(1000000000 as double)) as bigint), yyyy-MM-dd'T'HH:mm:ss.S'+00:00', Some(UTC)) AS subsession_start_date#819, environment#766.settings.locale AS locale#820, metadata#765.timestamp AS timestamp#821L, cast(null as int) AS sync_count_mobile#822, metadata#765.normalized_channel AS normalized_channel#823, submission#768 AS submission_date_s3#824, cast((crc32(encode(client_id#764, UTF-8)) % cast(100 as bigint)) as string) AS sample_id#825, environment#766.partner.distribution_id AS distribution_id#826, environment#766.settings.default_search_engine AS default_search_engine#827, client_id#764 AS client_id#828, metadata#765.geo_country AS country#829, environment#766.profile.creation_date AS profile_creation_date#830L, environment#766.build.version AS app_version#831, cast(null as int) AS scalar_parent_browser_engagement_unique_domains_count#832, cast(null as int) AS sync_count_desktop#833] | |
| : : : +- Relation[id#763,client_id#764,metadata#765,environment#766,payload#767,submission#768] parquet | |
| : : +- Project [app_version#831, attribution#818, client_id#828, country#829, default_search_engine#827, distribution_id#826, locale#820, normalized_channel#823, profile_creation_date#830L, submission_date_s3#824, subsession_length#816L, subsession_start_date#819, sync_configured#817, sync_count_desktop#833, sync_count_mobile#822, timestamp#821L, scalar_parent_browser_engagement_total_uri_count#815, scalar_parent_browser_engagement_unique_domains_count#832] | |
| : : +- Filter ((((subsession_start_date#819 >= 2017-11-05) && (subsession_start_date#819 < 2017-11-12)) && (submission_date_s3#824 >= 20171105)) && (submission_date_s3#824 < 20171122)) | |
| : : +- Project [cast(null as int) AS scalar_parent_browser_engagement_total_uri_count#815, cast(null as bigint) AS subsession_length#816L, cast(null as boolean) AS sync_configured#817, environment#766.settings.attribution AS attribution#818, from_unixtime(cast((cast(metadata#765.creation_timestamp as double) / cast(1000000000 as double)) as bigint), yyyy-MM-dd'T'HH:mm:ss.S'+00:00', Some(UTC)) AS subsession_start_date#819, environment#766.settings.locale AS locale#820, metadata#765.timestamp AS timestamp#821L, cast(null as int) AS sync_count_mobile#822, metadata#765.normalized_channel AS normalized_channel#823, submission#768 AS submission_date_s3#824, cast((crc32(encode(client_id#764, UTF-8)) % cast(100 as bigint)) as string) AS sample_id#825, environment#766.partner.distribution_id AS distribution_id#826, environment#766.settings.default_search_engine AS default_search_engine#827, client_id#764 AS client_id#828, metadata#765.geo_country AS country#829, environment#766.profile.creation_date AS profile_creation_date#830L, environment#766.build.version AS app_version#831, cast(null as int) AS scalar_parent_browser_engagement_unique_domains_count#832, cast(null as int) AS sync_count_desktop#833] | |
| : : +- Relation[id#763,client_id#764,metadata#765,environment#766,payload#767,submission#768] parquet | |
| : +- Aggregate [client_id#1028], [client_id#1028, sum(subsession_length#1025L) AS usage_seconds#1040L, sum(cast(total_uri_count#1026 as bigint)) AS total_uri_count#1042L, avg(cast(unique_domains_count#1027 as bigint)) AS unique_domains_count_per_profile#1044] | |
| : +- Project [CASE WHEN (subsession_length#15L > cast(172800 as bigint)) THEN cast(172800 as bigint) ELSE CASE WHEN (subsession_length#15L < cast(0 as bigint)) THEN cast(0 as bigint) ELSE subsession_length#15L END END AS subsession_length#1025L, coalesce(scalar_parent_browser_engagement_total_uri_count#106, 0) AS total_uri_count#1026, coalesce(scalar_parent_browser_engagement_unique_domains_count#108, 0) AS unique_domains_count#1027, client_id#1 AS client_id#1028] | |
| : +- Union | |
| : :- Project [app_version#24, attribution#909, client_id#1, country#4, default_search_engine#54, distribution_id#16, locale#31, normalized_channel#3, profile_creation_date#13L, submission_date_s3#379, subsession_length#15L, subsession_start_date#14, sync_configured#18, sync_count_desktop#19, sync_count_mobile#20, timestamp#25L, scalar_parent_browser_engagement_total_uri_count#106, scalar_parent_browser_engagement_unique_domains_count#108] | |
| : : +- Project [client_id#1, app_version#24, coalesce(_rhs_column_name#885, attribution#69) AS attribution#909, country#4, default_search_engine#54, distribution_id#16, locale#31, normalized_channel#3, profile_creation_date#13L, submission_date_s3#379, subsession_length#15L, subsession_start_date#14, sync_configured#18, sync_count_desktop#19, sync_count_mobile#20, timestamp#25L, scalar_parent_browser_engagement_total_uri_count#106, scalar_parent_browser_engagement_unique_domains_count#108, _rhs_column_name#885] | |
| : : +- Project [client_id#1, app_version#24, attribution#69, country#4, default_search_engine#54, distribution_id#16, locale#31, normalized_channel#3, profile_creation_date#13L, submission_date_s3#379, subsession_length#15L, subsession_start_date#14, sync_configured#18, sync_count_desktop#19, sync_count_mobile#20, timestamp#25L, scalar_parent_browser_engagement_total_uri_count#106, scalar_parent_browser_engagement_unique_domains_count#108, _rhs_column_name#885] | |
| : : +- Join LeftOuter, (client_id#1 = client_id#828) | |
| : : :- Project [app_version#24, attribution#69, client_id#1, country#4, default_search_engine#54, distribution_id#16, locale#31, normalized_channel#3, profile_creation_date#13L, submission_date_s3#379, subsession_length#15L, subsession_start_date#14, sync_configured#18, sync_count_desktop#19, sync_count_mobile#20, timestamp#25L, scalar_parent_browser_engagement_total_uri_count#106, scalar_parent_browser_engagement_unique_domains_count#108] | |
| : : : +- Filter ((((subsession_start_date#14 >= 2017-11-05) && (subsession_start_date#14 < 2017-11-12)) && (submission_date_s3#379 >= 20171105)) && (submission_date_s3#379 < 20171122)) | |
| : : : +- Relation[document_id#0,client_id#1,channel#2,normalized_channel#3,country#4,city#5,os#6,os_version#7,os_service_pack_major#8L,os_service_pack_minor#9L,windows_build_number#10L,windows_ubr#11L,install_year#12L,profile_creation_date#13L,subsession_start_date#14,subsession_length#15L,distribution_id#16,submission_date#17,sync_configured#18,sync_count_desktop#19,sync_count_mobile#20,app_build_id#21,app_display_version#22,app_name#23,... 357 more fields] parquet | |
| : : +- Project [client_id#828, attribution#818 AS _rhs_column_name#885] | |
| : : +- Filter isnotnull(attribution#818) | |
| : : +- Project [cast(null as int) AS scalar_parent_browser_engagement_total_uri_count#815, cast(null as bigint) AS subsession_length#816L, cast(null as boolean) AS sync_configured#817, environment#766.settings.attribution AS attribution#818, from_unixtime(cast((cast(metadata#765.creation_timestamp as double) / cast(1000000000 as double)) as bigint), yyyy-MM-dd'T'HH:mm:ss.S'+00:00', Some(UTC)) AS subsession_start_date#819, environment#766.settings.locale AS locale#820, metadata#765.timestamp AS timestamp#821L, cast(null as int) AS sync_count_mobile#822, metadata#765.normalized_channel AS normalized_channel#823, submission#768 AS submission_date_s3#824, cast((crc32(encode(client_id#764, UTF-8)) % cast(100 as bigint)) as string) AS sample_id#825, environment#766.partner.distribution_id AS distribution_id#826, environment#766.settings.default_search_engine AS default_search_engine#827, client_id#764 AS client_id#828, metadata#765.geo_country AS country#829, environment#766.profile.creation_date AS profile_creation_date#830L, environment#766.build.version AS app_version#831, cast(null as int) AS scalar_parent_browser_engagement_unique_domains_count#832, cast(null as int) AS sync_count_desktop#833] | |
| : : +- Relation[id#763,client_id#764,metadata#765,environment#766,payload#767,submission#768] parquet | |
| : +- Project [app_version#831, attribution#818, client_id#828, country#829, default_search_engine#827, distribution_id#826, locale#820, normalized_channel#823, profile_creation_date#830L, submission_date_s3#824, subsession_length#816L, subsession_start_date#819, sync_configured#817, sync_count_desktop#833, sync_count_mobile#822, timestamp#821L, scalar_parent_browser_engagement_total_uri_count#815, scalar_parent_browser_engagement_unique_domains_count#832] | |
| : +- Filter ((((subsession_start_date#819 >= 2017-11-05) && (subsession_start_date#819 < 2017-11-12)) && (submission_date_s3#824 >= 20171105)) && (submission_date_s3#824 < 20171122)) | |
| : +- Project [cast(null as int) AS scalar_parent_browser_engagement_total_uri_count#815, cast(null as bigint) AS subsession_length#816L, cast(null as boolean) AS sync_configured#817, environment#766.settings.attribution AS attribution#818, from_unixtime(cast((cast(metadata#765.creation_timestamp as double) / cast(1000000000 as double)) as bigint), yyyy-MM-dd'T'HH:mm:ss.S'+00:00', Some(UTC)) AS subsession_start_date#819, environment#766.settings.locale AS locale#820, metadata#765.timestamp AS timestamp#821L, cast(null as int) AS sync_count_mobile#822, metadata#765.normalized_channel AS normalized_channel#823, submission#768 AS submission_date_s3#824, cast((crc32(encode(client_id#764, UTF-8)) % cast(100 as bigint)) as string) AS sample_id#825, environment#766.partner.distribution_id AS distribution_id#826, environment#766.settings.default_search_engine AS default_search_engine#827, client_id#764 AS client_id#828, metadata#765.geo_country AS country#829, environment#766.profile.creation_date AS profile_creation_date#830L, environment#766.build.version AS app_version#831, cast(null as int) AS scalar_parent_browser_engagement_unique_domains_count#832, cast(null as int) AS sync_count_desktop#833] | |
| : +- Relation[id#763,client_id#764,metadata#765,environment#766,payload#767,submission#768] parquet | |
| +- Project [date#993 AS profile_creation#1446, effective_version#994 AS effective_version#1447] | |
| +- LogicalRDD [date#993, effective_version#994] | |
| == Optimized Logical Plan == | |
| Repartition 1, true | |
| +- Aggregate [medium#1416, campaign#1409, source#1414, locale#1411, geo#1421, current_week#1355L, is_active#1412, content#1413, distribution_id#1410, default_search_engine#1415, acquisition_period#1417, current_version#1418, is_funnelcake#1419, sync_usage#1420, start_version#1583, channel#1422], [channel#1422, geo#1421, is_funnelcake#1419, acquisition_period#1417, start_version#1583, sync_usage#1420, current_version#1418, current_week#1355L, source#1414, medium#1416, campaign#1409, content#1413, distribution_id#1410, default_search_engine#1415, locale#1411, is_active#1412, sum(cast(n_profiles#1356 as bigint)) AS n_profiles#1636L, sum(usage_hours#1359) AS usage_hours#1638, sum(sum_squared_usage_hours#1358) AS sum_squared_usage_hours#1640, sum(total_uri_count#1357L) AS total_uri_count#1642L, (sum(unique_domains_count_per_profile#1360) / cast(sum(cast(n_profiles#1356 as bigint)) as double)) AS unique_domains_count_per_profile#1645] | |
| +- Project [unique_domains_count_per_profile#1360, medium#1416, geo#1421, campaign#1409, locale#1411, is_active#1412, acquisition_period#1417, current_version#1418, is_funnelcake#1419, sum_squared_usage_hours#1358, usage_hours#1359, source#1414, n_profiles#1356, current_week#1355L, total_uri_count#1357L, content#1413, distribution_id#1410, default_search_engine#1415, sync_usage#1420, coalesce(CASE WHEN isnull(effective_version#994) THEN CASE WHEN (profile_creation#1407 < 2015-01-01) THEN older ELSE newer END ELSE CASE WHEN StartsWith(channel#1422, release) THEN effective_version#994 ELSE concat(cast((cast(split(effective_version#994, \.)[0] as int) + CASE WHEN StartsWith(channel#1422, beta) THEN 1 ELSE CASE WHEN StartsWith(channel#1422, aurora) THEN 2 ELSE CASE WHEN StartsWith(channel#1422, nightly) THEN 3 ELSE 0 END END END) as string), .0) END END, unknown) AS start_version#1583, channel#1422] | |
| +- Join LeftOuter, (profile_creation#1407 = profile_creation#1446) | |
| :- Project [coalesce(date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC)), unknown) AS profile_creation#1407, coalesce(CASE WHEN ((isnotnull(date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC))) && (date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC)) > 2000-01-01)) && (from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) <= from_unixtime(unix_timestamp(subsession_start_date#14, yyyy-MM-dd, Some(UTC)), yyyy-MM-dd HH:mm:ss, Some(UTC)))) THEN attribution#909.campaign ELSE null END, unknown) AS campaign#1409, coalesce(CASE WHEN ((isnotnull(date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC))) && (date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC)) > 2000-01-01)) && (from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) <= from_unixtime(unix_timestamp(subsession_start_date#14, yyyy-MM-dd, Some(UTC)), yyyy-MM-dd HH:mm:ss, Some(UTC)))) THEN distribution_id#16 ELSE null END, unknown) AS distribution_id#1410, coalesce(CASE WHEN ((isnotnull(date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC))) && (date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC)) > 2000-01-01)) && (from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) <= from_unixtime(unix_timestamp(subsession_start_date#14, yyyy-MM-dd, Some(UTC)), yyyy-MM-dd HH:mm:ss, Some(UTC)))) THEN locale#31 ELSE null END, unknown) AS locale#1411, coalesce(coalesce(coalesce(CASE WHEN ((isnotnull(date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC))) && (date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC)) > 2000-01-01)) && (from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) <= from_unixtime(unix_timestamp(subsession_start_date#14, yyyy-MM-dd, Some(UTC)), yyyy-MM-dd HH:mm:ss, Some(UTC)))) THEN CASE WHEN (datediff(cast(from_unixtime(unix_timestamp(subsession_start_date#14, yyyy-MM-dd, Some(UTC)), yyyy-MM-dd HH:mm:ss, Some(UTC)) as date), cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as date)) < 0) THEN -1 ELSE FLOOR((cast(datediff(cast(from_unixtime(unix_timestamp(subsession_start_date#14, yyyy-MM-dd, Some(UTC)), yyyy-MM-dd HH:mm:ss, Some(UTC)) as date), cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as date)) as double) / 7.0)) END ELSE null END, -1), 0), 0) AS current_week#1355L, coalesce(CASE WHEN ((isnotnull(date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC))) && (date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC)) > 2000-01-01)) && (from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) <= from_unixtime(unix_timestamp(subsession_start_date#14, yyyy-MM-dd, Some(UTC)), yyyy-MM-dd HH:mm:ss, Some(UTC)))) THEN CASE WHEN (from_unixtime(unix_timestamp(subsession_start_date#14, yyyy-MM-dd, Some(UTC)), yyyy-MM-dd HH:mm:ss, Some(UTC)) < 2017-11-05 00:00:00) THEN no ELSE yes END ELSE null END, unknown) AS is_active#1412, coalesce(CASE WHEN ((isnotnull(date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC))) && (date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC)) > 2000-01-01)) && (from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) <= from_unixtime(unix_timestamp(subsession_start_date#14, yyyy-MM-dd, Some(UTC)), yyyy-MM-dd HH:mm:ss, Some(UTC)))) THEN attribution#909.content ELSE null END, unknown) AS content#1413, coalesce(CASE WHEN ((isnotnull(date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC))) && (date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC)) > 2000-01-01)) && (from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) <= from_unixtime(unix_timestamp(subsession_start_date#14, yyyy-MM-dd, Some(UTC)), yyyy-MM-dd HH:mm:ss, Some(UTC)))) THEN attribution#909.source ELSE null END, unknown) AS source#1414, coalesce(CASE WHEN ((isnotnull(date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC))) && (date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC)) > 2000-01-01)) && (from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) <= from_unixtime(unix_timestamp(subsession_start_date#14, yyyy-MM-dd, Some(UTC)), yyyy-MM-dd HH:mm:ss, Some(UTC)))) THEN default_search_engine#54 ELSE null END, unknown) AS default_search_engine#1415, coalesce(CASE WHEN ((isnotnull(date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC))) && (date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC)) > 2000-01-01)) && (from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) <= from_unixtime(unix_timestamp(subsession_start_date#14, yyyy-MM-dd, Some(UTC)), yyyy-MM-dd HH:mm:ss, Some(UTC)))) THEN attribution#909.medium ELSE null END, unknown) AS medium#1416, coalesce(coalesce(CASE WHEN ((isnotnull(date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC))) && (date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC)) > 2000-01-01)) && (from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) <= from_unixtime(unix_timestamp(subsession_start_date#14, yyyy-MM-dd, Some(UTC)), yyyy-MM-dd HH:mm:ss, Some(UTC)))) THEN date_format(cast(date_sub(next_day(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as date), Sun), 7) as timestamp), yyyy-MM-dd, Some(UTC)) ELSE null END, 2000-01-01), unknown) AS acquisition_period#1417, coalesce(CASE WHEN ((isnotnull(date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC))) && (date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC)) > 2000-01-01)) && (from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) <= from_unixtime(unix_timestamp(subsession_start_date#14, yyyy-MM-dd, Some(UTC)), yyyy-MM-dd HH:mm:ss, Some(UTC)))) THEN app_version#24 ELSE null END, unknown) AS current_version#1418, coalesce(coalesce(CASE WHEN ((isnotnull(date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC))) && (date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC)) > 2000-01-01)) && (from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) <= from_unixtime(unix_timestamp(subsession_start_date#14, yyyy-MM-dd, Some(UTC)), yyyy-MM-dd HH:mm:ss, Some(UTC)))) THEN CASE WHEN distribution_id#16 RLIKE ^mozilla[0-9]+.*$ THEN yes ELSE no END ELSE null END, no), unknown) AS is_funnelcake#1419, coalesce(CASE WHEN ((isnotnull(date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC))) && (date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC)) > 2000-01-01)) && (from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) <= from_unixtime(unix_timestamp(subsession_start_date#14, yyyy-MM-dd, Some(UTC)), yyyy-MM-dd HH:mm:ss, Some(UTC)))) THEN CASE WHEN ((coalesce(sync_count_desktop#19, 0) + coalesce(sync_count_mobile#20, 0)) > 1) THEN multiple ELSE CASE WHEN (((coalesce(sync_count_desktop#19, 0) + coalesce(sync_count_mobile#20, 0)) = 1) || sync_configured#18) THEN single ELSE CASE WHEN isnotnull(sync_configured#18) THEN no ELSE null END END END ELSE null END, unknown) AS sync_usage#1420, coalesce(CASE WHEN ((isnotnull(date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC))) && (date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC)) > 2000-01-01)) && (from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) <= from_unixtime(unix_timestamp(subsession_start_date#14, yyyy-MM-dd, Some(UTC)), yyyy-MM-dd HH:mm:ss, Some(UTC)))) THEN CASE WHEN country#4 INSET (IR,IT,FR,CA,AT,AR,JP,DE,ID,IN,EG,BE,DZ,MX,PH,UA,US,RO,RU,GR,NL,GB,VN,PL,SE,MY,AU,TW,HU,TR,CZ,CN,FI,ES,VE,CH,BR,HK) THEN country#4 ELSE ROW END ELSE null END, unknown) AS geo#1421, coalesce(CASE WHEN ((isnotnull(date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC))) && (date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC)) > 2000-01-01)) && (from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) <= from_unixtime(unix_timestamp(subsession_start_date#14, yyyy-MM-dd, Some(UTC)), yyyy-MM-dd HH:mm:ss, Some(UTC)))) THEN CASE WHEN distribution_id#16 RLIKE ^mozilla[0-9]+.*$ THEN concat(normalized_channel#3, -cck-, distribution_id#16) ELSE normalized_channel#3 END ELSE null END, unknown) AS channel#1422, 1 AS n_profiles#1356, coalesce(coalesce(total_uri_count#1042L, 0), 0) AS total_uri_count#1357L, coalesce(nanvl(coalesce(nanvl(POWER((cast(usage_seconds#1040L as double) / 3600.0), 2.0), null), 0.0), null), 0.0) AS sum_squared_usage_hours#1358, coalesce(nanvl(coalesce(nanvl((cast(usage_seconds#1040L as double) / 3600.0), null), 0.0), null), 0.0) AS usage_hours#1359, coalesce(nanvl(coalesce(nanvl(unique_domains_count_per_profile#1044, null), 0.0), null), 0.0) AS unique_domains_count_per_profile#1360] | |
| : +- Join Inner, (client_id#1 = client_id#1028) | |
| : :- Project [app_version#24, attribution#909, client_id#1, country#4, default_search_engine#54, distribution_id#16, locale#31, normalized_channel#3, profile_creation_date#13L, subsession_start_date#14, sync_configured#18, sync_count_desktop#19, sync_count_mobile#20] | |
| : : +- Filter (isnotnull(client_rank#999) && (client_rank#999 = 1)) | |
| : : +- Window [row_number() windowspecdefinition(client_id#1, timestamp#25L DESC NULLS LAST, ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS client_rank#999], [client_id#1], [timestamp#25L DESC NULLS LAST] | |
| : : +- Union | |
| : : :- Project [app_version#24, coalesce(_rhs_column_name#885, attribution#69) AS attribution#909, client_id#1, country#4, default_search_engine#54, distribution_id#16, locale#31, normalized_channel#3, profile_creation_date#13L, subsession_start_date#14, sync_configured#18, sync_count_desktop#19, sync_count_mobile#20, timestamp#25L] | |
| : : : +- Join LeftOuter, (client_id#1 = client_id#764) | |
| : : : :- Project [app_version#24, attribution#69, client_id#1, country#4, default_search_engine#54, distribution_id#16, locale#31, normalized_channel#3, profile_creation_date#13L, subsession_start_date#14, sync_configured#18, sync_count_desktop#19, sync_count_mobile#20, timestamp#25L] | |
| : : : : +- Filter ((((((isnotnull(submission_date_s3#379) && isnotnull(subsession_start_date#14)) && (subsession_start_date#14 >= 2017-11-05)) && (subsession_start_date#14 < 2017-11-12)) && (submission_date_s3#379 >= 20171105)) && (submission_date_s3#379 < 20171122)) && isnotnull(client_id#1)) | |
| : : : : +- Relation[document_id#0,client_id#1,channel#2,normalized_channel#3,country#4,city#5,os#6,os_version#7,os_service_pack_major#8L,os_service_pack_minor#9L,windows_build_number#10L,windows_ubr#11L,install_year#12L,profile_creation_date#13L,subsession_start_date#14,subsession_length#15L,distribution_id#16,submission_date#17,sync_configured#18,sync_count_desktop#19,sync_count_mobile#20,app_build_id#21,app_display_version#22,app_name#23,... 357 more fields] parquet | |
| : : : +- Project [client_id#764, environment#766.settings.attribution AS _rhs_column_name#885] | |
| : : : +- Filter (isnotnull(environment#766) && isnotnull(environment#766.settings.attribution)) | |
| : : : +- Relation[id#763,client_id#764,metadata#765,environment#766,payload#767,submission#768] parquet | |
| : : +- Project [environment#766.build.version AS app_version#831, environment#766.settings.attribution AS attribution#818, client_id#764, metadata#765.geo_country AS country#829, environment#766.settings.default_search_engine AS default_search_engine#827, environment#766.partner.distribution_id AS distribution_id#826, environment#766.settings.locale AS locale#820, metadata#765.normalized_channel AS normalized_channel#823, environment#766.profile.creation_date AS profile_creation_date#830L, from_unixtime(cast((cast(metadata#765.creation_timestamp as double) / 1.0E9) as bigint), yyyy-MM-dd'T'HH:mm:ss.S'+00:00', Some(UTC)) AS subsession_start_date#819, null AS sync_configured#817, null AS sync_count_desktop#833, null AS sync_count_mobile#822, metadata#765.timestamp AS timestamp#821L] | |
| : : +- Filter (((((isnotnull(submission#768) && (from_unixtime(cast((cast(metadata#765.creation_timestamp as double) / 1.0E9) as bigint), yyyy-MM-dd'T'HH:mm:ss.S'+00:00', Some(UTC)) >= 2017-11-05)) && (from_unixtime(cast((cast(metadata#765.creation_timestamp as double) / 1.0E9) as bigint), yyyy-MM-dd'T'HH:mm:ss.S'+00:00', Some(UTC)) < 2017-11-12)) && (submission#768 >= 20171105)) && (submission#768 < 20171122)) && isnotnull(client_id#764)) | |
| : : +- Relation[id#763,client_id#764,metadata#765,environment#766,payload#767,submission#768] parquet | |
| : +- Aggregate [client_id#1028], [client_id#1028, sum(subsession_length#1025L) AS usage_seconds#1040L, sum(cast(total_uri_count#1026 as bigint)) AS total_uri_count#1042L, avg(cast(unique_domains_count#1027 as bigint)) AS unique_domains_count_per_profile#1044] | |
| : +- Union | |
| : :- Project [CASE WHEN (subsession_length#15L > 172800) THEN 172800 ELSE CASE WHEN (subsession_length#15L < 0) THEN 0 ELSE subsession_length#15L END END AS subsession_length#1025L, coalesce(scalar_parent_browser_engagement_total_uri_count#106, 0) AS total_uri_count#1026, coalesce(scalar_parent_browser_engagement_unique_domains_count#108, 0) AS unique_domains_count#1027, client_id#1 AS client_id#1028] | |
| : : +- Join LeftOuter, (client_id#1 = client_id#764) | |
| : : :- Project [client_id#1, subsession_length#15L, scalar_parent_browser_engagement_total_uri_count#106, scalar_parent_browser_engagement_unique_domains_count#108] | |
| : : : +- Filter ((((((isnotnull(submission_date_s3#379) && isnotnull(subsession_start_date#14)) && (subsession_start_date#14 >= 2017-11-05)) && (subsession_start_date#14 < 2017-11-12)) && (submission_date_s3#379 >= 20171105)) && (submission_date_s3#379 < 20171122)) && isnotnull(client_id#1)) | |
| : : : +- Relation[document_id#0,client_id#1,channel#2,normalized_channel#3,country#4,city#5,os#6,os_version#7,os_service_pack_major#8L,os_service_pack_minor#9L,windows_build_number#10L,windows_ubr#11L,install_year#12L,profile_creation_date#13L,subsession_start_date#14,subsession_length#15L,distribution_id#16,submission_date#17,sync_configured#18,sync_count_desktop#19,sync_count_mobile#20,app_build_id#21,app_display_version#22,app_name#23,... 357 more fields] parquet | |
| : : +- Project [client_id#764] | |
| : : +- Filter (isnotnull(environment#766) && isnotnull(environment#766.settings.attribution)) | |
| : : +- Relation[id#763,client_id#764,metadata#765,environment#766,payload#767,submission#768] parquet | |
| : +- Project [null AS subsession_length#1025L, 0 AS total_uri_count#1026, 0 AS unique_domains_count#1027, client_id#764] | |
| : +- Filter (((((isnotnull(submission#768) && (from_unixtime(cast((cast(metadata#765.creation_timestamp as double) / 1.0E9) as bigint), yyyy-MM-dd'T'HH:mm:ss.S'+00:00', Some(UTC)) >= 2017-11-05)) && (from_unixtime(cast((cast(metadata#765.creation_timestamp as double) / 1.0E9) as bigint), yyyy-MM-dd'T'HH:mm:ss.S'+00:00', Some(UTC)) < 2017-11-12)) && (submission#768 >= 20171105)) && (submission#768 < 20171122)) && isnotnull(client_id#764)) | |
| : +- Relation[id#763,client_id#764,metadata#765,environment#766,payload#767,submission#768] parquet | |
| +- Project [date#993 AS profile_creation#1446, effective_version#994] | |
| +- LogicalRDD [date#993, effective_version#994] | |
| == Physical Plan == | |
| Exchange RoundRobinPartitioning(1) | |
| +- *HashAggregate(keys=[medium#1416, campaign#1409, source#1414, locale#1411, geo#1421, current_week#1355L, is_active#1412, content#1413, distribution_id#1410, default_search_engine#1415, acquisition_period#1417, current_version#1418, is_funnelcake#1419, sync_usage#1420, start_version#1583, channel#1422], functions=[sum(cast(n_profiles#1356 as bigint)), sum(usage_hours#1359), sum(sum_squared_usage_hours#1358), sum(total_uri_count#1357L), sum(unique_domains_count_per_profile#1360)], output=[channel#1422, geo#1421, is_funnelcake#1419, acquisition_period#1417, start_version#1583, sync_usage#1420, current_version#1418, current_week#1355L, source#1414, medium#1416, campaign#1409, content#1413, distribution_id#1410, default_search_engine#1415, locale#1411, is_active#1412, n_profiles#1636L, usage_hours#1638, sum_squared_usage_hours#1640, total_uri_count#1642L, unique_domains_count_per_profile#1645]) | |
| +- Exchange hashpartitioning(medium#1416, campaign#1409, source#1414, locale#1411, geo#1421, current_week#1355L, is_active#1412, content#1413, distribution_id#1410, default_search_engine#1415, acquisition_period#1417, current_version#1418, is_funnelcake#1419, sync_usage#1420, start_version#1583, channel#1422, 200) | |
| +- *HashAggregate(keys=[medium#1416, campaign#1409, source#1414, locale#1411, geo#1421, current_week#1355L, is_active#1412, content#1413, distribution_id#1410, default_search_engine#1415, acquisition_period#1417, current_version#1418, is_funnelcake#1419, sync_usage#1420, start_version#1583, channel#1422], functions=[partial_sum(cast(n_profiles#1356 as bigint)), partial_sum(usage_hours#1359), partial_sum(sum_squared_usage_hours#1358), partial_sum(total_uri_count#1357L), partial_sum(unique_domains_count_per_profile#1360)], output=[medium#1416, campaign#1409, source#1414, locale#1411, geo#1421, current_week#1355L, is_active#1412, content#1413, distribution_id#1410, default_search_engine#1415, acquisition_period#1417, current_version#1418, is_funnelcake#1419, sync_usage#1420, start_version#1583, channel#1422, sum#1811L, sum#1812, sum#1813, sum#1814L, sum#1815]) | |
| +- *Project [unique_domains_count_per_profile#1360, medium#1416, geo#1421, campaign#1409, locale#1411, is_active#1412, acquisition_period#1417, current_version#1418, is_funnelcake#1419, sum_squared_usage_hours#1358, usage_hours#1359, source#1414, n_profiles#1356, current_week#1355L, total_uri_count#1357L, content#1413, distribution_id#1410, default_search_engine#1415, sync_usage#1420, coalesce(CASE WHEN isnull(effective_version#994) THEN CASE WHEN (profile_creation#1407 < 2015-01-01) THEN older ELSE newer END ELSE CASE WHEN StartsWith(channel#1422, release) THEN effective_version#994 ELSE concat(cast((cast(split(effective_version#994, \.)[0] as int) + CASE WHEN StartsWith(channel#1422, beta) THEN 1 ELSE CASE WHEN StartsWith(channel#1422, aurora) THEN 2 ELSE CASE WHEN StartsWith(channel#1422, nightly) THEN 3 ELSE 0 END END END) as string), .0) END END, unknown) AS start_version#1583, channel#1422] | |
| +- SortMergeJoin [profile_creation#1407], [profile_creation#1446], LeftOuter | |
| :- *Sort [profile_creation#1407 ASC NULLS FIRST], false, 0 | |
| : +- Exchange hashpartitioning(profile_creation#1407, 200) | |
| : +- *Project [coalesce(date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC)), unknown) AS profile_creation#1407, coalesce(CASE WHEN ((isnotnull(date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC))) && (date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC)) > 2000-01-01)) && (from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) <= from_unixtime(unix_timestamp(subsession_start_date#14, yyyy-MM-dd, Some(UTC)), yyyy-MM-dd HH:mm:ss, Some(UTC)))) THEN attribution#909.campaign ELSE null END, unknown) AS campaign#1409, coalesce(CASE WHEN ((isnotnull(date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC))) && (date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC)) > 2000-01-01)) && (from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) <= from_unixtime(unix_timestamp(subsession_start_date#14, yyyy-MM-dd, Some(UTC)), yyyy-MM-dd HH:mm:ss, Some(UTC)))) THEN distribution_id#16 ELSE null END, unknown) AS distribution_id#1410, coalesce(CASE WHEN ((isnotnull(date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC))) && (date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC)) > 2000-01-01)) && (from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) <= from_unixtime(unix_timestamp(subsession_start_date#14, yyyy-MM-dd, Some(UTC)), yyyy-MM-dd HH:mm:ss, Some(UTC)))) THEN locale#31 ELSE null END, unknown) AS locale#1411, coalesce(coalesce(coalesce(CASE WHEN ((isnotnull(date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC))) && (date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC)) > 2000-01-01)) && (from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) <= from_unixtime(unix_timestamp(subsession_start_date#14, yyyy-MM-dd, Some(UTC)), yyyy-MM-dd HH:mm:ss, Some(UTC)))) THEN CASE WHEN (datediff(cast(from_unixtime(unix_timestamp(subsession_start_date#14, yyyy-MM-dd, Some(UTC)), yyyy-MM-dd HH:mm:ss, Some(UTC)) as date), cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as date)) < 0) THEN -1 ELSE FLOOR((cast(datediff(cast(from_unixtime(unix_timestamp(subsession_start_date#14, yyyy-MM-dd, Some(UTC)), yyyy-MM-dd HH:mm:ss, Some(UTC)) as date), cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as date)) as double) / 7.0)) END ELSE null END, -1), 0), 0) AS current_week#1355L, coalesce(CASE WHEN ((isnotnull(date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC))) && (date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC)) > 2000-01-01)) && (from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) <= from_unixtime(unix_timestamp(subsession_start_date#14, yyyy-MM-dd, Some(UTC)), yyyy-MM-dd HH:mm:ss, Some(UTC)))) THEN CASE WHEN (from_unixtime(unix_timestamp(subsession_start_date#14, yyyy-MM-dd, Some(UTC)), yyyy-MM-dd HH:mm:ss, Some(UTC)) < 2017-11-05 00:00:00) THEN no ELSE yes END ELSE null END, unknown) AS is_active#1412, coalesce(CASE WHEN ((isnotnull(date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC))) && (date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC)) > 2000-01-01)) && (from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) <= from_unixtime(unix_timestamp(subsession_start_date#14, yyyy-MM-dd, Some(UTC)), yyyy-MM-dd HH:mm:ss, Some(UTC)))) THEN attribution#909.content ELSE null END, unknown) AS content#1413, coalesce(CASE WHEN ((isnotnull(date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC))) && (date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC)) > 2000-01-01)) && (from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) <= from_unixtime(unix_timestamp(subsession_start_date#14, yyyy-MM-dd, Some(UTC)), yyyy-MM-dd HH:mm:ss, Some(UTC)))) THEN attribution#909.source ELSE null END, unknown) AS source#1414, coalesce(CASE WHEN ((isnotnull(date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC))) && (date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC)) > 2000-01-01)) && (from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) <= from_unixtime(unix_timestamp(subsession_start_date#14, yyyy-MM-dd, Some(UTC)), yyyy-MM-dd HH:mm:ss, Some(UTC)))) THEN default_search_engine#54 ELSE null END, unknown) AS default_search_engine#1415, coalesce(CASE WHEN ((isnotnull(date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC))) && (date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC)) > 2000-01-01)) && (from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) <= from_unixtime(unix_timestamp(subsession_start_date#14, yyyy-MM-dd, Some(UTC)), yyyy-MM-dd HH:mm:ss, Some(UTC)))) THEN attribution#909.medium ELSE null END, unknown) AS medium#1416, coalesce(coalesce(CASE WHEN ((isnotnull(date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC))) && (date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC)) > 2000-01-01)) && (from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) <= from_unixtime(unix_timestamp(subsession_start_date#14, yyyy-MM-dd, Some(UTC)), yyyy-MM-dd HH:mm:ss, Some(UTC)))) THEN date_format(cast(date_sub(next_day(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as date), Sun), 7) as timestamp), yyyy-MM-dd, Some(UTC)) ELSE null END, 2000-01-01), unknown) AS acquisition_period#1417, coalesce(CASE WHEN ((isnotnull(date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC))) && (date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC)) > 2000-01-01)) && (from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) <= from_unixtime(unix_timestamp(subsession_start_date#14, yyyy-MM-dd, Some(UTC)), yyyy-MM-dd HH:mm:ss, Some(UTC)))) THEN app_version#24 ELSE null END, unknown) AS current_version#1418, coalesce(coalesce(CASE WHEN ((isnotnull(date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC))) && (date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC)) > 2000-01-01)) && (from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) <= from_unixtime(unix_timestamp(subsession_start_date#14, yyyy-MM-dd, Some(UTC)), yyyy-MM-dd HH:mm:ss, Some(UTC)))) THEN CASE WHEN distribution_id#16 RLIKE ^mozilla[0-9]+.*$ THEN yes ELSE no END ELSE null END, no), unknown) AS is_funnelcake#1419, coalesce(CASE WHEN ((isnotnull(date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC))) && (date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC)) > 2000-01-01)) && (from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) <= from_unixtime(unix_timestamp(subsession_start_date#14, yyyy-MM-dd, Some(UTC)), yyyy-MM-dd HH:mm:ss, Some(UTC)))) THEN CASE WHEN ((coalesce(sync_count_desktop#19, 0) + coalesce(sync_count_mobile#20, 0)) > 1) THEN multiple ELSE CASE WHEN (((coalesce(sync_count_desktop#19, 0) + coalesce(sync_count_mobile#20, 0)) = 1) || sync_configured#18) THEN single ELSE CASE WHEN isnotnull(sync_configured#18) THEN no ELSE null END END END ELSE null END, unknown) AS sync_usage#1420, coalesce(CASE WHEN ((isnotnull(date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC))) && (date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC)) > 2000-01-01)) && (from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) <= from_unixtime(unix_timestamp(subsession_start_date#14, yyyy-MM-dd, Some(UTC)), yyyy-MM-dd HH:mm:ss, Some(UTC)))) THEN CASE WHEN country#4 INSET (IR,IT,FR,CA,AT,AR,JP,DE,ID,IN,EG,BE,DZ,MX,PH,UA,US,RO,RU,GR,NL,GB,VN,PL,SE,MY,AU,TW,HU,TR,CZ,CN,FI,ES,VE,CH,BR,HK) THEN country#4 ELSE ROW END ELSE null END, unknown) AS geo#1421, coalesce(CASE WHEN ((isnotnull(date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC))) && (date_format(cast(from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) as timestamp), yyyy-MM-dd, Some(UTC)) > 2000-01-01)) && (from_unixtime((profile_creation_date#13L * 86400), yyyy-MM-dd HH:mm:ss, Some(UTC)) <= from_unixtime(unix_timestamp(subsession_start_date#14, yyyy-MM-dd, Some(UTC)), yyyy-MM-dd HH:mm:ss, Some(UTC)))) THEN CASE WHEN distribution_id#16 RLIKE ^mozilla[0-9]+.*$ THEN concat(normalized_channel#3, -cck-, distribution_id#16) ELSE normalized_channel#3 END ELSE null END, unknown) AS channel#1422, 1 AS n_profiles#1356, coalesce(coalesce(total_uri_count#1042L, 0), 0) AS total_uri_count#1357L, coalesce(nanvl(coalesce(nanvl(POWER((cast(usage_seconds#1040L as double) / 3600.0), 2.0), null), 0.0), null), 0.0) AS sum_squared_usage_hours#1358, coalesce(nanvl(coalesce(nanvl((cast(usage_seconds#1040L as double) / 3600.0), null), 0.0), null), 0.0) AS usage_hours#1359, coalesce(nanvl(coalesce(nanvl(unique_domains_count_per_profile#1044, null), 0.0), null), 0.0) AS unique_domains_count_per_profile#1360] | |
| : +- *SortMergeJoin [client_id#1], [client_id#1028], Inner | |
| : :- *Project [app_version#24, attribution#909, client_id#1, country#4, default_search_engine#54, distribution_id#16, locale#31, normalized_channel#3, profile_creation_date#13L, subsession_start_date#14, sync_configured#18, sync_count_desktop#19, sync_count_mobile#20] | |
| : : +- *Filter (isnotnull(client_rank#999) && (client_rank#999 = 1)) | |
| : : +- Window [row_number() windowspecdefinition(client_id#1, timestamp#25L DESC NULLS LAST, ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS client_rank#999], [client_id#1], [timestamp#25L DESC NULLS LAST] | |
| : : +- *Sort [client_id#1 ASC NULLS FIRST, timestamp#25L DESC NULLS LAST], false, 0 | |
| : : +- Exchange hashpartitioning(client_id#1, 200) | |
| : : +- Union | |
| : : :- *Project [app_version#24, coalesce(_rhs_column_name#885, attribution#69) AS attribution#909, client_id#1, country#4, default_search_engine#54, distribution_id#16, locale#31, normalized_channel#3, profile_creation_date#13L, subsession_start_date#14, sync_configured#18, sync_count_desktop#19, sync_count_mobile#20, timestamp#25L] | |
| : : : +- SortMergeJoin [client_id#1], [client_id#764], LeftOuter | |
| : : : :- *Sort [client_id#1 ASC NULLS FIRST], false, 0 | |
| : : : : +- Exchange hashpartitioning(client_id#1, 200) | |
| : : : : +- *Project [app_version#24, attribution#69, client_id#1, country#4, default_search_engine#54, distribution_id#16, locale#31, normalized_channel#3, profile_creation_date#13L, subsession_start_date#14, sync_configured#18, sync_count_desktop#19, sync_count_mobile#20, timestamp#25L] | |
| : : : : +- *Filter (((isnotnull(subsession_start_date#14) && (subsession_start_date#14 >= 2017-11-05)) && (subsession_start_date#14 < 2017-11-12)) && isnotnull(client_id#1)) | |
| : : : : +- *FileScan parquet [client_id#1,normalized_channel#3,country#4,profile_creation_date#13L,subsession_start_date#14,distribution_id#16,sync_configured#18,sync_count_desktop#19,sync_count_mobile#20,app_version#24,timestamp#25L,locale#31,default_search_engine#54,attribution#69,submission_date_s3#379,sample_id#380] Batched: false, Format: Parquet, Location: InMemoryFileIndex[s3://telemetry-parquet/main_summary/v4], PartitionCount: 1704, PartitionFilters: [isnotnull(submission_date_s3#379), (submission_date_s3#379 >= 20171105), (submission_date_s3#379..., PushedFilters: [IsNotNull(subsession_start_date), GreaterThanOrEqual(subsession_start_date,2017-11-05), LessThan..., ReadSchema: struct<client_id:string,normalized_channel:string,country:string,profile_creation_date:bigint,sub... | |
| : : : +- *Sort [client_id#764 ASC NULLS FIRST], false, 0 | |
| : : : +- Exchange hashpartitioning(client_id#764, 200) | |
| : : : +- *Project [client_id#764, environment#766.settings.attribution AS _rhs_column_name#885] | |
| : : : +- *Filter (isnotnull(environment#766) && isnotnull(environment#766.settings.attribution)) | |
| : : : +- *FileScan parquet [client_id#764,environment#766,submission#768] Batched: false, Format: Parquet, Location: InMemoryFileIndex[s3://net-mozaws-prod-us-west-2-pipeline-data/telemetry-new-profile-parquet/v1], PartitionCount: 158, PartitionFilters: [], PushedFilters: [IsNotNull(environment)], ReadSchema: struct<client_id:string,environment:struct<build:struct<application_name:string,architecture:stri... | |
| : : +- *Project [environment#766.build.version AS app_version#831, environment#766.settings.attribution AS attribution#818, client_id#764, metadata#765.geo_country AS country#829, environment#766.settings.default_search_engine AS default_search_engine#827, environment#766.partner.distribution_id AS distribution_id#826, environment#766.settings.locale AS locale#820, metadata#765.normalized_channel AS normalized_channel#823, environment#766.profile.creation_date AS profile_creation_date#830L, from_unixtime(cast((cast(metadata#765.creation_timestamp as double) / 1.0E9) as bigint), yyyy-MM-dd'T'HH:mm:ss.S'+00:00', Some(UTC)) AS subsession_start_date#819, null AS sync_configured#817, null AS sync_count_desktop#833, null AS sync_count_mobile#822, metadata#765.timestamp AS timestamp#821L] | |
| : : +- *Filter (((from_unixtime(cast((cast(metadata#765.creation_timestamp as double) / 1.0E9) as bigint), yyyy-MM-dd'T'HH:mm:ss.S'+00:00', Some(UTC)) >= 2017-11-05) && (from_unixtime(cast((cast(metadata#765.creation_timestamp as double) / 1.0E9) as bigint), yyyy-MM-dd'T'HH:mm:ss.S'+00:00', Some(UTC)) < 2017-11-12)) && isnotnull(client_id#764)) | |
| : : +- *FileScan parquet [client_id#764,metadata#765,environment#766,submission#768] Batched: false, Format: Parquet, Location: InMemoryFileIndex[s3://net-mozaws-prod-us-west-2-pipeline-data/telemetry-new-profile-parquet/v1], PartitionCount: 17, PartitionFilters: [isnotnull(submission#768), (submission#768 >= 20171105), (submission#768 < 20171122)], PushedFilters: [IsNotNull(client_id)], ReadSchema: struct<client_id:string,metadata:struct<timestamp:bigint,date:string,normalized_channel:string,ge... | |
| : +- *Sort [client_id#1028 ASC NULLS FIRST], false, 0 | |
| : +- *HashAggregate(keys=[client_id#1028], functions=[sum(subsession_length#1025L), sum(cast(total_uri_count#1026 as bigint)), avg(cast(unique_domains_count#1027 as bigint))], output=[client_id#1028, usage_seconds#1040L, total_uri_count#1042L, unique_domains_count_per_profile#1044]) | |
| : +- Exchange hashpartitioning(client_id#1028, 200) | |
| : +- *HashAggregate(keys=[client_id#1028], functions=[partial_sum(subsession_length#1025L), partial_sum(cast(total_uri_count#1026 as bigint)), partial_avg(cast(unique_domains_count#1027 as bigint))], output=[client_id#1028, sum#1820L, sum#1821L, sum#1822, count#1823L]) | |
| : +- Union | |
| : :- *Project [CASE WHEN (subsession_length#15L > 172800) THEN 172800 ELSE CASE WHEN (subsession_length#15L < 0) THEN 0 ELSE subsession_length#15L END END AS subsession_length#1025L, coalesce(scalar_parent_browser_engagement_total_uri_count#106, 0) AS total_uri_count#1026, coalesce(scalar_parent_browser_engagement_unique_domains_count#108, 0) AS unique_domains_count#1027, client_id#1 AS client_id#1028] | |
| : : +- SortMergeJoin [client_id#1], [client_id#764], LeftOuter | |
| : : :- *Sort [client_id#1 ASC NULLS FIRST], false, 0 | |
| : : : +- Exchange hashpartitioning(client_id#1, 200) | |
| : : : +- *Project [client_id#1, subsession_length#15L, scalar_parent_browser_engagement_total_uri_count#106, scalar_parent_browser_engagement_unique_domains_count#108] | |
| : : : +- *Filter (((isnotnull(subsession_start_date#14) && (subsession_start_date#14 >= 2017-11-05)) && (subsession_start_date#14 < 2017-11-12)) && isnotnull(client_id#1)) | |
| : : : +- *FileScan parquet [client_id#1,subsession_start_date#14,subsession_length#15L,scalar_parent_browser_engagement_total_uri_count#106,scalar_parent_browser_engagement_unique_domains_count#108,submission_date_s3#379,sample_id#380] Batched: true, Format: Parquet, Location: InMemoryFileIndex[s3://telemetry-parquet/main_summary/v4], PartitionCount: 1704, PartitionFilters: [isnotnull(submission_date_s3#379), (submission_date_s3#379 >= 20171105), (submission_date_s3#379..., PushedFilters: [IsNotNull(subsession_start_date), GreaterThanOrEqual(subsession_start_date,2017-11-05), LessThan..., ReadSchema: struct<client_id:string,subsession_start_date:string,subsession_length:bigint,scalar_parent_brows... | |
| : : +- *Sort [client_id#764 ASC NULLS FIRST], false, 0 | |
| : : +- Exchange hashpartitioning(client_id#764, 200) | |
| : : +- *Project [client_id#764] | |
| : : +- *Filter (isnotnull(environment#766) && isnotnull(environment#766.settings.attribution)) | |
| : : +- *FileScan parquet [client_id#764,environment#766,submission#768] Batched: false, Format: Parquet, Location: InMemoryFileIndex[s3://net-mozaws-prod-us-west-2-pipeline-data/telemetry-new-profile-parquet/v1], PartitionCount: 158, PartitionFilters: [], PushedFilters: [IsNotNull(environment)], ReadSchema: struct<client_id:string,environment:struct<build:struct<application_name:string,architecture:stri... | |
| : +- *Project [null AS subsession_length#1025L, 0 AS total_uri_count#1026, 0 AS unique_domains_count#1027, client_id#764] | |
| : +- *Filter (((from_unixtime(cast((cast(metadata#765.creation_timestamp as double) / 1.0E9) as bigint), yyyy-MM-dd'T'HH:mm:ss.S'+00:00', Some(UTC)) >= 2017-11-05) && (from_unixtime(cast((cast(metadata#765.creation_timestamp as double) / 1.0E9) as bigint), yyyy-MM-dd'T'HH:mm:ss.S'+00:00', Some(UTC)) < 2017-11-12)) && isnotnull(client_id#764)) | |
| : +- *FileScan parquet [client_id#764,metadata#765,submission#768] Batched: false, Format: Parquet, Location: InMemoryFileIndex[s3://net-mozaws-prod-us-west-2-pipeline-data/telemetry-new-profile-parquet/v1], PartitionCount: 17, PartitionFilters: [isnotnull(submission#768), (submission#768 >= 20171105), (submission#768 < 20171122)], PushedFilters: [IsNotNull(client_id)], ReadSchema: struct<client_id:string,metadata:struct<timestamp:bigint,date:string,normalized_channel:string,ge... | |
| +- *Sort [profile_creation#1446 ASC NULLS FIRST], false, 0 | |
| +- Exchange hashpartitioning(profile_creation#1446, 200) | |
| +- *Project [date#993 AS profile_creation#1446, effective_version#994] | |
| +- Scan ExistingRDD[date#993,effective_version#994] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| with attribution_state as ( | |
| SELECT client_id, | |
| activity_date, | |
| attribution_state | |
| coalesce( | |
| lag(attribution_state, 1) over ( | |
| partition by client_id | |
| order by activity_date, profile_subsession_counter | |
| ), | |
| attribution_state) as prev_attribution_state | |
| FROM ( | |
| SELECT *, attribution is not null as attribution_state | |
| FROM subset | |
| ) | |
| ) |
Author
acmiyaguchi
commented
Nov 16, 2017
Author
scala> rdd.first().fields
res13: Seq[com.mozilla.telemetry.heka.Field] =
Vector(name: "submission"
value_type: BYTES
representation: "json"
value_bytes: "{\"type\":\"first-shutdown\",\"id\":\"de979dd0-10ba-e74d-9792-faf01930aefe\",\"creationDate\":\"2017-11-01
T13:50:09.926Z\",\"version\":4,\"application\":{\"architecture\":\"x86-64\",\"buildId\":\"20171026211016\",\"name\":\"Fi
refox\",\"version\":\"57.0\",\"displayVersion\":\"57.0b12\",\"vendor\":\"Mozilla\",\"platformVersion\":\"57.0\",\"xpcomA
bi\":\"x86_64-gcc3\",\"channel\":\"beta-cdntest\"},\"payload\":{\"ver\":4,\"simpleMeasurements\":{\"totalTime\":10,\"upt
ime\":0,\"start\":17,\"main\":59,\"selectProfile\":129,\"startupCrashDetectionBegin\":214,\"startupCrashDetectionEnd\":9
788,\"firstPaint\":1720,\"sessionRestoreInit\":781,\"sessionRestored\":2178,\"creat...
scala> main_rdd.first().fields
res12: Seq[com.mozilla.telemetry.heka.Field] =
Vector(name: "appUpdateChannel"
value_string: "release-cck-mozillaonline"
, name: "sourceVersion"
value_type: DOUBLE
value_double: 4.0
, name: "docType"
value_string: "main"
, name: "os"
value_string: "Windows_NT"
, name: "environment.partner"
value_type: BYTES
representation: "json"
value_bytes: "{\"distributionId\":\"MozillaOnline\",\"distributionVersion\":\"2015.5\",\"partnerId\":\"mozillaonline\",\
"distributor\":\"mozillaonline\",\"distributorChannel\":\"mainWinStub\",\"partnerNames\":[\"mozillaonline\"]}"
, name: "appVendor"
value_string: "Mozilla"
, name: "submissionDate"
value_string: "20171101"
, name: "appVersion"
value_string: "39.0.3"
, name: "payload.chromeHangs"
value_type: BYTES
representation: "json"
value_bytes: "{\"memoryMa...
Author
./mozetl-submit.sh -p https://github.com/acmiyaguchi/python_mozetl.git -b bug-1419080-attribution churn --start_date 20171115 --bucket net-mozaws-prod-us-west-2-pipeline-analysis --prefix amiyaguchi/bug_14019080_churn/v3 | tee bug14019080.log
Author
./mozetl-submit.sh -p https://github.com/acmiyaguchi/python_mozetl.git -b testing-attribution-perf churn --start_date 20171115 --bucket net-mozaws-prod-us-west-2-pipeline-analysis --prefix amiyaguchi/bug_14019080_churn/v3 | tee bug14019080.log
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment