-- Generic form of the load statement (reads as a syntax template rather than a
-- runnable query): `dataset` stands in for a dataset name, which the concrete
-- examples in this file write as a double-quoted string.
result = load dataset;
-- Apply a filter to the dataset you will be working with down the line; this way, every later step deals only with the filtered records:
-- Generic form of the filter statement (reads as a syntax template): `rows` is
-- the input stream and `predicate` is the condition each kept row must satisfy.
result = filter rows by predicate;
-- Concrete example: load the "DTC_Opportunity_SAMPLE" dataset into stream q,
-- then keep only the rows whose 'Closed' field equals the string "true".
q = load "DTC_Opportunity_SAMPLE";
q = filter q by 'Closed' == "true";
-- Builds one result stream (s) holding both the detail totals and the
-- per-country subtotals of 'Amount' from the "opportunity" dataset.

-- q: sum of Amount for each (AccountSource, BillingCountry) pair.
q = load "opportunity";
q = group q by ('Account.AccountSource', 'Account.BillingCountry');
q = foreach q generate 'Account.AccountSource' as 'Account.AccountSource',
    'Account.BillingCountry' as 'Account.BillingCountry', sum('Amount') as 'sum_Amount';
q = order q by ('Account.AccountSource' asc, 'Account.BillingCountry' asc);

-- r: sum of Amount for each BillingCountry. The literal "Total" is emitted in
-- the 'Account.AccountSource' column so r has the same columns as q and the
-- two streams can be unioned.
r = load "opportunity";
r = group r by ('Account.BillingCountry');
r = foreach r generate "Total" as 'Account.AccountSource', 'Account.BillingCountry',
    sum('Amount') as 'sum_Amount';

-- union q and r, then regroup on both keys and re-sum the partial totals.
s = union q, r;
s = group s by ('Account.AccountSource', 'Account.BillingCountry');
-- The terminating semicolon below was missing, which made this statement run
-- into whatever statement followed it.
s = foreach s generate 'Account.AccountSource', 'Account.BillingCountry', sum('sum_Amount') as 'sum_Amount';
-- Syntax example: cogroup stream x on (day, origin) with stream y on
-- (day, airport), assigning the result to z. Streams x and y are not defined
-- in this excerpt; they must be loaded before this statement can run.
z = cogroup x by (day,origin), y by (day,airport);
-- You can't have the same stream on both sides of a cogroup operation.
-- To perform a cogroup operation on one dataset,
-- load the same dataset twice so you have two streams.
a = load "opportunity";
b = load "opportunity";
-- Match a's rows by ClosedDate against b's rows by CreatedDate. The result is
-- assigned to c (not back to b) so that a and b keep naming the two input
-- streams of the cogroup.
c = cogroup a by ClosedDate, b by CreatedDate;
-- Sum Amount from the a side of the cogroup. The terminating semicolon was
-- missing on this statement.
c = foreach c generate sum(a.Amount) as Amount;
-- Quarterly sales with a running total.
q = load "dataset";
-- One group per (year, quarter).
q = group q by ('OrderDate_Year', 'OrderDate_Quarter');
q = foreach q generate
    'OrderDate_Year' as 'Year',
    'OrderDate_Quarter' as 'Quarter',
    -- Total of Sales within the current (year, quarter) group.
    sum('Sales') as 'sum_amt',
    -- Windowed sum of the per-group totals: a single partition (partition by
    -- all), ordered by year then quarter, with the row range [.. 0]
    -- (per the SAQL windowing docs: everything up to and including the
    -- current row).
    sum(sum('Sales')) over (
        [.. 0]
        partition by all
        order by ('OrderDate_Year', 'OrderDate_Quarter')
    ) as 'r_sum';