Grokbase Groups Pig user April 2013
FAQ
Hi friends,

I am new to PIG script. I need to convert below sql query to PIG script.


SELECT ('CSS'||DB.DISTRICT_CODE||DB.BILLING_ACCOUNT_NO) BAC_KEY,

CASE WHEN T1.TAC_142 IS NULL THEN 'N' ELSE T1.TAC_142 END TAC_142 FROM

(



SELECT DISTRICT_CODE,BILLING_ACCOUNT_NO,

MAX(CASE WHEN TAC_1 = 'Y' AND (TAC_2 = 'Y' OR TAC_3 = 'Y') THEN 'Y' ELSE
'N' END) TAC_142 FROM

(

SELECT DI.DISTRICT_CODE,DI.BILLING_ACCOUNT_NO,DI.INST_SEQUENCE_NO,

MAX(CASE WHEN TRIM(DIP.PRODUCT_CODE) = 'A14493' AND UPPER(DI.HAZARD) LIKE
'%999%EMERGENCY%LINE%' AND UPPER(DI.WARNING) LIKE '%USE%999%ALERT%METHOD%'
THEN 'Y' ELSE 'N' END) TAC_1,

MAX(CASE WHEN TRIM(DIP.PRODUCT_TYPE) IN ('20','21') AND
TRIM(DIP.MAINTENANCE_CONTRACT) IN ('E','T') THEN 'Y' ELSE 'N' END) TAC_2,

MAX(CASE WHEN TRIM(DIP.PRODUCT_CODE) IN ('A14498','A14428','A22640') THEN
'Y' ELSE 'N' END) TAC_3

FROM

D_INSTALLATION DI,

D_INSTALLATION_PRODUCT DIP

WHERE

DIP.INST_SEQUENCE_NO = DI.INST_SEQUENCE_NO AND

DIP.BAC_WID = DI.BAC_WID

GROUP BY DI.DISTRICT_CODE,DI.BILLING_ACCOUNT_NO,DI.INST_SEQUENCE_NO

)

GROUP BY DISTRICT_CODE,BILLING_ACCOUNT_NO)

T1,

D_BILLING_ACCOUNT DB

WHERE

DB.DISTRICT_CODE = T1.DISTRICT_CODE(+) AND

DB.BILLING_ACCOUNT_NO = T1.BILLING_ACCOUNT_NO(+)


--------------------------------------------------------------------------


I have tried to write a below inner query into PIG.


SELECT DI.DISTRICT_CODE,DI.BILLING_ACCOUNT_NO,DI.INST_SEQUENCE_NO,

MAX(CASE WHEN TRIM(DIP.PRODUCT_CODE) = 'A14493' AND UPPER(DI.HAZARD) LIKE
'%999%EMERGENCY%LINE%' AND UPPER(DI.WARNING) LIKE '%USE%999%ALERT%METHOD%'
THEN 'Y' ELSE 'N' END) TAC_1,

MAX(CASE WHEN TRIM(DIP.PRODUCT_TYPE) IN ('20','21') AND
TRIM(DIP.MAINTENANCE_CONTRACT) IN ('E','T') THEN 'Y' ELSE 'N' END) TAC_2,

MAX(CASE WHEN TRIM(DIP.PRODUCT_CODE) IN ('A14498','A14428','A22640') THEN
'Y' ELSE 'N' END) TAC_3

FROM

D_INSTALLATION DI,

D_INSTALLATION_PRODUCT DIP

WHERE

DIP.INST_SEQUENCE_NO = DI.INST_SEQUENCE_NO AND

DIP.BAC_WID = DI.BAC_WID

GROUP BY DI.DISTRICT_CODE,DI.BILLING_ACCOUNT_NO,DI.INST_SEQUENCE_NO


PIG Script


A = load 'D_INSTALLATION.txt';
B= load 'D_INSTALLATION_PRODUCT.txt';
C= join A by DI.INST_SEQUENCE_NO, B by INST_SEQUENCE_NO;
D= join C by BAC_WID,B by BAC_WID;

caseData = foreach D generate ((PRODUCT_CODE=='A14493' and
UPPER(HAZARD)=='999%EMERGENCY%LINE' and
UPPER(WARNING)=='USE%999%ALERT%METHOD') ? 'Y':'N') AS TAC_1,

((PRODUCT_TYPE IN ('20','21') and MAINTENANCE_CONTRACT IN
('E','T')) ? 'Y':'N') As TAC_2,

(PRODUCT_CODE IN('A14498','A22640','A22640') ? 'Y':'N') As TAC_3;
grouped = group caseData by
DISTRICT_CODE,BILLING_ACCOUNT_NO,INST_SEQUENCE_NO;
Data = foreach grouped generate group as
DISTRICT_CODE,BILLING_ACCOUNT_NO,INST_SEQUENCE_NO,MAX(TAC_1) as
TAC1,MAX(TAC_2) as TAC2,MAX(TAC_3) as TAC3


It is giving lot of errors. Can you please help me.. attached are the
tables

Search Discussions

  • j.barrett Strausser at Apr 22, 2013 at 9:04 pm
    You'll have more luck if you post the errors.

    Off the bat, I assume you are going to have problems given your load
    statement.

    -b

    On Mon, Apr 22, 2013 at 4:59 PM, Raj hadoop wrote:

    Hi friends,

    I am new to PIG script. I need to convert below sql query to PIG script.


    SELECT ('CSS'||DB.DISTRICT_CODE||DB.BILLING_ACCOUNT_NO) BAC_KEY,

    CASE WHEN T1.TAC_142 IS NULL THEN 'N' ELSE T1.TAC_142 END TAC_142 FROM

    (



    SELECT DISTRICT_CODE,BILLING_ACCOUNT_NO,

    MAX(CASE WHEN TAC_1 = 'Y' AND (TAC_2 = 'Y' OR TAC_3 = 'Y') THEN 'Y' ELSE
    'N' END) TAC_142 FROM

    (

    SELECT DI.DISTRICT_CODE,DI.BILLING_ACCOUNT_NO,DI.INST_SEQUENCE_NO,

    MAX(CASE WHEN TRIM(DIP.PRODUCT_CODE) = 'A14493' AND UPPER(DI.HAZARD) LIKE
    '%999%EMERGENCY%LINE%' AND UPPER(DI.WARNING) LIKE '%USE%999%ALERT%METHOD%'
    THEN 'Y' ELSE 'N' END) TAC_1,

    MAX(CASE WHEN TRIM(DIP.PRODUCT_TYPE) IN ('20','21') AND
    TRIM(DIP.MAINTENANCE_CONTRACT) IN ('E','T') THEN 'Y' ELSE 'N' END) TAC_2,

    MAX(CASE WHEN TRIM(DIP.PRODUCT_CODE) IN ('A14498','A14428','A22640') THEN
    'Y' ELSE 'N' END) TAC_3

    FROM

    D_INSTALLATION DI,

    D_INSTALLATION_PRODUCT DIP

    WHERE

    DIP.INST_SEQUENCE_NO = DI.INST_SEQUENCE_NO AND

    DIP.BAC_WID = DI.BAC_WID

    GROUP BY DI.DISTRICT_CODE,DI.BILLING_ACCOUNT_NO,DI.INST_SEQUENCE_NO

    )

    GROUP BY DISTRICT_CODE,BILLING_ACCOUNT_NO)

    T1,

    D_BILLING_ACCOUNT DB

    WHERE

    DB.DISTRICT_CODE = T1.DISTRICT_CODE(+) AND

    DB.BILLING_ACCOUNT_NO = T1.BILLING_ACCOUNT_NO(+)


    --------------------------------------------------------------------------


    I have tried to write a below inner query into PIG.


    SELECT DI.DISTRICT_CODE,DI.BILLING_ACCOUNT_NO,DI.INST_SEQUENCE_NO,

    MAX(CASE WHEN TRIM(DIP.PRODUCT_CODE) = 'A14493' AND UPPER(DI.HAZARD) LIKE
    '%999%EMERGENCY%LINE%' AND UPPER(DI.WARNING) LIKE '%USE%999%ALERT%METHOD%'
    THEN 'Y' ELSE 'N' END) TAC_1,

    MAX(CASE WHEN TRIM(DIP.PRODUCT_TYPE) IN ('20','21') AND
    TRIM(DIP.MAINTENANCE_CONTRACT) IN ('E','T') THEN 'Y' ELSE 'N' END) TAC_2,

    MAX(CASE WHEN TRIM(DIP.PRODUCT_CODE) IN ('A14498','A14428','A22640') THEN
    'Y' ELSE 'N' END) TAC_3

    FROM

    D_INSTALLATION DI,

    D_INSTALLATION_PRODUCT DIP

    WHERE

    DIP.INST_SEQUENCE_NO = DI.INST_SEQUENCE_NO AND

    DIP.BAC_WID = DI.BAC_WID

    GROUP BY DI.DISTRICT_CODE,DI.BILLING_ACCOUNT_NO,DI.INST_SEQUENCE_NO


    PIG Script


    A = load 'D_INSTALLATION.txt';
    B= load 'D_INSTALLATION_PRODUCT.txt';
    C= join A by DI.INST_SEQUENCE_NO, B by INST_SEQUENCE_NO;
    D= join C by BAC_WID,B by BAC_WID;

    caseData = foreach D generate ((PRODUCT_CODE=='A14493' and
    UPPER(HAZARD)=='999%EMERGENCY%LINE' and
    UPPER(WARNING)=='USE%999%ALERT%METHOD') ? 'Y':'N') AS TAC_1,

    ((PRODUCT_TYPE IN ('20','21') and MAINTENANCE_CONTRACT IN
    ('E','T')) ? 'Y':'N') As TAC_2,

    (PRODUCT_CODE IN('A14498','A22640','A22640') ? 'Y':'N') As TAC_3;
    grouped = group caseData by
    DISTRICT_CODE,BILLING_ACCOUNT_NO,INST_SEQUENCE_NO;
    Data = foreach grouped generate group as
    DISTRICT_CODE,BILLING_ACCOUNT_NO,INST_SEQUENCE_NO,MAX(TAC_1) as
    TAC1,MAX(TAC_2) as TAC2,MAX(TAC_3) as TAC3


    It is giving lot of errors. Can you please help me.. attached are the
    tables




    --


    https://github.com/bearrito
    @barrettsmash
  • Raj hadoop at Apr 22, 2013 at 9:22 pm
    Hi,

    Thanks for giving reply. I didn't have any issues with load statement as i
    have mentioned the correct file path while loading. Could you please check
    the rest of the logic..


    On Tue, Apr 23, 2013 at 2:33 AM, j.barrett Strausser wrote:

    You'll have more luck if you post the errors.

    Off the bat, I assume you are going to have problems given your load
    statement.

    -b

    On Mon, Apr 22, 2013 at 4:59 PM, Raj hadoop wrote:

    Hi friends,

    I am new to PIG script. I need to convert below sql query to PIG script.


    SELECT ('CSS'||DB.DISTRICT_CODE||DB.BILLING_ACCOUNT_NO) BAC_KEY,

    CASE WHEN T1.TAC_142 IS NULL THEN 'N' ELSE T1.TAC_142 END TAC_142 FROM

    (



    SELECT DISTRICT_CODE,BILLING_ACCOUNT_NO,

    MAX(CASE WHEN TAC_1 = 'Y' AND (TAC_2 = 'Y' OR TAC_3 = 'Y') THEN 'Y' ELSE
    'N' END) TAC_142 FROM

    (

    SELECT DI.DISTRICT_CODE,DI.BILLING_ACCOUNT_NO,DI.INST_SEQUENCE_NO,

    MAX(CASE WHEN TRIM(DIP.PRODUCT_CODE) = 'A14493' AND UPPER(DI.HAZARD) LIKE
    '%999%EMERGENCY%LINE%' AND UPPER(DI.WARNING) LIKE
    '%USE%999%ALERT%METHOD%'
    THEN 'Y' ELSE 'N' END) TAC_1,

    MAX(CASE WHEN TRIM(DIP.PRODUCT_TYPE) IN ('20','21') AND
    TRIM(DIP.MAINTENANCE_CONTRACT) IN ('E','T') THEN 'Y' ELSE 'N' END) TAC_2,

    MAX(CASE WHEN TRIM(DIP.PRODUCT_CODE) IN ('A14498','A14428','A22640') THEN
    'Y' ELSE 'N' END) TAC_3

    FROM

    D_INSTALLATION DI,

    D_INSTALLATION_PRODUCT DIP

    WHERE

    DIP.INST_SEQUENCE_NO = DI.INST_SEQUENCE_NO AND

    DIP.BAC_WID = DI.BAC_WID

    GROUP BY DI.DISTRICT_CODE,DI.BILLING_ACCOUNT_NO,DI.INST_SEQUENCE_NO

    )

    GROUP BY DISTRICT_CODE,BILLING_ACCOUNT_NO)

    T1,

    D_BILLING_ACCOUNT DB

    WHERE

    DB.DISTRICT_CODE = T1.DISTRICT_CODE(+) AND

    DB.BILLING_ACCOUNT_NO = T1.BILLING_ACCOUNT_NO(+)


    --------------------------------------------------------------------------

    I have tried to write a below inner query into PIG.


    SELECT DI.DISTRICT_CODE,DI.BILLING_ACCOUNT_NO,DI.INST_SEQUENCE_NO,

    MAX(CASE WHEN TRIM(DIP.PRODUCT_CODE) = 'A14493' AND UPPER(DI.HAZARD) LIKE
    '%999%EMERGENCY%LINE%' AND UPPER(DI.WARNING) LIKE
    '%USE%999%ALERT%METHOD%'
    THEN 'Y' ELSE 'N' END) TAC_1,

    MAX(CASE WHEN TRIM(DIP.PRODUCT_TYPE) IN ('20','21') AND
    TRIM(DIP.MAINTENANCE_CONTRACT) IN ('E','T') THEN 'Y' ELSE 'N' END) TAC_2,

    MAX(CASE WHEN TRIM(DIP.PRODUCT_CODE) IN ('A14498','A14428','A22640') THEN
    'Y' ELSE 'N' END) TAC_3

    FROM

    D_INSTALLATION DI,

    D_INSTALLATION_PRODUCT DIP

    WHERE

    DIP.INST_SEQUENCE_NO = DI.INST_SEQUENCE_NO AND

    DIP.BAC_WID = DI.BAC_WID

    GROUP BY DI.DISTRICT_CODE,DI.BILLING_ACCOUNT_NO,DI.INST_SEQUENCE_NO


    PIG Script


    A = load 'D_INSTALLATION.txt';
    B= load 'D_INSTALLATION_PRODUCT.txt';
    C= join A by DI.INST_SEQUENCE_NO, B by INST_SEQUENCE_NO;
    D= join C by BAC_WID,B by BAC_WID;

    caseData = foreach D generate ((PRODUCT_CODE=='A14493' and
    UPPER(HAZARD)=='999%EMERGENCY%LINE' and
    UPPER(WARNING)=='USE%999%ALERT%METHOD') ? 'Y':'N') AS TAC_1,

    ((PRODUCT_TYPE IN ('20','21') and MAINTENANCE_CONTRACT IN
    ('E','T')) ? 'Y':'N') As TAC_2,

    (PRODUCT_CODE IN('A14498','A22640','A22640') ? 'Y':'N') As TAC_3;
    grouped = group caseData by
    DISTRICT_CODE,BILLING_ACCOUNT_NO,INST_SEQUENCE_NO;
    Data = foreach grouped generate group as
    DISTRICT_CODE,BILLING_ACCOUNT_NO,INST_SEQUENCE_NO,MAX(TAC_1) as
    TAC1,MAX(TAC_2) as TAC2,MAX(TAC_3) as TAC3


    It is giving lot of errors. Can you please help me.. attached are the
    tables




    --


    https://github.com/bearrito
    @barrettsmash
  • j.barrett Strausser at Apr 22, 2013 at 10:10 pm
    Can you post the errors?

    On Mon, Apr 22, 2013 at 5:22 PM, Raj hadoop wrote:

    Hi,

    Thanks for giving reply. I didn't have any issues with load statement as i
    have mentioned the correct file path while loading. Could you please check
    the rest of the logic..



    On Tue, Apr 23, 2013 at 2:33 AM, j.barrett Strausser <
    j.barrett.strausser@gmail.com> wrote:
    You'll have more luck if you post the errors.

    Off the bat, I assume you are going to have problems given your load
    statement.

    -b

    On Mon, Apr 22, 2013 at 4:59 PM, Raj hadoop wrote:

    Hi friends,

    I am new to PIG script. I need to convert below sql query to PIG
    script.

    SELECT ('CSS'||DB.DISTRICT_CODE||DB.BILLING_ACCOUNT_NO) BAC_KEY,

    CASE WHEN T1.TAC_142 IS NULL THEN 'N' ELSE T1.TAC_142 END TAC_142 FROM

    (



    SELECT DISTRICT_CODE,BILLING_ACCOUNT_NO,

    MAX(CASE WHEN TAC_1 = 'Y' AND (TAC_2 = 'Y' OR TAC_3 = 'Y') THEN 'Y'
    ELSE
    'N' END) TAC_142 FROM

    (

    SELECT DI.DISTRICT_CODE,DI.BILLING_ACCOUNT_NO,DI.INST_SEQUENCE_NO,

    MAX(CASE WHEN TRIM(DIP.PRODUCT_CODE) = 'A14493' AND UPPER(DI.HAZARD)
    LIKE
    '%999%EMERGENCY%LINE%' AND UPPER(DI.WARNING) LIKE
    '%USE%999%ALERT%METHOD%'
    THEN 'Y' ELSE 'N' END) TAC_1,

    MAX(CASE WHEN TRIM(DIP.PRODUCT_TYPE) IN ('20','21') AND
    TRIM(DIP.MAINTENANCE_CONTRACT) IN ('E','T') THEN 'Y' ELSE 'N' END)
    TAC_2,
    MAX(CASE WHEN TRIM(DIP.PRODUCT_CODE) IN ('A14498','A14428','A22640')
    THEN
    'Y' ELSE 'N' END) TAC_3

    FROM

    D_INSTALLATION DI,

    D_INSTALLATION_PRODUCT DIP

    WHERE

    DIP.INST_SEQUENCE_NO = DI.INST_SEQUENCE_NO AND

    DIP.BAC_WID = DI.BAC_WID

    GROUP BY DI.DISTRICT_CODE,DI.BILLING_ACCOUNT_NO,DI.INST_SEQUENCE_NO

    )

    GROUP BY DISTRICT_CODE,BILLING_ACCOUNT_NO)

    T1,

    D_BILLING_ACCOUNT DB

    WHERE

    DB.DISTRICT_CODE = T1.DISTRICT_CODE(+) AND

    DB.BILLING_ACCOUNT_NO = T1.BILLING_ACCOUNT_NO(+)

    --------------------------------------------------------------------------

    I have tried to write a below inner query into PIG.


    SELECT DI.DISTRICT_CODE,DI.BILLING_ACCOUNT_NO,DI.INST_SEQUENCE_NO,

    MAX(CASE WHEN TRIM(DIP.PRODUCT_CODE) = 'A14493' AND UPPER(DI.HAZARD)
    LIKE
    '%999%EMERGENCY%LINE%' AND UPPER(DI.WARNING) LIKE
    '%USE%999%ALERT%METHOD%'
    THEN 'Y' ELSE 'N' END) TAC_1,

    MAX(CASE WHEN TRIM(DIP.PRODUCT_TYPE) IN ('20','21') AND
    TRIM(DIP.MAINTENANCE_CONTRACT) IN ('E','T') THEN 'Y' ELSE 'N' END)
    TAC_2,
    MAX(CASE WHEN TRIM(DIP.PRODUCT_CODE) IN ('A14498','A14428','A22640')
    THEN
    'Y' ELSE 'N' END) TAC_3

    FROM

    D_INSTALLATION DI,

    D_INSTALLATION_PRODUCT DIP

    WHERE

    DIP.INST_SEQUENCE_NO = DI.INST_SEQUENCE_NO AND

    DIP.BAC_WID = DI.BAC_WID

    GROUP BY DI.DISTRICT_CODE,DI.BILLING_ACCOUNT_NO,DI.INST_SEQUENCE_NO


    PIG Script


    A = load 'D_INSTALLATION.txt';
    B= load 'D_INSTALLATION_PRODUCT.txt';
    C= join A by DI.INST_SEQUENCE_NO, B by INST_SEQUENCE_NO;
    D= join C by BAC_WID,B by BAC_WID;

    caseData = foreach D generate ((PRODUCT_CODE=='A14493' and
    UPPER(HAZARD)=='999%EMERGENCY%LINE' and
    UPPER(WARNING)=='USE%999%ALERT%METHOD') ? 'Y':'N') AS TAC_1,

    ((PRODUCT_TYPE IN ('20','21') and MAINTENANCE_CONTRACT IN
    ('E','T')) ? 'Y':'N') As TAC_2,

    (PRODUCT_CODE IN('A14498','A22640','A22640') ? 'Y':'N') As TAC_3;
    grouped = group caseData by
    DISTRICT_CODE,BILLING_ACCOUNT_NO,INST_SEQUENCE_NO;
    Data = foreach grouped generate group as
    DISTRICT_CODE,BILLING_ACCOUNT_NO,INST_SEQUENCE_NO,MAX(TAC_1) as
    TAC1,MAX(TAC_2) as TAC2,MAX(TAC_3) as TAC3


    It is giving lot of errors. Can you please help me.. attached are the
    tables




    --


    https://github.com/bearrito
    @barrettsmash


    --


    https://github.com/bearrito
    @barrettsmash

Related Discussions

Discussion Navigation
viewthread | post
Discussion Overview
groupuser @
categoriespig, hadoop
postedApr 22, '13 at 8:59p
activeApr 22, '13 at 10:10p
posts4
users2
websitepig.apache.org

2 users in discussion

j.barrett Strausser: 2 posts Raj hadoop: 2 posts

People

Translate

site design / logo © 2021 Grokbase