Sunday, December 1, 2013

SQL Code for MLB Players' Career Batting Data

The MS Access SQL code below can be used to query the Lahman Baseball Database for the MLB players' career batting dataset:

SELECT trips.*,
    lookup_fielding.Primary_Position,
    lookup_fielding.Games_at_Primary_Position
FROM    (

    SELECT  deuce.*,
        IIF(lookup_allstar.count_distinct_seasons IS NULL, 0, lookup_allstar.count_distinct_seasons) as AllStar_Seasons
    FROM     (
        SELECT  ace.*,
            IIF(ace.finalGame IS NULL AND cdate(ace.debut) >= #01/01/1982#, 'Yes', 'No') as Active,
            IIF(lookup_hof.inducted = 'Y', 'Yes', 'No') as Hall_Of_Fame,
            IIF(  IIF(lookup_hof.inducted = 'Y', 'Yes', 'No') = 'Yes', 'HOF',
                      IIF( IIF(ace.finalGame IS NULL AND cdate(ace.debut) >= #01/01/1982#, 'Yes', 'No') = 'Yes', 'Active', 'Not HOF')) as Status
        FROM     (
            SELECT
                Master.playerID, Master.hofID, Master.nameFirst, Master.nameLast, Master.debut, Master.finalGame,
                 sub.G, sub.AB, sub.R, sub.H, sub.Doubles, sub.Triples, sub.HR, sub.RBI, sub.SB, sub.CS, sub.BB, sub.SO, sub.IBB, sub.HBP, sub.SH, sub.SF, sub.GIDP,
                (sub.H + sub.Doubles + 2*sub.Triples + 3*sub.HR) as TB,
                round( (sub.H + sub.BB - sub.CS + sub.HBP - sub.GIDP)*( (sub.H + sub.Doubles + 2*sub.Triples + 3*sub.HR) + 0.26*(sub.BB - sub.IBB + sub.HBP) + 0.52*(sub.SH + sub.SF + sub.SB) )/(sub.AB + sub.BB + sub.HBP + sub.SH + sub.SF), 1) as RC,
                round( sub.H/sub.AB, 3) as BA,
                round( (sub.H + sub.BB + sub.HBP)/(sub.AB + sub.BB + sub.HBP + sub.SF), 3 ) as OBP,
                round( (sub.H + sub.Doubles + 2*sub.Triples + 3*sub.HR)/sub.AB, 3) as SLG,
                round( (sub.H + sub.BB + sub.HBP)/(sub.AB + sub.BB + sub.HBP + sub.SF) + (sub.H + sub.Doubles + 2*sub.Triples + 3*sub.HR)/sub.AB, 3) as OPS,
                round( ((sub.H + sub.Doubles + 2*sub.Triples + 3*sub.HR) + sub.BB + sub.HBP + sub.SB - sub.CS)/(sub.AB - sub.H + sub.CS + sub.GIDP), 3) as TA,
                round( (sub.H + sub.Doubles + 2*sub.Triples + 3*sub.HR)/sub.AB - sub.H/sub.AB, 3) as ISO,
                round( ((sub.H + sub.Doubles + 2*sub.Triples + 3*sub.HR) - sub.H + sub.BB + sub.SB - sub.CS)/sub.AB, 3) as SECA,
                round( 27*( (sub.H + sub.BB - sub.CS + sub.HBP - sub.GIDP)*( (sub.H + sub.Doubles + 2*sub.Triples + 3*sub.HR) + 0.26*(sub.BB - sub.IBB + sub.HBP) + 0.52*(sub.SH + sub.SF + sub.SB) )/(sub.AB + sub.BB + sub.HBP + sub.SH + sub.SF) )/(sub.AB - sub.H + sub.SH + sub.SF + sub.CS + sub.GIDP), 3) as RC27
            FROM     (
                SELECT
                    Batting.playerID,
                    Sum(Batting.G) as G,
                    Sum(Batting.AB) as AB,
                    Sum(Batting.R) as R,
                    Sum(Batting.H) as H,
                    Sum(Batting.[2B]) as Doubles,
                    Sum(Batting.[3B]) as Triples,
                    Sum(Batting.HR) as HR,
                    Sum(Batting.RBI) as RBI,
                    Sum(Batting.SB) as SB,
                    IIF(Sum(Batting.CS) IS NULL, 0, Sum(Batting.CS)) as CS,
                    IIF(Sum(Batting.BB) IS NULL, 0, Sum(Batting.BB)) as BB,
                    IIF(Sum(Batting.SO) IS NULL, 0, Sum(Batting.SO)) as SO,
                    IIF(Sum(Batting.IBB) IS NULL, 0, Sum(Batting.IBB)) as IBB,
                    IIF(Sum(Batting.HBP) IS NULL, 0, Sum(Batting.HBP)) as HBP,
                    IIF(Sum(Batting.SH) IS NULL, 0, Sum(Batting.SH)) as SH,
                    IIF(Sum(Batting.SF) IS NULL, 0, Sum(Batting.SF)) as SF,
                    IIF(Sum(Batting.GIDP) IS NULL, 0, Sum(Batting.GIDP)) as GIDP
                FROM         Batting
                GROUP BY     Batting.playerID
                ) as sub INNER JOIN Master
                      ON sub.playerID = Master.playerID
                WHERE   sub.AB >= 100
            GROUP BY     Master.playerID, Master.hofID, Master.nameFirst, Master.nameLast, Master.debut, Master.finalGame,
                     sub.G, sub.AB, sub.R, sub.H, sub.Doubles, sub.Triples, sub.HR, sub.RBI, sub.SB, sub.CS, sub.BB, sub.SO, sub.IBB, sub.HBP, sub.SH, sub.SF, sub.GIDP,
                     (sub.H + sub.Doubles + 2*sub.Triples + 3*sub.HR),
                    round( (sub.H + sub.BB - sub.CS + sub.HBP - sub.GIDP)*( (sub.H + sub.Doubles + 2*sub.Triples + 3*sub.HR) + 0.26*(sub.BB - sub.IBB + sub.HBP) + 0.52*(sub.SH + sub.SF + sub.SB) )/(sub.AB + sub.BB + sub.HBP + sub.SH + sub.SF), 1),
                    round( sub.H/sub.AB, 3),
                    round( (sub.H + sub.BB + sub.HBP)/(sub.AB + sub.BB + sub.HBP + sub.SF), 3 ),
                    round( (sub.H + sub.Doubles + 2*sub.Triples + 3*sub.HR)/sub.AB, 3),
                    round( (sub.H + sub.BB + sub.HBP)/(sub.AB + sub.BB + sub.HBP + sub.SF) + (sub.H + sub.Doubles + 2*sub.Triples + 3*sub.HR)/sub.AB, 3),
                    round( ((sub.H + sub.Doubles + 2*sub.Triples + 3*sub.HR) + sub.BB + sub.HBP + sub.SB - sub.CS)/(sub.AB - sub.H + sub.CS + sub.GIDP), 3),
                    round( (sub.H + sub.Doubles + 2*sub.Triples + 3*sub.HR)/sub.AB - sub.H/sub.AB, 3),
                    round( ((sub.H + sub.Doubles + 2*sub.Triples + 3*sub.HR) - sub.H + sub.BB + sub.SB - sub.CS)/sub.AB, 3),
                    round( 27*( (sub.H + sub.BB - sub.CS + sub.HBP - sub.GIDP)*( (sub.H + sub.Doubles + 2*sub.Triples + 3*sub.HR) + 0.26*(sub.BB - sub.IBB + sub.HBP) + 0.52*(sub.SH + sub.SF + sub.SB) )/(sub.AB + sub.BB + sub.HBP + sub.SH + sub.SF) )/(sub.AB - sub.H + sub.SH + sub.SF + sub.CS + sub.GIDP), 3)
            ORDER BY Master.playerID asc
            ) as ace LEFT OUTER JOIN (
                         SELECT hofID, yearid, inducted, category
                          FROM     HallOfFame
                          WHERE     inducted = 'Y'
                               AND category = 'Player'
                            ) as lookup_hof
                                  ON ace.hofID = lookup_hof.hofID
        ) as deuce LEFT OUTER JOIN (
                       SELECT playerID,
                          COUNT(*) as count_distinct_seasons
                          FROM (
                        SELECT DISTINCT   playerID,
                                  yearID
                        FROM AllstarFull
                            ) as temp
                          GROUP BY playerID
                              ) as lookup_allstar
                                 ON deuce.playerID = lookup_allstar.playerID
    ) as trips LEFT OUTER JOIN (         
                   SELECT DISTINCT     leo.playerID,
                            rex.Primary_Position,
                            leo.Games_at_Primary_Position
                   FROM    (
                    SELECT DISTINCT playerID, max(Games_at_POS) as Games_at_Primary_Position
                    FROM     (
                        SELECT DISTINCT playerID,
                                POS,
                                sum(G) as Games_at_POS
                        FROM     Fielding
                        WHERE NOT(POS = 'OF' AND yearID >= 1954)
                        GROUP BY playerID, POS
                        ORDER BY playerID, sum(G) desc
                        ) as sub_field
                    GROUP BY playerID
                    ) as leo INNER JOIN (
                                    SELECT DISTINCT     playerID,
                                            POS as Primary_Position,
                                            sum(G) as Games_at_Primary_Position
                                    FROM     Fielding
                                WHERE NOT(POS = 'OF' AND yearID >= 1954)              
                                    GROUP BY playerID, POS
                                    ORDER BY playerID, sum(G) desc
                                    ) as rex
                                   ON leo.playerID = rex.playerID AND leo.Games_at_Primary_Position = rex.Games_at_Primary_Position
                   GROUP BY leo.playerID, rex.Primary_Position, leo.Games_at_Primary_Position
                   ) as lookup_fielding
                         ON trips.playerID = lookup_fielding.playerID
ORDER BY trips.playerID asc
;