pygaR: Example for pygar_master() Function

Load Libraries

library(pygaR)
library(ggplot2)

Setup Default ggplot2 Theme

# Shared plot theme: black-and-white base with enlarged text. X-axis tick
# labels are rotated 90 degrees so long categorical labels do not overlap.
d <- theme_bw() + theme(
    plot.title = element_text(size=30),
    axis.title = element_text(size=20),
    axis.text.x = element_text(angle=90, size=15)
)
# Every chart below adds `def_theme`; `d` is only the working copy.
def_theme <- d

Data Analysis

Gather All Data

# Request the master data from pygaR for quarters 199301 through 201701.
master_data <- pygar_master(startqtr=199301, endqtr=201701)

# Keep only the four columns used below. Quarter and form type become
# factors because they serve as grouping/axis variables later on.
simpler_master_data <- data.frame(
    Year.Quarter=factor(master_data$Quarter),
    CIK=master_data$CIK,
    Form.Type=factor(master_data$Form.Type),
    Date.Filed=master_data$Date.Filed
)
master_data <- simpler_master_data

# Characters 6-10 of the filing date give the day-of-year key
# (presumably "MM-DD" from a "YYYY-MM-DD" date -- confirm the date format).
master_data$Day <- substr(as.character(master_data$Date.Filed), 6, 10)

Aggregate Filings by All Quarters

# One row per Year.Quarter; the count of CIK entries in a quarter is the
# number of filings in that quarter.
master_agg_length <- setNames(
    aggregate(CIK ~ Year.Quarter, data=master_data, FUN=length),
    c("Year.Quarter", "Filings")
)

# Split the quarter label into its year part (first four characters) and
# its quarter part (characters 6-7) for the year-by-quarter chart.
master_agg_length$Year <- factor(
    substr(as.character(master_agg_length$Year.Quarter), 1, 4)
)
master_agg_length$Quarter <- factor(
    substr(as.character(master_agg_length$Year.Quarter), 6, 7)
)

Filings Bar Chart by All Quarters

# Bar chart of filings (in thousands) for every quarter in the data.
g <- ggplot(
    master_agg_length,
    aes(x=Year.Quarter, y=Filings/1000, group=1)
) +
    geom_bar(stat="identity") +
    def_theme +
    ylab("Filings (Thousands)") +
    ggtitle("Filings by All Quarters")
g

plot of chunk master_agg_length_bar

Filings Line Graph by All Quarters

# Same series as the bar chart, drawn as a connected line with markers.
# group=1 puts all quarters in one group so geom_line() joins them.
g <- ggplot(
    master_agg_length,
    aes(x=Year.Quarter, y=Filings/1000, group=1)
) +
    geom_line() +
    geom_point() +
    def_theme +
    # Override the default theme's x tick size (15) with a smaller 12.
    theme(axis.text.x = element_text(angle=90, size=12)) +
    ylab("Filings (Thousands)") +
    ggtitle("Filings by All Quarters")
g

plot of chunk master_agg_length_line

Filings Line Graph by Year Grouped by Quarter

# One line per quarter value, plotted across years, so the same quarter
# can be compared from year to year.
g <- ggplot(
    master_agg_length,
    aes(x=Year, y=Filings/1000, group=Quarter, colour=Quarter)
) +
    geom_line() +
    geom_point() +
    def_theme +
    ylab("Filings (Thousands)") +
    ggtitle("Filings by Year Grouped by Quarter")
g

plot of chunk master_agg_length_qtr_line

Aggregate Unique CIK by All Quarters

# Number of distinct CIKs per quarter.
# The count is computed inside FUN so aggregate() always returns one integer
# per group. Returning the unique values themselves and measuring them
# afterwards only works when aggregate() yields a list column; if every
# quarter had the same number of unique CIKs the result would be simplified
# to a matrix and the follow-up length computation would be wrong.
master_agg_unique <- aggregate(
    CIK ~ Year.Quarter, data=master_data,
    FUN=function(ciks) length(unique(ciks))
)

names(master_agg_unique) <- c("Year.Quarter", "Unique.CIK")

# Split the quarter label into its year part (first four characters) and
# its quarter part (characters 6-7) for the year-by-quarter chart.
master_agg_unique$Year <- factor(substr(master_agg_unique$Year.Quarter, 1, 4))
master_agg_unique$Quarter <- factor(
    substr(master_agg_unique$Year.Quarter, 6, 7)
)

Unique CIK Line Graph Grouped by Quarter

# Distinct-CIK counts across years, with one coloured line per quarter.
g <- ggplot(
    master_agg_unique,
    aes(x=Year, y=Unique.CIK, group=Quarter, colour=Quarter)
) +
    geom_line() +
    geom_point() +
    def_theme +
    ggtitle("Unique CIK Grouped by Quarter")
g

plot of chunk master_agg_unique_qtr_line

Aggregate Filings by Day of Year

# Count filings per Day key, pooling the same day across all years.
master_aggday_length <- setNames(
    aggregate(CIK ~ Day, data=master_data, FUN=length),
    c("Day", "Filings")
)

Filings Line Graph by Day of Year

# Filings per day of year as a single connected line. The x tick labels
# are blanked because there is one label per day.
g <- ggplot(
    master_aggday_length,
    aes(x=Day, y=Filings, group=1)
) +
    geom_line() +
    geom_point() +
    def_theme +
    theme(axis.text.x = element_blank()) +
    ggtitle("Filings by Day of Year")
g

plot of chunk master_aggday_length_line

Aggregate Filings by Form Type Across All Quarters

# Count filings per form type over the whole data set (no quarter split).
master_formagg_length <- setNames(
    aggregate(CIK ~ Form.Type, data=master_data, FUN=length),
    c("Form.Type", "Filings")
)

Form Type Frequencies

# Bar chart of filings (in thousands) for every form type.
# ggplot() takes only the data and the aesthetic mapping here; the counts
# were already computed by aggregate(), so no FUN argument belongs in this
# call (a stray one would just be swallowed by `...`).
g <- ggplot(master_formagg_length, aes(x=Form.Type, y=Filings/1000))
g <- g + geom_bar(stat="identity")
g <- g + def_theme
g <- g + ylab("Filings (Thousands)")
g <- g + ggtitle("Form Type Frequencies")
g

plot of chunk master_formagg_unique_bar

Total Form Types

# One row per form type, so the row count is the number of form types.
nrow(master_formagg_length)
## [1] 666

<insert commentary about wall street here>

All Forms

# Print every form type present in the aggregated data.
master_formagg_length[["Form.Type"]]
##   [1] 1          1-A        1-A POS    1-A-W      1-A-W/A    1-A/A     
##   [7] 1-E        1-E AD     1-E/A      1-K        1-K/A      1-SA      
##  [13] 1-SA/A     1-U        1-U/A      1-Z        1-Z/A      1/A       
##  [19] 10-12B     10-12B/A   10-12G     10-12G/A   10-C       10-C/A    
##  [25] 10-D       10-D/A     10-K       10-K/A     10-K405    10-K405/A 
##  [31] 10-KSB     10-KSB/A   10-KT      10-KT/A    10-Q       10-Q/A    
##  [37] 10-QSB     10-QSB/A   10-QT      10-QT/A    10KSB      10KSB/A   
##  [43] 10KSB40    10KSB40/A  10KT405    10KT405/A  10QSB      10QSB/A   
##  [49] 10SB12B    10SB12B/A  10SB12G    10SB12G/A  11-K       11-K/A    
##  [55] 11-KT      11-KT/A    12G3-2B    12G32BR    13F-E      13F-E/A   
##  [61] 13F-HR     13F-HR/A   13F-NT     13F-NT/A   13FCONP    13FCONP/A 
##  [67] 144        144/A      15-12B     15-12B/A   15-12G     15-12G/A  
##  [73] 15-15D     15-15D/A   15F-12B    15F-12B/A  15F-12G    15F-12G/A 
##  [79] 15F-15D    15F-15D/A  18-12B     18-K       18-K/A     19B-4     
##  [85] 19B-4E     2-A        2-A/A      2-AF       2-E        2-E/A     
##  [91] 20-F       20-F/A     20FR12B    20FR12B/A  20FR12G    20FR12G/A 
##  [97] 24F-1      24F-2EL    24F-2EL/A  24F-2NT    24F-2NT/A  24F-2TM   
## [103] 25         25-NSE     25-NSE/A   25/A       253G1      253G2     
## [109] 253G3      26         3          3/A        305B2      305B2/A   
## [115] 34-12H     35-APP     35-APP/A   35-CERT    35-CERT/A  39-304D   
## [121] 39-304D/A  39-310B    4          4/A        40-17F1    40-17F1/A 
## [127] 40-17F2    40-17F2/A  40-17G     40-17G/A   40-17GCS   40-202A   
## [133] 40-202A/A  40-203A    40-203A/A  40-205E    40-205E/A  40-206A   
## [139] 40-206A/A  40-24B2    40-24B2/A  40-33      40-33/A    40-6B     
## [145] 40-6B/A    40-6C      40-6C/A    40-8B25    40-8F-2    40-8F-2/A 
## [151] 40-8F-A    40-8F-A/A  40-8F-B    40-8F-B/A  40-8F-L    40-8F-L/A 
## [157] 40-8F-M    40-8F-M/A  40-8FC     40-8FC/A   40-APP     40-APP/A  
## [163] 40-F       40-F/A     40-OIP     40-OIP/A   40-RPT     40FR12B   
## [169] 40FR12B/A  40FR12G    40FR12G/A  424A       424B1      424B2     
## [175] 424B3      424B4      424B5      424B7      424B8      424H      
## [181] 424H/A     425        485A24E    485A24F    485APOS    485B24E   
## [187] 485B24F    485BPOS    485BXT     485BXTF    486A24E    486APOS   
## [193] 486B24E    486BPOS    487        497        497AD      497H2     
## [199] 497J       497K       497K1      497K2      497K3A     497K3B    
## [205] 5          5/A        6-K        6-K/A      6B NTC     6B ORDR   
## [211] 8-A12B     8-A12B/A   8-A12G     8-A12G/A   8-B12B     8-B12B/A  
## [217] 8-B12G     8-B12G/A   8-K        8-K/A      8-K12B     8-K12B/A  
## [223] 8-K12G3    8-K12G3/A  8-K15D5    8-K15D5/A  8-M        8A12BEF   
## [229] 8A12BT     8A12BT/A   8F-2 NTC   8F-2 ORDR  9-M        ABS-15G   
## [235] ABS-15G/A  ABS-EE     ABS-EE/A   ADB        ADN-MTL    ADV-E     
## [241] ADV-H-C    ADV-H-T    ADV-NR     ADV/A      ADVCO      ADVW      
## [247] AFDB       AFDB/A     ANNLRPT    ANNLRPT/A  APP NTC    APP ORDR  
## [253] APP WD     APP WD/A   APP WDG    ARS        ARS/A      AW        
## [259] AW WD      BDCO       BW-2       BW-3       C          C-U       
## [265] C-W        C/A        C/A-W      CB         CB/A       CERT      
## [271] CERTAMX    CERTARCA   CERTBATS   CERTBSE    CERTCSE    CERTNAS   
## [277] CERTNYS    CERTPAC    CERTPBS    CFPORTAL   CFPORTAL-W CFPORTAL/A
## [283] CORRESP    CT ORDER   D          D/A        DEF 14A    DEF 14C   
## [289] DEF-OC     DEF13E3    DEF13E3/A  DEFA14A    DEFA14C    DEFC14A   
## [295] DEFC14C    DEFM14A    DEFM14C    DEFN14A    DEFR14A    DEFR14C   
## [301] DEFS14A    DEFS14C    DEL AM     DFAN14A    DFRN14A    DOS       
## [307] DOS/A      DOSLTR     DRS        DRS/A      DRSLTR     DSTRBRPT  
## [313] DSTRBRPT/A EBRD       EBRD/A     EFFECT     F-1        F-1/A     
## [319] F-10       F-10/A     F-10EF     F-10POS    F-1MEF     F-2       
## [325] F-2/A      F-3        F-3/A      F-3ASR     F-3D       F-3DPOS   
## [331] F-3MEF     F-4        F-4 POS    F-4/A      F-4MEF     F-6       
## [337] F-6 POS    F-6/A      F-6EF      F-7        F-7 POS    F-7/A     
## [343] F-8        F-8 POS    F-8/A      F-80       F-80/A     F-80POS   
## [349] F-9        F-9 POS    F-9/A      F-9EF      F-N        F-N/A     
## [355] F-X        F-X/A      FOCUSN     FOCUSN/A   FWP        G-405     
## [361] G-405/A    G-405N     G-405N/A   G-FIN      G-FIN/A    G-FINW    
## [367] IADB       ID-NEWCIK  IFC        IRANNOTICE MA         MA-A      
## [373] MA-I       MA-I/A     MA-W       MA/A       MSD        MSD/A     
## [379] MSDCO      MSDW       N-1        N-1/A      N-14       N-14 8C   
## [385] N-14 8C/A  N-14/A     N-14AE     N-14AE/A   N-14MEF    N-18F1    
## [391] N-18F1/A   N-1A       N-1A EL    N-1A EL/A  N-1A/A     N-2       
## [397] N-2/A      N-23C-1    N-23C-1/A  N-23C-2    N-23C-2/A  N-23C3A   
## [403] N-23C3A/A  N-23C3B    N-23C3B/A  N-23C3C    N-23C3C/A  N-27D-1   
## [409] N-2MEF     N-3        N-3 EL     N-3 EL/A   N-3/A      N-30B-2   
## [415] N-30D      N-30D/A    N-4        N-4 EL     N-4 EL/A   N-4/A     
## [421] N-5        N-5/A      N-54A      N-54A/A    N-54C      N-54C/A   
## [427] N-6        N-6/A      N-6F       N-6F/A     N-8A       N-8A/A    
## [433] N-8B-2     N-8B-2/A   N-8B-4     N-8F       N-8F NTC   N-8F ORDR 
## [439] N-8F/A     N-CR       N-CR/A     N-CSR      N-CSR/A    N-CSRS    
## [445] N-CSRS/A   N-MFP      N-MFP/A    N-MFP1     N-MFP1/A   N-MFP2    
## [451] N-MFP2/A   N-PX       N-PX/A     N-Q        N-Q/A      N14AE24   
## [457] N14AE24/A  N14EL24    N14EL24/A  NO ACT     NRSRO-CE   NRSRO-CE/A
## [463] NRSRO-UPD  NSAR-A     NSAR-A/A   NSAR-AT    NSAR-AT/A  NSAR-B    
## [469] NSAR-B/A   NSAR-BT    NSAR-BT/A  NSAR-U     NSAR-U/A   NT 10-D   
## [475] NT 10-D/A  NT 10-K    NT 10-K/A  NT 10-Q    NT 10-Q/A  NT 11-K   
## [481] NT 11-K/A  NT 15D2    NT 15D2/A  NT 20-F    NT 20-F/A  NT N-MFP  
## [487] NT N-MFP1  NT N-MFP2  NT-NCSR    NT-NCSR/A  NT-NSAR    NT-NSAR/A 
## [493] NTFNCSR    NTFNSAR    NTN 10D    NTN 10K    NTN 10Q    NTN 11K   
## [499] NTN 20F    NTN15D2    OIP NTC    OIP ORDR   POS 8C     POS AM    
## [505] POS AMC    POS AMI    POS EX     POS462B    POS462C    POSASR    
## [511] PRE 14A    PRE 14C    PRE13E3    PRE13E3/A  PREA14A    PREA14C   
## [517] PREC14A    PREC14C    PREM14A    PREM14C    PREN14A    PRER14A   
## [523] PRER14C    PRES14A    PRES14C    PRRN14A    PX14A6G    PX14A6N   
## [529] QRTLYRPT   QRTLYRPT/A QUALIF     REG-NR     REG-NR/A   REGDEX    
## [535] REGDEX/A   REVOKED    RW         RW WD      S-1        S-1/A     
## [541] S-11       S-11/A     S-11MEF    S-1MEF     S-2        S-2/A     
## [547] S-20       S-20/A     S-2MEF     S-3        S-3/A      S-3ASR    
## [553] S-3D       S-3D/A     S-3DPOS    S-3MEF     S-4        S-4 POS   
## [559] S-4/A      S-4EF      S-4EF/A    S-4MEF     S-6        S-6/A     
## [565] S-6EL24    S-6EL24/A  S-8        S-8 POS    S-8/A      S-B       
## [571] S-B/A      S-BMEF     SB-1       SB-1/A     SB-1MEF    SB-2      
## [577] SB-2/A     SB-2MEF    SC 13D     SC 13D/A   SC 13E1    SC 13E1/A 
## [583] SC 13E3    SC 13E3/A  SC 13E4    SC 13E4/A  SC 13G     SC 13G/A  
## [589] SC 14D1    SC 14D1/A  SC 14D9    SC 14D9/A  SC 14F1    SC 14F1/A 
## [595] SC 14N     SC 14N/A   SC TO-C    SC TO-I    SC TO-I/A  SC TO-T   
## [601] SC TO-T/A  SC13E4F    SC13E4F/A  SC14D1F    SC14D1F/A  SC14D9C   
## [607] SC14D9F    SC14D9F/A  SD         SD/A       SDR        SE        
## [613] SF-1       SF-1/A     SF-3       SF-3/A     SL         SP 15D2   
## [619] SP 15D2/A  STOP ORDER SUPPL      T-3        T-3/A      TA-1      
## [625] TA-1/A     TA-2       TA-2/A     TA-W       TACO       TH        
## [631] TTW        TTW/A      U-1        U-1/A      U-12-IA    U-12-IA/A 
## [637] U-12-IB    U-12-IB/A  U-13-60    U-13-60/A  U-13E-1    U-33-S    
## [643] U-33-S/A   U-3A-2     U-3A-2/A   U-3A3-1    U-57       U-57/A    
## [649] U-6B-2     U-6B-2/A   U-7D       U-7D/A     U-9C-3     U-9C-3/A  
## [655] U5A        U5A/A      U5B        U5B/A      U5S        U5S/A     
## [661] UNDER      UNDER/A    UPLOAD     WDL-REQ    X-17A-5    X-17A-5/A 
## 666 Levels: 1 1-A 1-A POS 1-A-W 1-A-W/A 1-A/A 1-E 1-E AD 1-E/A ... X-17A-5/A

Look at Top 5% of Form Types

# Rank form types from most to least filed.
master_formagg_length <- master_formagg_length[
    order(-master_formagg_length$Filings),
]

# Filing count at the row that closes the top 5% of form types.
# floor() makes the index truncation explicit instead of relying on R
# silently truncating a fractional subscript, and max(1, ...) keeps the
# index valid when there are fewer than 20 form types (a zero index would
# select nothing and empty the whole table).
bottom_value <- master_formagg_length$Filings[
    max(1, floor(nrow(master_formagg_length) * 0.05))
]

# Keep every form type at or above the cutoff. `>=` retains the cutoff row
# itself (and any ties with it), which is still inside the top 5%.
master_formagg_length <- master_formagg_length[
    master_formagg_length$Filings >= bottom_value,
]

Top 5% Form Type Bar Chart

# Bar chart of filings (in thousands) for the top 5% of form types.
# ggplot() takes only the data and the aesthetic mapping here; the counts
# were already computed by aggregate(), so no FUN argument belongs in this
# call (a stray one would just be swallowed by `...`).
g <- ggplot(master_formagg_length, aes(x=Form.Type, y=Filings/1000))
g <- g + geom_bar(stat="identity")
g <- g + def_theme
# Override the default theme's x tick size (15) with a smaller 12.
g <- g + theme(axis.text.x = element_text(angle=90, size=12))
g <- g + ylab("Filings (Thousands)")
g <- g + ggtitle("Top 5% of Form Type Frequencies")
g

plot of chunk master_formagg_top5perc_bar