library(pygaR)
library(ggplot2)
# Shared ggplot2 theme used by every chart in this script: theme_bw() as the
# base, with larger title text and x-axis tick labels rotated by 90 degrees.
d <- theme_bw() +
  theme(
    axis.text.x = element_text(size = 15, angle = 90),
    axis.title = element_text(size = 20),
    plot.title = element_text(size = 30)
  )
def_theme <- d
# Load the master data returned by pygar_master() for quarters 199301-201701.
master_data <- pygar_master(startqtr = 199301, endqtr = 201701)

# Keep only the four columns the analysis below relies on, storing the quarter
# and the form type as factors.
simpler_master_data <- data.frame(
  Year.Quarter = factor(master_data$Quarter),
  CIK = master_data$CIK,
  Form.Type = factor(master_data$Form.Type),
  Date.Filed = master_data$Date.Filed
)
master_data <- simpler_master_data

# Day holds characters 6-10 of the filing date.
# NOTE(review): presumably the "MM-DD" part of a "YYYY-MM-DD" string -- confirm
# against the format pygar_master() actually returns.
master_data[["Day"]] <- substr(master_data[["Date.Filed"]], 6, 10)
# Count filings per quarter: aggregate() applies length() to the CIK values of
# each Year.Quarter group, so the second column of the result is a row count.
master_agg_length <- aggregate(
  CIK ~ Year.Quarter,
  data = master_data, FUN = length
)
names(master_agg_length) <- c("Year.Quarter", "Filings")

# Split the quarter label into a year part and a quarter part.
# substr() positions are 1-based, so the year is characters 1-4 (a start of 0
# only works because R silently clamps it to 1).
master_agg_length$Year <- factor(substr(master_agg_length$Year.Quarter, 1, 4))
# NOTE(review): characters 6-7 assume the label has one separator character
# after the year -- confirm against the Quarter format from pygar_master().
master_agg_length$Quarter <- factor(
  substr(master_agg_length$Year.Quarter, 6, 7)
)
# Bar chart: filings (in thousands) for every quarter in the data.
g <- ggplot(
  master_agg_length,
  aes(x = Year.Quarter, y = Filings / 1000, group = 1)
) +
  geom_bar(stat = "identity") +
  def_theme +
  ylab("Filings (Thousands)") +
  ggtitle("Filings by All Quarters")
g
# Same quarterly series drawn as a connected line with point markers.
# group = 1 puts all quarters in one group so geom_line() joins them.
g <- ggplot(
  master_agg_length,
  aes(x = Year.Quarter, y = Filings / 1000, group = 1)
) +
  geom_line() +
  geom_point() +
  def_theme +
  # Override the shared theme with a smaller x-axis tick label size.
  theme(axis.text.x = element_text(angle = 90, size = 12)) +
  ylab("Filings (Thousands)") +
  ggtitle("Filings by All Quarters")
g
# Filings (in thousands) per year, with one coloured line per quarter value.
g <- ggplot(
  master_agg_length,
  aes(x = Year, y = Filings / 1000, group = Quarter, colour = Quarter)
) +
  geom_line() +
  geom_point() +
  def_theme +
  ylab("Filings (Thousands)") +
  ggtitle("Filings by Year Grouped by Quarter")
g
# Count the distinct CIKs that appear in each quarter.
# The count is computed inside FUN so aggregate() yields one integer per group.
# Collecting unique() values per group and measuring them afterwards is
# fragile: when every group has the same number of distinct CIKs (e.g. only
# one quarter is loaded), aggregate() simplifies the result to a matrix column
# and the per-cell lengths no longer line up with the rows.
master_agg_unique <- aggregate(
  CIK ~ Year.Quarter,
  data = master_data,
  FUN = function(cik) length(unique(cik))
)
names(master_agg_unique) <- c("Year.Quarter", "Unique.CIK")

# Split the quarter label into a year part and a quarter part.
# substr() positions are 1-based, so the year is characters 1-4.
master_agg_unique$Year <- factor(substr(master_agg_unique$Year.Quarter, 1, 4))
# NOTE(review): characters 6-7 assume the label has one separator character
# after the year -- confirm against the Quarter format from pygar_master().
master_agg_unique$Quarter <- factor(
  substr(master_agg_unique$Year.Quarter, 6, 7)
)
# Distinct CIK count per year, with one coloured line per quarter value.
g <- ggplot(
  master_agg_unique,
  aes(x = Year, y = Unique.CIK, group = Quarter, colour = Quarter)
) +
  geom_line() +
  geom_point() +
  def_theme +
  ggtitle("Unique CIK Grouped by Quarter")
g
# Count filings for each distinct Day value (rows pooled across all years).
master_aggday_length <- setNames(
  aggregate(CIK ~ Day, data = master_data, FUN = length),
  c("Day", "Filings")
)

# Line chart of filings per Day value; the x-axis tick labels are blanked out
# on top of the shared theme.
g <- ggplot(master_aggday_length, aes(x = Day, y = Filings, group = 1)) +
  geom_line() +
  geom_point() +
  def_theme +
  theme(axis.text.x = element_blank()) +
  ggtitle("Filings by Day of Year")
g
# Count filings per form type; the result has columns Form.Type and Filings.
master_formagg_length <- setNames(
  aggregate(CIK ~ Form.Type, data = master_data, FUN = length),
  c("Form.Type", "Filings")
)
# Bar chart of filings (in thousands) for every form type.
# ggplot() takes only the data and the aesthetic mapping here; FUN is an
# aggregate() argument and has no meaning for ggplot(), so it is not passed.
g <- ggplot(master_formagg_length, aes(x = Form.Type, y = Filings / 1000)) +
  geom_bar(stat = "identity") +
  def_theme +
  ylab("Filings (Thousands)") +
  ggtitle("Form Type Frequencies")
g
# Print the number of distinct form types (one row per form type).
nrow(master_formagg_length)
## [1] 666
# TODO: insert commentary about Wall Street here
# Print every form type present in the aggregated table.
master_formagg_length[["Form.Type"]]
## [1] 1 1-A 1-A POS 1-A-W 1-A-W/A 1-A/A
## [7] 1-E 1-E AD 1-E/A 1-K 1-K/A 1-SA
## [13] 1-SA/A 1-U 1-U/A 1-Z 1-Z/A 1/A
## [19] 10-12B 10-12B/A 10-12G 10-12G/A 10-C 10-C/A
## [25] 10-D 10-D/A 10-K 10-K/A 10-K405 10-K405/A
## [31] 10-KSB 10-KSB/A 10-KT 10-KT/A 10-Q 10-Q/A
## [37] 10-QSB 10-QSB/A 10-QT 10-QT/A 10KSB 10KSB/A
## [43] 10KSB40 10KSB40/A 10KT405 10KT405/A 10QSB 10QSB/A
## [49] 10SB12B 10SB12B/A 10SB12G 10SB12G/A 11-K 11-K/A
## [55] 11-KT 11-KT/A 12G3-2B 12G32BR 13F-E 13F-E/A
## [61] 13F-HR 13F-HR/A 13F-NT 13F-NT/A 13FCONP 13FCONP/A
## [67] 144 144/A 15-12B 15-12B/A 15-12G 15-12G/A
## [73] 15-15D 15-15D/A 15F-12B 15F-12B/A 15F-12G 15F-12G/A
## [79] 15F-15D 15F-15D/A 18-12B 18-K 18-K/A 19B-4
## [85] 19B-4E 2-A 2-A/A 2-AF 2-E 2-E/A
## [91] 20-F 20-F/A 20FR12B 20FR12B/A 20FR12G 20FR12G/A
## [97] 24F-1 24F-2EL 24F-2EL/A 24F-2NT 24F-2NT/A 24F-2TM
## [103] 25 25-NSE 25-NSE/A 25/A 253G1 253G2
## [109] 253G3 26 3 3/A 305B2 305B2/A
## [115] 34-12H 35-APP 35-APP/A 35-CERT 35-CERT/A 39-304D
## [121] 39-304D/A 39-310B 4 4/A 40-17F1 40-17F1/A
## [127] 40-17F2 40-17F2/A 40-17G 40-17G/A 40-17GCS 40-202A
## [133] 40-202A/A 40-203A 40-203A/A 40-205E 40-205E/A 40-206A
## [139] 40-206A/A 40-24B2 40-24B2/A 40-33 40-33/A 40-6B
## [145] 40-6B/A 40-6C 40-6C/A 40-8B25 40-8F-2 40-8F-2/A
## [151] 40-8F-A 40-8F-A/A 40-8F-B 40-8F-B/A 40-8F-L 40-8F-L/A
## [157] 40-8F-M 40-8F-M/A 40-8FC 40-8FC/A 40-APP 40-APP/A
## [163] 40-F 40-F/A 40-OIP 40-OIP/A 40-RPT 40FR12B
## [169] 40FR12B/A 40FR12G 40FR12G/A 424A 424B1 424B2
## [175] 424B3 424B4 424B5 424B7 424B8 424H
## [181] 424H/A 425 485A24E 485A24F 485APOS 485B24E
## [187] 485B24F 485BPOS 485BXT 485BXTF 486A24E 486APOS
## [193] 486B24E 486BPOS 487 497 497AD 497H2
## [199] 497J 497K 497K1 497K2 497K3A 497K3B
## [205] 5 5/A 6-K 6-K/A 6B NTC 6B ORDR
## [211] 8-A12B 8-A12B/A 8-A12G 8-A12G/A 8-B12B 8-B12B/A
## [217] 8-B12G 8-B12G/A 8-K 8-K/A 8-K12B 8-K12B/A
## [223] 8-K12G3 8-K12G3/A 8-K15D5 8-K15D5/A 8-M 8A12BEF
## [229] 8A12BT 8A12BT/A 8F-2 NTC 8F-2 ORDR 9-M ABS-15G
## [235] ABS-15G/A ABS-EE ABS-EE/A ADB ADN-MTL ADV-E
## [241] ADV-H-C ADV-H-T ADV-NR ADV/A ADVCO ADVW
## [247] AFDB AFDB/A ANNLRPT ANNLRPT/A APP NTC APP ORDR
## [253] APP WD APP WD/A APP WDG ARS ARS/A AW
## [259] AW WD BDCO BW-2 BW-3 C C-U
## [265] C-W C/A C/A-W CB CB/A CERT
## [271] CERTAMX CERTARCA CERTBATS CERTBSE CERTCSE CERTNAS
## [277] CERTNYS CERTPAC CERTPBS CFPORTAL CFPORTAL-W CFPORTAL/A
## [283] CORRESP CT ORDER D D/A DEF 14A DEF 14C
## [289] DEF-OC DEF13E3 DEF13E3/A DEFA14A DEFA14C DEFC14A
## [295] DEFC14C DEFM14A DEFM14C DEFN14A DEFR14A DEFR14C
## [301] DEFS14A DEFS14C DEL AM DFAN14A DFRN14A DOS
## [307] DOS/A DOSLTR DRS DRS/A DRSLTR DSTRBRPT
## [313] DSTRBRPT/A EBRD EBRD/A EFFECT F-1 F-1/A
## [319] F-10 F-10/A F-10EF F-10POS F-1MEF F-2
## [325] F-2/A F-3 F-3/A F-3ASR F-3D F-3DPOS
## [331] F-3MEF F-4 F-4 POS F-4/A F-4MEF F-6
## [337] F-6 POS F-6/A F-6EF F-7 F-7 POS F-7/A
## [343] F-8 F-8 POS F-8/A F-80 F-80/A F-80POS
## [349] F-9 F-9 POS F-9/A F-9EF F-N F-N/A
## [355] F-X F-X/A FOCUSN FOCUSN/A FWP G-405
## [361] G-405/A G-405N G-405N/A G-FIN G-FIN/A G-FINW
## [367] IADB ID-NEWCIK IFC IRANNOTICE MA MA-A
## [373] MA-I MA-I/A MA-W MA/A MSD MSD/A
## [379] MSDCO MSDW N-1 N-1/A N-14 N-14 8C
## [385] N-14 8C/A N-14/A N-14AE N-14AE/A N-14MEF N-18F1
## [391] N-18F1/A N-1A N-1A EL N-1A EL/A N-1A/A N-2
## [397] N-2/A N-23C-1 N-23C-1/A N-23C-2 N-23C-2/A N-23C3A
## [403] N-23C3A/A N-23C3B N-23C3B/A N-23C3C N-23C3C/A N-27D-1
## [409] N-2MEF N-3 N-3 EL N-3 EL/A N-3/A N-30B-2
## [415] N-30D N-30D/A N-4 N-4 EL N-4 EL/A N-4/A
## [421] N-5 N-5/A N-54A N-54A/A N-54C N-54C/A
## [427] N-6 N-6/A N-6F N-6F/A N-8A N-8A/A
## [433] N-8B-2 N-8B-2/A N-8B-4 N-8F N-8F NTC N-8F ORDR
## [439] N-8F/A N-CR N-CR/A N-CSR N-CSR/A N-CSRS
## [445] N-CSRS/A N-MFP N-MFP/A N-MFP1 N-MFP1/A N-MFP2
## [451] N-MFP2/A N-PX N-PX/A N-Q N-Q/A N14AE24
## [457] N14AE24/A N14EL24 N14EL24/A NO ACT NRSRO-CE NRSRO-CE/A
## [463] NRSRO-UPD NSAR-A NSAR-A/A NSAR-AT NSAR-AT/A NSAR-B
## [469] NSAR-B/A NSAR-BT NSAR-BT/A NSAR-U NSAR-U/A NT 10-D
## [475] NT 10-D/A NT 10-K NT 10-K/A NT 10-Q NT 10-Q/A NT 11-K
## [481] NT 11-K/A NT 15D2 NT 15D2/A NT 20-F NT 20-F/A NT N-MFP
## [487] NT N-MFP1 NT N-MFP2 NT-NCSR NT-NCSR/A NT-NSAR NT-NSAR/A
## [493] NTFNCSR NTFNSAR NTN 10D NTN 10K NTN 10Q NTN 11K
## [499] NTN 20F NTN15D2 OIP NTC OIP ORDR POS 8C POS AM
## [505] POS AMC POS AMI POS EX POS462B POS462C POSASR
## [511] PRE 14A PRE 14C PRE13E3 PRE13E3/A PREA14A PREA14C
## [517] PREC14A PREC14C PREM14A PREM14C PREN14A PRER14A
## [523] PRER14C PRES14A PRES14C PRRN14A PX14A6G PX14A6N
## [529] QRTLYRPT QRTLYRPT/A QUALIF REG-NR REG-NR/A REGDEX
## [535] REGDEX/A REVOKED RW RW WD S-1 S-1/A
## [541] S-11 S-11/A S-11MEF S-1MEF S-2 S-2/A
## [547] S-20 S-20/A S-2MEF S-3 S-3/A S-3ASR
## [553] S-3D S-3D/A S-3DPOS S-3MEF S-4 S-4 POS
## [559] S-4/A S-4EF S-4EF/A S-4MEF S-6 S-6/A
## [565] S-6EL24 S-6EL24/A S-8 S-8 POS S-8/A S-B
## [571] S-B/A S-BMEF SB-1 SB-1/A SB-1MEF SB-2
## [577] SB-2/A SB-2MEF SC 13D SC 13D/A SC 13E1 SC 13E1/A
## [583] SC 13E3 SC 13E3/A SC 13E4 SC 13E4/A SC 13G SC 13G/A
## [589] SC 14D1 SC 14D1/A SC 14D9 SC 14D9/A SC 14F1 SC 14F1/A
## [595] SC 14N SC 14N/A SC TO-C SC TO-I SC TO-I/A SC TO-T
## [601] SC TO-T/A SC13E4F SC13E4F/A SC14D1F SC14D1F/A SC14D9C
## [607] SC14D9F SC14D9F/A SD SD/A SDR SE
## [613] SF-1 SF-1/A SF-3 SF-3/A SL SP 15D2
## [619] SP 15D2/A STOP ORDER SUPPL T-3 T-3/A TA-1
## [625] TA-1/A TA-2 TA-2/A TA-W TACO TH
## [631] TTW TTW/A U-1 U-1/A U-12-IA U-12-IA/A
## [637] U-12-IB U-12-IB/A U-13-60 U-13-60/A U-13E-1 U-33-S
## [643] U-33-S/A U-3A-2 U-3A-2/A U-3A3-1 U-57 U-57/A
## [649] U-6B-2 U-6B-2/A U-7D U-7D/A U-9C-3 U-9C-3/A
## [655] U5A U5A/A U5B U5B/A U5S U5S/A
## [661] UNDER UNDER/A UPLOAD WDL-REQ X-17A-5 X-17A-5/A
## 666 Levels: 1 1-A 1-A POS 1-A-W 1-A-W/A 1-A/A 1-E 1-E AD 1-E/A ... X-17A-5/A
# Keep only the most frequently filed 5% of form types.

# Rank form types from most to least filed.
master_formagg_length <- master_formagg_length[
  order(-master_formagg_length$Filings),
]

# Position of the last row inside the top 5%. It is clamped to at least 1 so
# that tables with fewer than 20 rows do not index position 0 (which would
# yield an empty cutoff and drop every row), and to at most the row count so
# an empty table stays empty instead of producing NA rows.
top_count <- max(1L, floor(nrow(master_formagg_length) * 0.05))
top_count <- min(top_count, nrow(master_formagg_length))
bottom_value <- master_formagg_length$Filings[top_count]

# Use >= so the cutoff row itself (and any form types tied with it) is kept;
# a strict > would silently drop the last member of the top 5%.
master_formagg_length <- master_formagg_length[
  master_formagg_length$Filings >= bottom_value,
]
# Bar chart of filings (in thousands) for the form types left in
# master_formagg_length at this point in the script.
# ggplot() takes only the data and the aesthetic mapping here; FUN is an
# aggregate() argument and has no meaning for ggplot(), so it is not passed.
g <- ggplot(master_formagg_length, aes(x = Form.Type, y = Filings / 1000)) +
  geom_bar(stat = "identity") +
  def_theme +
  # Override the shared theme with a smaller x-axis tick label size.
  theme(axis.text.x = element_text(angle = 90, size = 12)) +
  ylab("Filings (Thousands)") +
  ggtitle("Top 5% of Form Type Frequencies")
g