library(pygaR)
library(ggplot2)
library(stringr)
library(XML)
# Default plot theme: black-and-white base with vertical x-axis labels and
# enlarged axis/plot titles. `d` is kept as an alias of the same theme object.
d <- theme_bw() +
  theme(
    axis.text.x = element_text(angle = 90, size = 15),
    axis.title = element_text(size = 20),
    plot.title = element_text(size = 30)
  )
def_theme <- d
Source article: https://www.wsj.com/articles/google-ceo-tops-other-alphabet-execs-with-200-million-pay-1493424255
The article mentions the day “Friday”, which corresponds to 2017-04-28, so that is the filing date to search for.
filings <- pygar_master(date=20170428, company='/alphabet inc/i')
filings
## CIK Company.Name Form.Type Date.Filed
## 1 1652044 Alphabet Inc. DEF 14A 20170428
## 2 1652044 Alphabet Inc. DEFA14A 20170428
## File.Name Quarter Date
## 1 edgar/data/1652044/0001308179-17-000170.txt 2 20170428
## 2 edgar/data/1652044/0001308179-17-000171.txt 2 20170428
form <- pygar_form(filings$File.Name[1])
names(form)
## [1] "Headers" "Body"
# Use the element's full name ("Headers"); `form$Header` only resolved through
# `$` partial matching on the list, which silently breaks on tibbles or if a
# second element starting with "Header" ever appears.
names(form$Headers)
## [1] "Acceptance.Datetime" "Date.As.Of.Change"
## [3] "Conformed.Submission.Type" "Filer"
## [5] "Filed.As.Of.Date" "Sec.Document"
## [7] "Accession.Number" "Public.Document.Count"
## [9] "Effectiveness.Date" "Conformed.Period.Of.Report"
## [11] "Sec.Header"
names(form$Body[[1]])
## [1] "Filename" "Text" "Description" "Text.Type" "Type"
## [6] "Sequence"
grepl('pichai', form$Body, ignore.case=TRUE) &
grepl('schmidt', form$Body, ignore.case=TRUE)
## [1] TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [12] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [23] FALSE FALSE FALSE FALSE FALSE
# The first body document is the one that mentions both executives.
doc <- form$Body[[1]]
# Pass the path itself so writeLines() opens and closes the file on its own;
# wrapping it in file() left an unclosed connection behind.
writeLines(doc$Text, "alphabet.html")
# Parse `html` with htmlParse() and return the xmlValue() of every node
# selected by the XPath expression `xpath` (as produced by xpathApply()).
sct_xmlparse <- function(html, xpath) {
  xpathApply(htmlParse(html), xpath, xmlValue)
}
#' Extract the summary compensation table from a filing's HTML text.
#'
#' Splits the text on the literal "<TABLE" marker, selects the first table
#' whose markup contains "Salary", "Bonus" and "Principal", and strips that
#' table down to bare structural markup.
#'
#' @param data Character vector holding the HTML text of a filing document.
#'   Only the first element is searched.
#' @return A length-one character vector with the cleaned table markup, or
#'   `NA_character_` (with a warning) when no table contains all three
#'   keywords.
sct_grab <- function(data) {
  data <- trimws(data)
  pieces <- strsplit(data, "<TABLE", fixed = TRUE)
  # The first piece is whatever precedes the first "<TABLE", so it is never a
  # table and must not be considered as a candidate.
  fragments <- if (length(pieces) > 0) pieces[[1]][-1] else character(0)
  keywords <- c("Salary", "Bonus", "Principal")

  found <- NA_character_
  for (fragment in fragments) {
    # Keep only the text up to this table's closing tag.
    inner <- strsplit(fragment, "</TABLE>", fixed = TRUE)[[1]][1]
    if (is.na(inner)) {
      next
    }
    candidate <- paste0("<TABLE", inner, "</TABLE>")
    # grepl() yields a plain TRUE/FALSE per keyword, instead of relying on
    # `integer(0) && ...` evaluating to NA as the grep()-based test did.
    has_keyword <- vapply(
      keywords, grepl, logical(1),
      x = candidate, fixed = TRUE
    )
    if (all(has_keyword)) {
      found <- candidate
      break
    }
  }

  if (is.na(found)) {
    warning(
      "No table containing 'Salary', 'Bonus' and 'Principal' was found",
      call. = FALSE
    )
    return(NA_character_)
  }

  # Drop attributes from opening tags whose attribute list ends in a quote.
  found <- gsub(
    "(<[A-Z]+)[ \n][^>]+\">", "\\1>", found,
    ignore.case = TRUE
  )
  # Remove whitespace-only gaps between adjacent tags.
  found <- gsub(">[\n ]+<", "><", found)
  # Remove every tag in which none of TABLE, TR, TH, TD, P or BR occurs.
  found <- gsub(
    "<[\\/]?(?:(?!(?:TABLE|TR|TH|TD|P|BR))[^>])+>", "", found,
    perl = TRUE, ignore.case = TRUE
  )
  found
}
doc_table <- sct_grab(doc$Text)
## Warning: closing unused connection 6 (alphabet.html)
doc_table
[1] “
Name and | Salary | Bonus | Stock Awards | Option Awards | Non-Equity Incentive Plan Compensation | Non-Qualified Deferred Compensation Earnings | All Other Compensation | Total | ||||||||||||||||||||||||||||
Principal Position | Year | ($) | (1) | ($) | (2) | ($) | (3) | ($) | ($) | ($) | (4) | ($) | (5) | ($) | ||||||||||||||||||||||
Larry Page(6) | 2016 | 1 | — | — | — | — | — | — | 1 | |||||||||||||||||||||||||||
CEO, | 2015 | 1 | — | — | — | — | — | — | 1 | |||||||||||||||||||||||||||
Alphabet, and Co-Founder | 2014 | 1 | — | — | — | — | — | — | 1 | |||||||||||||||||||||||||||
Sergey Brin(6) | 2016 | 1 | — | — | — | — | — | — | 1 | |||||||||||||||||||||||||||
President, Alphabet, | 2015 | 1 | — | — | — | — | — | — | 1 | |||||||||||||||||||||||||||
and Co-Founder | 2014 | 1 | — | — | — | — | — | — | 1 | |||||||||||||||||||||||||||
Eric E. Schmidt | 2016 | 1,250,000 | — | — | — | — | 2,430,685 | 629,106 | (7) | 4,309,791 | ||||||||||||||||||||||||||
Executive Chairman, | 2015 | 1,254,808 | 6,000,000 | — | — | — | — | 783,370 | 8,038,178 | |||||||||||||||||||||||||||
Alphabet | 2014 | 1,250,000 | 6,000,000 | 100,443,838 | — | — | — | 996,934 | 108,690,772 | |||||||||||||||||||||||||||
Sundar Pichai | 2016 | 650,000 | — | 198,695,790 | — | — | — | 372,410 | (8) | 199,718,200 | ||||||||||||||||||||||||||
Chief Executive\n Officer, Google | 2015 | 652,500 | — | 99,829,142 | — | — | — | 150,460 | 100,632,102 | |||||||||||||||||||||||||||
Ruth M. Porat | 2016 | 650,000 | — | 38,313,173 | — | — | — | 110,956 | (9) | 39,074,129 | ||||||||||||||||||||||||||
Senior Vice President\n and Chief Financial Officer, Alphabet and Google | 2015 | 395,000 | 5,000,000 | 25,052,554 | — | — | — | 603,932 | 31,051,486 | |||||||||||||||||||||||||||
David C. Drummond | 2016 | 650,000 | — | — | — | — | — | 14,387 | (10) | 664,387 | ||||||||||||||||||||||||||
Senior Vice President, | 2015 | 652,500 | — | — | — | — | — | 20,323 | 672,823 | |||||||||||||||||||||||||||
Corporate Development,\n Chief Legal Officer, and Secretary, Alphabet | 2014 | 650,000 | 3,500,000 | 40,092,200 | — | — | — | 16,688 | 44,258,888 |
# Placeholder: presumably meant to turn the cleaned table markup produced by
# sct_grab() into a data frame (TODO confirm intent). The body is empty, so
# every call currently returns NULL regardless of `doc_table`.
sct_table_df <- function(doc_table){
}
# Run the full pipeline on a document's text: extract the table markup with
# sct_grab(), then hand it to sct_table_df(). The result is returned
# invisibly, so calling this at the console prints nothing.
sct_parse <- function(doc_text) {
  invisible(sct_table_df(sct_grab(doc_text)))
}
sct_parse(doc$Text)
alphabet_data <- pygar_master(
startqtr=201001, endqtr=201604,
cik=1652044, form="DEF 14A"
)
alphabet_data
## CIK Company.Name Form.Type Date.Filed
## 1 1652044 Alphabet Inc. DEF 14A 2016-04-29
## Filename Quarter
## 1 edgar/data/1652044/0001308179-16-000384.txt 201602
google_one <- pygar_master(qtr=201001, company='/google inc/i', form="DEF 14A")
google_cik <- google_one$CIK[1]
google_cik
## [1] 1288776
google_data <- pygar_master(
startqtr=200201, endqtr=201504,
cik=google_cik, form="DEF 14A"
)
google_data
## CIK Company.Name Form.Type Date.Filed
## 1 1288776 Google Inc. DEF 14A 2005-04-08
## 2 1288776 Google Inc. DEF 14A 2006-03-31
## 3 1288776 Google Inc. DEF 14A 2007-04-04
## 4 1288776 Google Inc. DEF 14A 2008-03-25
## 5 1288776 Google Inc. DEF 14A 2009-03-24
## 6 1288776 Google Inc. DEF 14A 2010-03-29
## 7 1288776 Google Inc. DEF 14A 2011-04-20
## 8 1288776 Google Inc. DEF 14A 2012-05-09
## 9 1288776 Google Inc. DEF 14A 2013-04-24
## 10 1288776 Google Inc. DEF 14A 2014-03-28
## 11 1288776 Google Inc. DEF 14A 2015-04-23
## Filename Quarter
## 1 edgar/data/1288776/0001193125-05-072803.txt 200502
## 2 edgar/data/1288776/0001193125-06-070406.txt 200601
## 3 edgar/data/1288776/0001193125-07-073756.txt 200702
## 4 edgar/data/1288776/0001193125-08-064574.txt 200801
## 5 edgar/data/1288776/0001193125-09-061999.txt 200901
## 6 edgar/data/1288776/0001193125-10-070028.txt 201001
## 7 edgar/data/1288776/0001193125-11-103802.txt 201102
## 8 edgar/data/1288776/0001193125-12-222158.txt 201202
## 9 edgar/data/1288776/0001308179-13-000248.txt 201302
## 10 edgar/data/1288776/0001308179-14-000114.txt 201401
## 11 edgar/data/1288776/0001308179-15-000157.txt 201502
# Fetch each filing and keep the text of its first body document. vapply()
# builds the result in one pass (no repeated c() copies) and checks that every
# filing contributes exactly one string, so positions in `google_docs` line up
# with the rows of `google_data`.
google_docs <- vapply(
  as.character(google_data$Filename),
  function(file) pygar_form(file)$Body[[1]]$Text,
  character(1),
  USE.NAMES = FALSE
)
sct_grab(google_docs[10])
[1] “
Name\n and Principal Position | Year | Salary(1) ($) | Bonus(2) ($) | Stock\n Awards(3) ($) | Option\n Awards(4) ($) | Non-Equity Incentive Plan Compensation ($) | Non-Qualified\n Deferred Compensation Earnings(5) ($) | All\n Other Compensation(6) ($) | Total\n ($) | |||||||||
Larry\n Page(7) | 2013 | 1 | — | — | — | — | — | — | 1 | |||||||||
Chief\n Executive | 2012 | 1 | — | — | — | — | — | — | 1 | |||||||||
Officer\n and | 2011 | 1 | — | — | — | — | — | — | 1 | |||||||||
Co-Founder | ||||||||||||||||||
Sergey\n Brin(7) | 2013 | 1 | — | — | — | — | — | — | 1 | |||||||||
Co-Founder | 2012 | 1 | — | — | — | — | — | — | 1 | |||||||||
2011 | 1 | — | — | — | — | — | — | 1 | ||||||||||
Eric\n E. Schmidt(8) | 2013 | 1,250,000 | 6,000,000 | 11,365,184 | (9) | — | — | — | 708,196 | (10) | 19,323,380 | |||||||
Executive | 2012 | 1,250,000 | 6,000,000 | — | — | — | 35,320 | 343,304 | 7,628,624 | |||||||||
Chairman\n of the Board of Directors | 2011 | 937,500 | — | 55,643,040 | 38,136,040 | 6,000,000 | — | 263,682 | 100,980,262 | |||||||||
Patrick\n Pichette | 2013 | 650,000 | 3,000,000 | 1,489,917 | (11) | — | — | — | 13,159 | 5,153,076 | ||||||||
Senior\n Vice | 2012 | 650,000 | 2,800,000 | 21,964,757 | 13,314,569 | — | — | 11,780 | 38,741,106 | |||||||||
President\n and Chief Financial Officer | 2011 | 650,000 | — | 8,408,292 | 6,238,440 | 3,000,000 | — | 10,238 | 18,306,970 | |||||||||
Nikesh\n Arora | 2013 | 650,000 | 3,500,000 | 1,548,117 | (11) | — | — | — | 11,486 | 5,709,603 | ||||||||
Senior\n Vice | 2012 | 650,000 | 10,800,000 | 24,709,875 | (12) | 14,978,818 | (13) | — | — | 7,175 | 51,145,868 | |||||||
President\n and Chief Business Officer | 2011 | 650,000 | — | 11,210,865 | 8,317,778 | 3,000,000 | — | 8,910 | 23,187,553 | |||||||||
David\n C. | 2013 | 650,000 | 3,000,000 | 1,134,369 | (11) | — | — | — | 13,289 | 4,797,658 | ||||||||
Drummond | 2012 | 650,000 | 3,300,000 | 17,022,655 | 10,318,728 | — | — | 10,475 | 31,301,858 | |||||||||
Senior\n Vice President, Corporate Development, Chief Legal Officer, and Secretary | 2011 | 650,000 | — | 8,408,292 | 6,238,440 | 3,000,000 | — | 9,240 | 18,305,972 |
Still figuring out a good way to gather this info for a graph…