Fri Aug 19 22:05:31 2022
(All are still regularly updated as of roughly the above date. I apologize for any organizational issues and the raw nature of this data: there is a lot to manage and a lot coming in, and I am still trying to analyze it manually to a certain degree while monitoring services. I also have a disorganized mess of a mind.)
https://bcable.net/analysis-ukr-prelim.html
https://bcable.net/analysis-ukr-graphs.html
https://bcable.net/analysis-ukr-indicators.html
https://bcable.net/analysis-ukr-ru_map_sessions.html
https://bcable.net/analysis-ukr-cn_map_sessions.html
https://bcable.net/analysis-ukr-miori_fail.html
https://bcable.net/analysis-ukr-botnet_perl.html
https://bcable.net/analysis-ukr-ddos_gh0st.html
https://bcable.net/analysis-ukr-indicators_2023.html
https://bcable.net/analysis-ukr-crew_001.html
https://bcable.net/analysis-ukr-inventory_attack.html
https://bcable.net/analysis-ukr-crew_002.html
library(Rwhois)
library(ggplot2)
library(sf)
## Linking to GEOS 3.9.2, GDAL 3.3.3, PROJ 8.2.1; sf_use_s2() is TRUE
library(wk)
# Load the China shapefile; its NAME_1 column is used further down to build
# the key for joining session counts onto the map.
china <- sf::read_sf("redacted/geomaps/cn/gadm40_CHN_1.shp")
https://www.downloadexcelfiles.com/cn_en/download-excel-file-list-provinces-china
# Reference table of Chinese provinces; only rows with a non-blank Province
# value are kept as candidate names for the WHOIS matching below.
provinces_list <- read.csv(
  file = "redacted/geomaps/cn/list_of_provinces_of_china-70j.csv"
)
province_names <- provinces_list$Province[
  provinces_list$Province != ""
]

# Hosts to geolocate; the remote_host column is what gets sent to WHOIS.
chinese_hosts <- read.csv(file = "chinese_hosts.csv")
# Cleanup rules handed to find_province() / cleanup_province(). Each entry is
# a two-element vector c(regex, replacement); both rules replace with "".
# NOTE(review): presumably applied in order as gsub-style substitutions --
# confirm against shared/geo_provinces.R.
cn_map_cleanup_list <- lapply(
  c(
    # administrative suffix following the province name
    " (municipality|province|autonomous region|special administrative region)",
    # anything that is not a lower-case ASCII letter
    "[^a-z]"
  ),
  function(pattern) c(pattern, "")
)
# Name rewrites used for the shapefile side of the join (passed to
# cleanup_province() together with china$NAME_1, and to find_province()).
# Each entry is c(from, to).
# NOTE(review): presumably applied as pattern -> replacement on the cleaned
# name -- confirm against shared/geo_provinces.R.
#
# The Xinjiang target was previously spelled "xinjianuygur" (missing "g"),
# which disagreed with the "xinjianguygur" spelling used in cn_mirror_geo, so
# the two sides could not end up with the same merge key for that region.
cn_mirror_map <- list(
  c("guangxi", "guangxizhuang"),
  c("ningxia", "ningxiahui"),
  c("innermongolia", "neimongol"),
  c("tibet", "xizang"),
  c("xinjiang", "xinjianguygur")
)
# Name rewrites used for the host/WHOIS side of the join (passed to
# cleanup_province() together with chinese_hosts_geo$province, and to
# find_province()). One c(from, to) pair per matrix row below.
cn_mirror_geo <- local({
  pairs <- matrix(
    c(
      "innermongolia", "neimongol",
      "ningxiahui",    "ningxiahuihui",
      "tibet",         "xizang",
      "xinjianguygur", "xinjianguyguruygur"
    ),
    ncol = 2,
    byrow = TRUE
  )
  lapply(seq_len(nrow(pairs)), function(i) pairs[i, ])
})
https://bcable.net/x/Rproj/shared
# Shared helpers; find_province() (used here) and cleanup_province() (used
# further down) are expected to come from this file.
source("shared/geo_provinces.R")

# Turn a list of WHOIS results into one province value per result by passing
# each result's `val` field to find_province() with the lookup tables above.
provinces_from_whois <- function(whois_results) {
  sapply(
    whois_results,
    FUN = function(x) {
      find_province(
        x$val,
        province_names, cn_map_cleanup_list, cn_mirror_map, cn_mirror_geo
      )
    }
  )
}

# WHOIS lookups are slow and flaky, so results are cached on disk:
#   chinese_hosts_geo.csv      - base cache from the first full run
#   chinese_hosts_geo_new.csv  - base cache plus hosts seen since then
# Previously the "_new" file was written but never read back, so re-running
# after an incremental update silently fell back to the stale base cache.
if (!file.exists("chinese_hosts_geo.csv")) {
  # No cache at all: look up every host.
  chinese_hosts_whois <- Rwhois::whois_query(chinese_hosts$remote_host)
  ret_provinces <- provinces_from_whois(chinese_hosts_whois)
  chinese_hosts_geo <- chinese_hosts
  chinese_hosts_geo$province <- ret_provinces
  write.csv(chinese_hosts_geo, "chinese_hosts_geo.csv", row.names = FALSE)
} else if (file.exists("chinese_hosts_geo_new.csv")) {
  # An incremental update already ran; it contains the base cache plus the
  # newer hosts, so use it instead of the base file.
  chinese_hosts_geo <- read.csv("chinese_hosts_geo_new.csv")
} else {
  chinese_hosts_geo <- read.csv("chinese_hosts_geo.csv")
  # Only hosts that are not in the base cache need a WHOIS lookup.
  chinese_hosts_new <- chinese_hosts[
    !(chinese_hosts$remote_host %in% chinese_hosts_geo$remote_host),
  ]
  # Skip the network round-trip (and the empty-result assignment) when there
  # is nothing new to resolve; the base cache is then used as-is.
  if (nrow(chinese_hosts_new) > 0) {
    chinese_hosts_new_whois <- Rwhois::whois_query(
      chinese_hosts_new$remote_host
    )
    ret_provinces <- provinces_from_whois(chinese_hosts_new_whois)
    chinese_hosts_new_geo <- chinese_hosts_new
    chinese_hosts_new_geo$province <- ret_provinces
    chinese_hosts_geo <- rbind(
      chinese_hosts_geo, chinese_hosts_new_geo
    )
    write.csv(
      chinese_hosts_geo, "chinese_hosts_geo_new.csv", row.names = FALSE
    )
  }
}
## [1] "Error (WHOIS Server: whois.lacnic.net; Hostname Input: 181.143.224.92)"
## <simpleError in read.socket(conn): Error reading data in Rsockread>
## [1] "Error in make.socket(server, 43): socket not established\n on connection, retrying..."
## [1] "Error (WHOIS Server: whois.lacnic.net; Hostname Input: 177.104.236.12)"
## <simpleError in read.socket(conn): Error reading data in Rsockread>
## [1] "Error (WHOIS Server: whois.lacnic.net; Hostname Input: 177.104.236.146)"
## <simpleError in read.socket(conn): Error reading data in Rsockread>
## [1] "Error (WHOIS Server: whois.lacnic.net; Hostname Input: 187.95.68.96)"
## <simpleError in read.socket(conn): Error reading data in Rsockread>
## [1] "Error (WHOIS Server: whois.lacnic.net; Hostname Input: 177.84.149.184)"
## <simpleError in read.socket(conn): Error reading data in Rsockread>
## [1] "Error in make.socket(server, 43): socket not established\n on connection, retrying..."
## [1] "Error in make.socket(server, 43): socket not established\n on connection, retrying..."
## [1] "Error in make.socket(server, 43): socket not established\n on connection, retrying..."
## [1] "Error in make.socket(server, 43): socket not established\n on connection, retrying..."
## [1] "Error in make.socket(server, 43): socket not established\n on connection, retrying..."
## [1] "Error in write.socket(conn, hostname): object 'conn' not found\n on header write, retrying..."
## [1] "Error in write.socket(conn, hostname): object 'conn' not found\n on header write, retrying..."
## [1] "Error in write.socket(conn, hostname): object 'conn' not found\n on header write, retrying..."
## [1] "Error in write.socket(conn, hostname): object 'conn' not found\n on header write, retrying..."
## [1] "Error in write.socket(conn, hostname): object 'conn' not found\n on header write, retrying..."
## [1] "Error in write.socket(conn, \"\\r\\n\"): object 'conn' not found\n on header finalize, retrying..."
## [1] "Error in write.socket(conn, \"\\r\\n\"): object 'conn' not found\n on header finalize, retrying..."
## [1] "Error in write.socket(conn, \"\\r\\n\"): object 'conn' not found\n on header finalize, retrying..."
## [1] "Error in write.socket(conn, \"\\r\\n\"): object 'conn' not found\n on header finalize, retrying..."
## [1] "Error in write.socket(conn, \"\\r\\n\"): object 'conn' not found\n on header finalize, retrying..."
## Error in strsplit(data, "\n"): non-character argument
# Build the same normalised join key on both sides: the shapefile names go
# through cn_mirror_map, the WHOIS-derived province names through
# cn_mirror_geo.
china$merge.col <- cleanup_province(
  china$NAME_1, cn_map_cleanup_list, cn_mirror_map
)
chinese_hosts_geo$merge.col <- cleanup_province(
  chinese_hosts_geo$province, cn_map_cleanup_list, cn_mirror_geo
)

# Total the `count` column per join key and show the resulting table.
agg_provinces <- aggregate(
  count ~ merge.col,
  data = chinese_hosts_geo,
  FUN = sum
)
agg_provinces
## merge.col count
## 1 anhui 4112
## 2 beijing 66597
## 3 chongqing 635
## 4 fujian 1897
## 5 gansu 312
## 6 guangdong 14959
## 7 guangxizhuang 697
## 8 guizhou 594
## 9 hainan 274
## 10 hebei 3132
## 11 heilongjiang 1054
## 12 henan 16652
## 13 hongkong 1183
## 14 hubei 45610
## 15 hunan 1162
## 16 jiangsu 13967
## 17 jiangxi 2283
## 18 jilin 904
## 19 liaoning 1961
## 20 neimongol 189
## 21 ningxiahuihui 82
## 22 qinghai 147
## 23 shaanxi 489
## 24 shandong 13827
## 25 shanghai 9425
## 26 shanxi 3434
## 27 sichuan 4114
## 28 taiwan 53
## 29 tianjin 432
## 30 xizang 73
## 31 yunnan 520
## 32 zhejiang 35953
(Another bad map: Taiwan is not in China, it's independent… oh well, the actual shape files don't include Taiwan, so it doesn't matter, as it doesn't show up.)
# Attach the per-province totals to the map geometry. all.x = TRUE keeps
# provinces that have no matching row; their count is NA.
china_data <- merge(china, agg_provinces, by = "merge.col", all.x = TRUE)

# Choropleth of the counts, built as one layered expression.
g <- ggplot(china_data) +
  labs(
    title = "CO.UA Honeypot: Established Sessions by Chinese Region (From WHOIS Data)",
    fill = "Sessions"
  ) +
  scale_fill_viridis_c() +
  geom_sf(aes(geometry = geometry, fill = count)) +
  theme_bw() +
  theme(
    plot.margin = margin(0.2, 0.2, 0.2, 0.2, "cm")
  )
g
Geolocation based on IP address is not to be taken as entirely accurate as to the source of traffic or attacks conducted. There are many reasons for this, which include (but are not limited to):
Large quantities of traffic, especially attack based traffic, will use a VPN or the Tor network (or some reasonable facsimile), to mask the origin of the traffic. This will in turn change the appearance of the location of origin. Usually, an attacker will also intentionally want the traffic to appear to come from somewhere that has some form of lesser legal jurisdiction, some form of lesser ability to police traffic, or come from a well known source of malicious attacks such as China or Russia.
For instance, the following log entry was generated by myself against my servers while sitting at my desk in the United States, but it gets geolocated as Russia because of how the packet was sent. This sort of masking is trivial to perform, even by a nine year old on a cellphone.
# Show the request, response code and country code for the log rows whose
# Request matches the demonstration path.
httpd_data[
  grepl("/from/russia/with/logs", httpd_data$Request),
  c("Request", "Response.Code", "Country.Code")
]
## Request Response.Code Country.Code
## 1 GET /from/russia/with/logs HTTP/1.1 404 RU
Some locations will have a higher distribution of virtual servers than others, such as Silicon Valley or China. This can lead to larger quantities of vulnerable virtual machines and servers in those regions, and distort the resulting aggregate data.
It is possible that due to address assignment for governmental intelligence purposes or other economic or political reasons a nation could re-allocate address space and forge the identity similarly to a NAT (network address translation). They could also funnel information via VPN technologies for another nation.
Because most of these agreements are made in private, and due to the fact that most geolocation, RDAP, and WHOIS records are based on self-reporting, it is impossible to know the 100% true nature of geographic address assignment.
This geolocation uses the rgeolocate package available on CRAN, and uses the internal country database that is shipped with it. There could be an error in the database shipped, there could be an error in the lookup code, etc. Bugs happen. I have no reason to believe that any false geolocation is being performed by these packages, however.
Also used are the RDAP and WHOIS systems, whose records are self-reported and can frequently be false or misleading. Which of the systems (RDAP, WHOIS, or rgeolocate) is used is disclosed when necessary.
Despite these weaknesses, this doesn't change the fact that looking at this sort of data can be quite fun and interesting, and potentially enlightening. Generalized conclusions should not be made from this data or the maps herein. You have been warned.