Last compiled on May, 2025
The second step is to find the frequencies of the selected set of
names (or X’s, in NSUM terminology). We again use the Meertens Voornamenbank, which
shows name popularity by year. We do this via a relatively straightforward
crawl of that website. You can see the selected names below in the
code.
Initializing the R
environment
Start out with a custom function to load a set of required
packages.
# packages and read data
# Start from a clean slate: remove every object listed by ls() in the current environment.
rm(list = ls())
# Scraper to collect popularity lists of names in the Netherlands, per year.
# Rense Corten, Utrecht University, April 2021
# ----------- LOAD THE REQUIRED PACKAGES ---------------- |
# (c) Jochem Tolsma
# Attach every package named in `packages`, installing any that cannot be
# loaded first.
#
# packages: character vector of package names.
# Returns a list with one element per package: NULL when the package could be
# loaded straight away, otherwise the value returned by library() after the
# package has been installed.
fpackage.check <- function(packages) {
  # Handle a single package: try to load it, and fall back to install + attach.
  load_or_install <- function(pkg) {
    if (require(pkg, character.only = TRUE)) {
      return(NULL)
    }
    install.packages(pkg, dependencies = TRUE)
    library(pkg, character.only = TRUE)
  }

  lapply(packages, FUN = load_or_install)
}
# Packages the scraper relies on; load them (installing when missing).
packages <- c(
  "tidyverse",
  "rvest",
  "polite"
)
fpackage.check(packages)
Functions
Make a custom function that queries the Meertens Voornamenbank for the frequency of a given name.
# ----------- FUNCTIONS ---------------- |
# Look up how often a first name occurs according to the name's page under
# "/nvb/naam/is/<name>" on the host of `session`.
#
# session: a polite session as returned by bow() for the higher-level path.
# name:    the first name to look up (appended verbatim to the path).
#
# Returns a single numeric value: the sum of the two counts read from column
# "X3" (rows 2 and 6) of the first table on the page, where "--" counts as 0.
# Returns NA (with a warning) when the page cannot be retrieved or does not
# contain the expected table, so that one failing name does not abort a whole
# scraping run.
get_name_number <- function(session, name) {
  # Path of the web page for this specific name.
  name_path <- paste0("/nvb/naam/is/", name)

  # Agree the change of path with the host (assumes the caller has already
  # "bowed" for the higher-level path).
  name_session <- nod(session, path = name_path)

  # Fetch the page for this name; scrape() yields NULL when fetching fails.
  name_page <- scrape(name_session)
  if (is.null(name_page)) {
    warning("Could not retrieve the page for name '", name, "'; returning NA.",
            call. = FALSE)
    return(NA_real_)
  }

  # Parse all tables on the page; only the first one is used below.
  names_data <- html_table(name_page)
  if (length(names_data) == 0 || !("X3" %in% names(names_data[[1]]))) {
    warning("No frequency table found for name '", name, "'; returning NA.",
            call. = FALSE)
    return(NA_real_)
  }

  counts <- names_data[[1]][["X3"]]

  # The site shows "--" where there is no count; treat that as zero.
  n_m <- str_replace(counts[2], "--", "0")
  n_v <- str_replace(counts[6], "--", "0")

  # Progress indicator.
  print(name)

  n_total <- as.numeric(n_m) + as.numeric(n_v)
  n_total
}
Scraping the specific
names
The names whose frequencies are queried from Meertens are listed below.
The resulting counts are then put into a data frame.
# ----------- IMPLEMENT THE SCRAPER ---------------- |
# Names (the X's) whose frequencies are looked up.
namelist <- c(
  "Sophie", "Julia", "Sanne", "Lisa", "Laura", "Maria", "Linda", "Johanna",
  "Monique", "Ester", "Anna", "Elisabeth", "Cornelia", "Wilhelmina", "Amira",
  "Samira", "Sara", "Daan", "Sem", "Thomas", "Max", "Kevin", "Johannes",
  "Dennis", "Jeroen", "Jan", "Marcel", "Cornelis", "Hendrik", "Petrus",
  "Willem", "Ali", "Mohammed", "Noor"
)

# Preallocate the result vector (NA marks names not fetched yet) instead of
# growing it inside the loop.
name_number <- rep(NA_real_, length(namelist))

# check permissions and introduce myself to the host
session <- bow("https://www.meertens.knaw.nl/nvb/", user_agent = "R. Corten, Universiteit Utrecht", delay = 1)
session

# seq_along() is safe for an empty name list, where 1:length() would yield c(1, 0).
for (i in seq_along(namelist)) {
  name_number[i] <- get_name_number(session, namelist[i])
}

# One row per name with its scraped frequency.
names_numbers <- data.frame(name = namelist, number = name_number)
Saving the data
We finally save the data in a CSV file.
# Write the name/frequency table to disk, leaving out the row-name column.
write.csv(
  x = names_numbers,
  file = "name_numbers.csv",
  row.names = FALSE
)
LS0tCnRpdGxlOiAiU2VsZWN0aW5nIHRoZSBuYW1lcyAoWCdzKSIKI2JpYmxpb2dyYXBoeTogcmVmZXJlbmNlcy5iaWIKYXV0aG9yOiAiUmVuc2UgQ29ydGVuIgotLS0KCmBgYHtyLCBnbG9iYWxzZXR0aW5ncywgZWNobz1GQUxTRSwgd2FybmluZz1GQUxTRSwgcmVzdWx0cz0naGlkZSd9CmxpYnJhcnkoa25pdHIpCgprbml0cjo6b3B0c19jaHVuayRzZXQoZWNobyA9IFRSVUUpCm9wdHNfY2h1bmskc2V0KHRpZHkub3B0cz1saXN0KHdpZHRoLmN1dG9mZj0xMDApLHRpZHk9VFJVRSwgd2FybmluZyA9IEZBTFNFLCBtZXNzYWdlID0gRkFMU0UsY29tbWVudCA9ICIjPiIsIGNhY2hlPVRSVUUsIGNsYXNzLnNvdXJjZT1jKCJ0ZXN0IiksIGNsYXNzLm91dHB1dD1jKCJ0ZXN0MiIpKQpvcHRpb25zKHdpZHRoID0gMTAwKQpyZ2w6OnNldHVwS25pdHIoKQoKCgpjb2xvcml6ZSA8LSBmdW5jdGlvbih4LCBjb2xvcikge3NwcmludGYoIjxzcGFuIHN0eWxlPSdjb2xvcjogJXM7Jz4lczwvc3Bhbj4iLCBjb2xvciwgeCkgfQoKYGBgCgpgYGB7ciBrbGlwcHksIGVjaG89RkFMU0UsIGluY2x1ZGU9VFJVRX0Ka2xpcHB5OjprbGlwcHkocG9zaXRpb24gPSBjKCd0b3AnLCAncmlnaHQnKSkKI2tsaXBweTo6a2xpcHB5KGNvbG9yID0gJ2RhcmtyZWQnKQoja2xpcHB5OjprbGlwcHkodG9vbHRpcF9tZXNzYWdlID0gJ0NsaWNrIHRvIGNvcHknLCB0b29sdGlwX3N1Y2Nlc3MgPSAnRG9uZScpCmBgYAoKTGFzdCBjb21waWxlZCBvbiBgciBmb3JtYXQoU3lzLnRpbWUoKSwgJyVCLCAlWScpYAoKPGJyPgoKLS0tLQoKVGhlIHNlY29uZCBzdGVwIGlzIHRvIGZpbmQgdGhlIGZyZXF1ZW5jaWVzIG9mIHRoZSBzZWxlY3RlZCBzZXQgb2YgbmFtZXMgKG9yIFgncywgaW4gTlNVTSB0ZXJtaW5vbG9neSkuIFdlIGFnYWluIHVzZSB0aGUgW01lZXJ0ZW4gVm9vcm5hbWVuYmFua10oaHR0cHM6Ly93d3cubWVlcnRlbnMua25hdy5ubC9udmIvKSB0aGF0IHNob3dzIG5hbWUgcG9wdWxhcml0eSBieSB5ZWFyLiBXZSBkbyB0aGlzIHZpYSByZWxhdGl2ZWx5IHN0cmFpZ2h0Zm9yd2FyZCBjcmF3bCBvZiB0aGF0IHdlYnNpdGUuIFlvdSBjYW4gc2VlIHRoZSBzZWxlY3RlZCBuYW1lcyBiZWxvdyBpbiB0aGUgY29kZS4KCjxicj4KCi0tLS0KCiMgSW5pdGF0aWF0aW5nIFIgZW52aXJvbm1lbnQKClN0YXJ0IG91dCB3aXRoIGEgY3VzdG9tIGZ1bmN0aW9uIHRvIGxvYWQgYSBzZXQgb2YgcmVxdWlyZWQgcGFja2FnZXMuCiAgCmBgYHtyLCBldmFsPUZBTFNFfQojIHBhY2thZ2VzIGFuZCByZWFkIGRhdGEKcm0obGlzdCA9IGxzKCkpCgojIHNjcmFwZXIgdG8gY29sbGVjdCBwb3B1bGFyaXR5IGxpc3RzIG9mIG5hbWVzIGluIHRoZSBOZXRoZXJsYW5kcywgcGVyIHllYXIKIyBSZW5zZSBDb3J0ZW4sIFV0cmVjaHQgVW5pdmVyc2l0eSwgQXByaWwgMjAyMQoKIyAtLS0tLS0tLS0tLSBMT0FEIFRIRSBSRVFVSVJFRCBQQUNLQUdFUyAtLS0tLS0tLS0tLS0tLS0tIHwKCiMgKGMpIEpvY2hlbSBUb2xzbWEKZnBh
Y2thZ2UuY2hlY2sgPC0gZnVuY3Rpb24ocGFja2FnZXMpIHsKICBsYXBwbHkocGFja2FnZXMsIEZVTiA9IGZ1bmN0aW9uKHgpIHsKICAgIGlmICghcmVxdWlyZSh4LCBjaGFyYWN0ZXIub25seSA9IFRSVUUpKSB7CiAgICAgIGluc3RhbGwucGFja2FnZXMoeCwgZGVwZW5kZW5jaWVzID0gVFJVRSkKICAgICAgbGlicmFyeSh4LCBjaGFyYWN0ZXIub25seSA9IFRSVUUpCiAgICB9CiAgfSkKfQpwYWNrYWdlcyA9IGMoInRpZHl2ZXJzZSIsICJydmVzdCIsICJwb2xpdGUiKQpmcGFja2FnZS5jaGVjayhwYWNrYWdlcykKYGBgCgoKPGJyPgoKLS0tLQoKIyBGdW5jdGlvbnMKCk1ha2UgYSBjdXN0b20gZnVuY3Rpb24gdGhhdCBxdWVyaWVzIHRoZSBNZWVydGVucwoKYGBge3IsIGV2YWw9RkFMU0V9CiMgLS0tLS0tLS0tLS0gRlVOQ1RJT05TIC0tLS0tLS0tLS0tLS0tLS0gfAoKZ2V0X25hbWVfbnVtYmVyIDwtIGZ1bmN0aW9uKHNlc3Npb24sIG5hbWUpewogICN5ZWFyID0gMjAxNAogIG5hbWVfcGF0aCA9IHBhc3RlKCIvbnZiL25hYW0vaXMvIiwgbmFtZSxzZXA9IiIpICMgc2V0IHRoZSBwYXRoIGZvciB0aGUgc3BlY2lmaWMgbmFtZSdzIHdlYnBhZ2UKICAKICBuYW1lX3Nlc3Npb24gPC1ub2Qoc2Vzc2lvbiwgcGF0aCA9IG5hbWVfcGF0aCkgIyBhZ3JlZSBjaGFuZ2luZyBvZiB0aGUgcGF0aCB3aXRoIHRoZSBob3N0IChhc3N1bWluZyBJIGhhdmUgYWxyZWFkeSAiYm93ZWQiIGZvciB0aGUgaGlnaGVyLWxldmVsIHBhdGgpCiAgCiAgbmFtZV9wYWdlIDwtIHNjcmFwZShuYW1lX3Nlc3Npb24pICMgZ2V0IHRoZSBwYWdlIGZvciB0aGlzIHllYXIKICAKICBuYW1lc19kYXRhIDwtbmFtZV9wYWdlICU+JSAgIyBwYXJzZSB0aGUgcGFnZSBhcyBhIHRhYmxlLiBUdXJucyBvdXQgdGhpcyBpcyBhIGxpc3Qgb2YgdGhyZWUgdGFibGVzOyB3ZSBuZWVkIG51bWJlcnMgMiBhbmQgMwogICAgaHRtbF90YWJsZSgpCiAgCiAgbl9tIDwtIG5hbWVzX2RhdGFbWzFdXVtbIlgzIl1dWzJdICU+JSAKICAgIHN0cl9yZXBsYWNlKCItLSIsIjAiKQogIAogIG5fdiA8LSBuYW1lc19kYXRhW1sxXV1bWyJYMyJdXVs2XSAlPiUgCiAgICBzdHJfcmVwbGFjZSgiLS0iLCIwIikKICAKICAKICBwcmludChuYW1lKQogIAogIG5fdG90YWwgPC0gYXMubnVtZXJpYyhuX20pICsgYXMubnVtZXJpYyhuX3YpCiAgCiAgcmV0dXJuKG5fdG90YWwpCn0KYGBgCgoKPGJyPgoKLS0tLQoKIyBTY3JhcGluZyB0aGUgc3BlY2lmaWMgbmFtZXMKClRoZSBuYW1lIGZyZXF1ZW5jaWVzIHRoYXQgYXJlIHF1ZXJpZWQgZnJvbSBNZWVydGVucyBhcmUgZm91bmQgYmVsb3cuIFRoZXNlIGFyZSB0aGVuIHB1dCBpbnRvIGEgZGF0YWZyYW1lLgoKCmBgYHtyLCBldmFsPUZBTFNFfQoKIyAtLS0tLS0tLS0tLSBJTVBMRU1FTlQgVEhFIFNDUkFQRVIgLS0tLS0tLS0tLS0tLS0tLSB8CgpuYW1lbGlzdCA8LSBjKAoiU29waGllIiwKIkp1bGlhIiwKIlNhbm5lIiwKIkxpc2EiLAoiTGF1cmEiLAoiTWFyaWEiLAoiTGluZGEi
LAoiSm9oYW5uYSIsCiJNb25pcXVlIiwKIkVzdGVyIiwKIkFubmEiLAoiRWxpc2FiZXRoIiwKIkNvcm5lbGlhIiwKIldpbGhlbG1pbmEiLAoiQW1pcmEiLAoiU2FtaXJhIiwKIlNhcmEiLAoiRGFhbiIsCiJTZW0iLAoiVGhvbWFzIiwKIk1heCIsCiJLZXZpbiIsCiJKb2hhbm5lcyIsCiJEZW5uaXMiLAoiSmVyb2VuIiwKIkphbiIsCiJNYXJjZWwiLAoiQ29ybmVsaXMiLAoiSGVuZHJpayIsCiJQZXRydXMiLAoiV2lsbGVtIiwKIkFsaSIsCiJNb2hhbW1lZCIsCiJOb29yIgopCgpuYW1lX251bWJlciA8LSBjKCkKCiMgY2hlY2sgcGVybWlzc2lvbnMgYW5kIGludHJvZHVjZSBteXNlbGYgdG8gdGhlIGhvc3QKc2Vzc2lvbiA8LSBib3coImh0dHBzOi8vd3d3Lm1lZXJ0ZW5zLmtuYXcubmwvbnZiLyIsIHVzZXJfYWdlbnQgPSAgIlIuIENvcnRlbiwgVW5pdmVyc2l0ZWl0IFV0cmVjaHQiLCBkZWxheSA9IDEpCnNlc3Npb24KCgpmb3IoaSBpbiAxOmxlbmd0aChuYW1lbGlzdCkpIHsKICBuYW1lX251bWJlcltpXSA8LSBnZXRfbmFtZV9udW1iZXIoc2Vzc2lvbiwgbmFtZWxpc3RbaV0pCn0KIAoKbmFtZXNfbnVtYmVycyA8LSBkYXRhLmZyYW1lKAogIG5hbWUgPSBuYW1lbGlzdCwKICBudW1iZXI9IG5hbWVfbnVtYmVyCikKYGBgCgoKPGJyPgoKLS0tLQoKIyBTYXZpbmcgdGhlIGRhdGEKCldlIGZpbmFsbHkgc2F2ZSB0aGUgZGF0YSBpbiBhIENTViBmaWxlLgoKYGBge3IsIGV2YWw9RkFMU0V9CndyaXRlLmNzdihuYW1lc19udW1iZXJzLCAKICAgICAgICAgIGZpbGUgPSAibmFtZV9udW1iZXJzLmNzdiIsCiAgICAgICAgICByb3cubmFtZXM9RkFMU0UpCmBgYAo=