Double Loops in R: Use .name_repair to specify repair?
I have this dataset in R:
set.seed(123)
myFun <- function(n = 5000) {
a <- do.call(paste0, replicate(5, sample(LETTERS, n, TRUE), FALSE))
paste0(a, sprintf("%04d", sample(9999, n, TRUE)), sample(LETTERS, n, TRUE))
}
col1 = myFun(100)
col2 = myFun(100)
col3 = myFun(100)
col4 = myFun(100)
group <- c("A","B","C","D")
group = sample(group, 100, replace=TRUE)
example = data.frame(col1, col2, col3, col4, group)
col1 col2 col3 col4 group
1 SKZDZ9876D BTAMF8110T LIBFV6882H ZFIPL4295E A
2 NXJRX7189Y AIZGY5809C HSMIH4556D YJGJP8022H C
3 XPTZB2035P EEKXK0873A PCPNW1021S NMROS4134O A
4 LJMCM3436S KGADK2847O SRMUI5723N RDIXI7301N B
5 ADITC6567L HUOCT5660P AQCNE3753K FUMGY1428B D
6 BAEDP8491P IAGQG4816B TXXQH6337M SDACH5752D C
I wrote this loop that compares different string distance metrics between all combinations of (col1,col2) and (col3,col4):
method = c("osa", "lv", "dl", "hamming", "lcs", "qgram", "cosine", "jaccard", "jw","soundex")
library(stringdist)
results = list()
for (i in 1:length(method))
{
method_i = method[i]
name_1_i = paste0("col1_col_2", method_i)
name_2_i = paste0("col3_col_4", method_i)
p1_i = stringdistmatrix(col1, col2, method = method_i, useNames = "string") %>%
as_tibble(rownames = "a") %>%
pivot_longer(-1, names_to = "b", values_to = name_1_i)
p2_i = stringdistmatrix(col3, col4, method = method_i, useNames = "string") %>%
as_tibble(rownames = "a") %>%
pivot_longer(-1, names_to = "b", values_to = name_2_i)
p1_i = p1_i[,3]
p2_i = p2_i[,3]
final_i = cbind(p1_i, p2_i)
results[[i]] = final_i
}
final = do.call(cbind.data.frame, results)
final = cbind(col1,col2, col3,col4, final)
average_col1_col2_dist = (final$col1_col_2osa + final$col1_col_2lv + final$col1_col_2dl + final$col1_col_2hamming + final$col1_col_2lcs + final$col1_col_2qgram + final$col1_col_2cosine + final$col1_col_2jaccard + final$col1_col_2jw + final$col1_col_2soundex)/10
average_col3_col4_dist = ( final$col3_col_4osa + final$col3_col_4lv + final$col3_col_4dl + final$col3_col_4hamming + final$col3_col_4lcs + final$col3_col_4qgram + final$col3_col_4cosine + final$col3_col_4jaccard + final$col3_col_4jw + final$col3_col_4soundex)/10
final = data.frame( col1, col2, col3, col4, average_col1_col2_dist, average_col3_col4_dist)
final = scale(final)
Now, I would like to make this a "double loop" and have the same comparisons being done, but the comparisons should be made only within each "group" :
results = list()
for (i in 1:length(method))
for (j in 1:length(unique(example$group))
{
{
groups_j = unique(example$group[j])
my_data_i = file[which(file$fsa == groups_j ), ]
method_i = method[i]
name_1_i = paste0("col1_col_2", method_i)
name_2_i = paste0("col3_col_4", method_i)
p1_i = stringdistmatrix(my_data_i$col1, my_data_i$col2, method = method_i, useNames = "string") %>%
as_tibble(rownames = "a") %>%
pivot_longer(-1, names_to = "b", values_to = name_1_i)
p2_i = stringdistmatrix(my_data_i$col3, my_data_i$col4, method = method_i, useNames = "string") %>%
as_tibble(rownames = "a") %>%
pivot_longer(-1, names_to = "b", values_to = name_2_i)
p1_i = p1_i[,3]
p2_i = p2_i[,3]
final_i = cbind(p1_i, p2_i)
results[[i]] = final_i
}
}
final = do.call(cbind.data.frame, results)
final = cbind(col1,col2, col3,col4, final)
average_col1_col2_dist = (final$col1_col_2osa + final$col1_col_2lv + final$col1_col_2dl + final$col1_col_2hamming + final$col1_col_2lcs + final$col1_col_2qgram + final$col1_col_2cosine + final$col1_col_2jaccard + final$col1_col_2jw + final$col1_col_2soundex)/10
average_col3_col4_dist = ( final$col3_col_4osa + final$col3_col_4lv + final$col3_col_4dl + final$col3_col_4hamming + final$col3_col_4lcs + final$col3_col_4qgram + final$col3_col_4cosine + final$col3_col_4jaccard + final$col3_col_4jw + final$col3_col_4soundex)/10
final = data.frame( col1, col2, col3, col4, average_col1_col2_dist, average_col3_col4_dist)
final = scale(final)
But I keep getting this error:
Error:
! Column 1 must be named.
Use .name_repair to specify repair.
Caused by error in `repaired_names()`:
! Names can't be empty.
x Empty name found at location 1.
Does anyone know how I can fix this?
Thank you!
Comments
Post a Comment