Checks for duplicated values in columns
check_duplicate.Rd
Checks for duplicated values in columns
Usage
check_duplicate(
dataset,
uuid_column = "uuid",
columns_to_check = NULL,
log_name = "duplicate_log"
)
Arguments
- dataset
dataset to be checked, either as a dataframe or as a list with the dataframe stored as "checked_dataset".
- uuid_column
name of the uuid column in the dataset. Default is "uuid".
- columns_to_check
character vector with the names of the columns to check. If NULL (default), it will check the uuid_column for duplicates.
- log_name
name of the log of flagged values. Default is "duplicate_log".
Value
Returns a list with the checked dataset stored as checked_dataset and a dataframe with the duplicate log.
Examples
testdata <- data.frame(
uuid = c(letters[1:4], "a", "b", "c"),
col_a = runif(7),
col_b = runif(7)
)
check_duplicate(testdata) |>
knitr::kable()
#>
#>
#> |uuid | col_a| col_b|
#> |:----|---------:|---------:|
#> |a | 0.3974663| 0.8077894|
#> |b | 0.6818843| 0.6999456|
#> |c | 0.0177946| 0.5466694|
#> |d | 0.1220869| 0.5058249|
#> |a | 0.7882961| 0.2248206|
#> |b | 0.4744956| 0.9330499|
#> |c | 0.6247803| 0.5299978|
#>
#> |uuid |old_value |question |issue |
#> |:----|:---------|:--------|:---------------|
#> |a |a |uuid |duplicated uuid |
#> |b |b |uuid |duplicated uuid |
#> |c |c |uuid |duplicated uuid |
testdata2 <- data.frame(
uuid = letters[c(1:7)],
village = paste("village", c(1:3, 1:3, 4)),
ki_identifier = paste0("xx_", c(1:5, 3, 4))
)
check_duplicate(testdata2,
columns_to_check = "village") |>
knitr::kable()
#>
#>
#> |uuid |village |ki_identifier |
#> |:----|:---------|:-------------|
#> |a |village 1 |xx_1 |
#> |b |village 2 |xx_2 |
#> |c |village 3 |xx_3 |
#> |d |village 1 |xx_4 |
#> |e |village 2 |xx_5 |
#> |f |village 3 |xx_3 |
#> |g |village 4 |xx_4 |
#>
#> |uuid |question |old_value |issue |
#> |:----|:--------|:---------|:------------------|
#> |d |village |village 1 |duplicated village |
#> |e |village |village 2 |duplicated village |
#> |f |village |village 3 |duplicated village |
check_duplicate(testdata2,
columns_to_check = c("village", "ki_identifier"),
uuid = "uuid") |>
knitr::kable()
#>
#>
#> |uuid |village |ki_identifier |
#> |:----|:---------|:-------------|
#> |a |village 1 |xx_1 |
#> |b |village 2 |xx_2 |
#> |c |village 3 |xx_3 |
#> |d |village 1 |xx_4 |
#> |e |village 2 |xx_5 |
#> |f |village 3 |xx_3 |
#> |g |village 4 |xx_4 |
#>
#> |uuid |question |old_value |issue |
#> |:----|:-------------|:---------|:------------------------------------|
#> |f |village |village 3 |duplicated village ~/~ ki_identifier |
#> |f |ki_identifier |xx_3 |duplicated village ~/~ ki_identifier |