Sample datasets with the correspondenceTables package

This vignette shows how to apply the functions of the correspondenceTables package to the sample datasets included in it.

ATTENTION: please set the working directory to a folder other than the one in which the package has been installed.

LOCAL COPIES OF THE SAMPLE DATA

If they wish, users can make copies of the sample datasets in a local folder of their choice. There are two ways of doing this:

  1. Unpack the tar.gz file in which the package was distributed into any folder of your choice. All sample datasets can be found in the “inst/extdata” subfolder of the unpacked folder.

  2. Copy sample datasets from the “extdata” subfolder of the folder in which the package has been installed in your PC’s R library.


ACCESSING SAMPLE DATASETS FROM WITHIN THE PACKAGE

Application of function updateCorrespondenceTable().

Case 1

Execute the following code in order to get the paths of the required input files.

# Case 1 inputs: look up each sample file in the "extdata" folder of the
# installed correspondenceTables package.
A <- system.file(
  "extdata", "CN2021.csv",
  package = "correspondenceTables"
)
AStar <- system.file(
  "extdata", "CN2022.csv",
  package = "correspondenceTables"
)
B <- system.file(
  "extdata", "CPA21.csv",
  package = "correspondenceTables"
)
AB <- system.file(
  "extdata", "CN2021_CPA21.csv",
  package = "correspondenceTables"
)
AAStar <- system.file(
  "extdata", "CN2021_CN2022.csv",
  package = "correspondenceTables"
)

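Execute the following code line to apply the function updateCorrespondenceTable() to these data. The code below refers to a folder path stored in the variable tmp_dir, which is not created by the code shown in this vignette; define it before running the examples (for example, tmp_dir <- tempdir()).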

# Case 1: run updateCorrespondenceTable() on the files located above. The sixth
# argument is a CSV path inside the folder held by tmp_dir.
UPC <- updateCorrespondenceTable(
  A, B, AStar, AB, AAStar,
  file.path(tmp_dir, "updateCorrespondenceTableCase1.csv"),
  "B", 0.4, 0.4
)
# First element of the result: show the first 10 rows and first 7 columns.
print(UPC[[1]][seq_len(10), seq_len(7)])
#>     CN 2021  CN 2022  CPA 2.1 CodeChange Review Redundancy NoMatchToAStar
#> 1  01012100 01012100 01.43.11          0      0          0              0
#> 2  01012910 01012910 01.43.11          0      0          0              0
#> 3  01012990 01012990 01.43.11          0      0          0              0
#> 4  01013000 01013000 01.43.12          0      0          0              0
#> 5  01019000 01019000 01.43.12          0      0          0              0
#> 6  01022110 01022110 01.41.10          0      0          0              0
#> 7  01022130 01022130 01.41.10          0      0          0              0
#> 8  01022190 01022190 01.42.11          0      0          0              0
#> 9  01022905 01022905 01.42.11          0      0          0              0
#> 10 01022910 01022910 01.42.12          0      0          0              0
# Second element of the result.
print(UPC[[2]])
#>   Classification: Name
#> 1           A: CN 2021
#> 2           B: CPA 2.1
#> 3       AStar: CN 2022

Case 2

Execute the following code in order to get the paths of the required input files.

# Case 2 inputs: look up each sample file in the "extdata" folder of the
# installed correspondenceTables package.
A <- system.file(
  "extdata", "CN2021.csv",
  package = "correspondenceTables"
)
AStar <- system.file(
  "extdata", "CN2022.csv",
  package = "correspondenceTables"
)
B <- system.file(
  "extdata", "PRODCOM2021.csv",
  package = "correspondenceTables"
)
AB <- system.file(
  "extdata", "CN2021_PRODCOM2021.csv",
  package = "correspondenceTables"
)
AAStar <- system.file(
  "extdata", "CN2021_CN2022.csv",
  package = "correspondenceTables"
)

Execute the following code line to apply function updateCorrespondenceTable() on these data.

# Case 2: run updateCorrespondenceTable() on the files located above. The sixth
# argument is a CSV path inside the folder held by tmp_dir.
UPC <- updateCorrespondenceTable(
  A, B, AStar, AB, AAStar,
  file.path(tmp_dir, "updateCorrespondenceTableCase2.csv"),
  "A", 0.4, 0.3
)

Case 3

Execute the following code in order to get the paths of the required input files.

# Case 3 inputs: look up each sample file in the "extdata" folder of the
# installed correspondenceTables package.
A <- system.file(
  "extdata", "NAICS2017.csv",
  package = "correspondenceTables"
)
AStar <- system.file(
  "extdata", "NAICS2022.csv",
  package = "correspondenceTables"
)
B <- system.file(
  "extdata", "NACE.csv",
  package = "correspondenceTables"
)
AB <- system.file(
  "extdata", "NAICS2017_NACE.csv",
  package = "correspondenceTables"
)
AAStar <- system.file(
  "extdata", "NAICS2017_NAICS2022.csv",
  package = "correspondenceTables"
)

Execute the following code line to apply function updateCorrespondenceTable() on these data.

# Case 3: run updateCorrespondenceTable() on the files located above. The sixth
# argument is a CSV path inside the folder held by tmp_dir.
UPC <- updateCorrespondenceTable(
  A, B, AStar, AB, AAStar,
  file.path(tmp_dir, "updateCorrespondenceTableCase3.csv"),
  "none", 0.5, 0.3
)

Case 4

Execute the following code in order to get the paths of the required input files.

# Case 4 inputs: look up each sample file in the "extdata" folder of the
# installed correspondenceTables package.
A <- system.file(
  "extdata", "CN2021.csv",
  package = "correspondenceTables"
)
AStar <- system.file(
  "extdata", "CN2022.csv",
  package = "correspondenceTables"
)
B <- system.file(
  "extdata", "NST2007.csv",
  package = "correspondenceTables"
)
AB <- system.file(
  "extdata", "CN2021_NST2007.csv",
  package = "correspondenceTables"
)
AAStar <- system.file(
  "extdata", "CN2021_CN2022.csv",
  package = "correspondenceTables"
)

Execute the following code line to apply function updateCorrespondenceTable() on these data.

# Case 4: run updateCorrespondenceTable() on the files located above. The sixth
# argument is a CSV path inside the folder held by tmp_dir.
UPC <- updateCorrespondenceTable(
  A, B, AStar, AB, AAStar,
  file.path(tmp_dir, "updateCorrespondenceTableCase4.csv"),
  "B", 0.4, 0.3
)

Case 5

Execute the following code in order to get the paths of the required input files.

# Case 5 inputs: look up each sample file in the "extdata" folder of the
# installed correspondenceTables package.
A <- system.file(
  "extdata", "CN2021.csv",
  package = "correspondenceTables"
)
AStar <- system.file(
  "extdata", "CN2022.csv",
  package = "correspondenceTables"
)
B <- system.file(
  "extdata", "SITC4.csv",
  package = "correspondenceTables"
)
AB <- system.file(
  "extdata", "CN2021_SITC4.csv",
  package = "correspondenceTables"
)
AAStar <- system.file(
  "extdata", "CN2021_CN2022.csv",
  package = "correspondenceTables"
)

Execute the following code line to apply function updateCorrespondenceTable() on these data.

# Case 5: run updateCorrespondenceTable() on the files located above. The sixth
# argument is a CSV path inside the folder held by tmp_dir.
UPC <- updateCorrespondenceTable(
  A, B, AStar, AB, AAStar,
  file.path(tmp_dir, "updateCorrespondenceTableCase5.csv"),
  "B", 0.3, 0.7
)

Case 6

Execute the following code in order to get the paths of the required input files.

# Case 6 inputs: look up each sample file in the "extdata" folder of the
# installed correspondenceTables package.
A <- system.file(
  "extdata", "CN2021.csv",
  package = "correspondenceTables"
)
AStar <- system.file(
  "extdata", "CN2022.csv",
  package = "correspondenceTables"
)
B <- system.file(
  "extdata", "BEC4.csv",
  package = "correspondenceTables"
)
AB <- system.file(
  "extdata", "CN2021_BEC4.csv",
  package = "correspondenceTables"
)
AAStar <- system.file(
  "extdata", "CN2021_CN2022.csv",
  package = "correspondenceTables"
)

Execute the following code line to apply function updateCorrespondenceTable() on these data.

# Case 6: run updateCorrespondenceTable() on the files located above. The sixth
# argument is a CSV path inside the folder held by tmp_dir.
UPC <- updateCorrespondenceTable(
  A, B, AStar, AB, AAStar,
  file.path(tmp_dir, "updateCorrespondenceTableCase6.csv"),
  "B", 0.3, 0.6
)

Application of function newCorrespondenceTable().

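The helper function fullPath() is used in all cases to obtain the paths of the required input files: it reads one of the sample “names” CSV files from the package’s “extdata” folder, replaces every non-empty entry with the full path of the file of that name in the same “extdata” folder, and writes the resulting table to the folder tmp_dir under the file name given in its second argument.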

# Build a copy of a sample "names" CSV in which every file name is replaced by
# its full path inside the installed correspondenceTables package.
#
# Arguments:
#   CSVraw      Name of a CSV file in the package's "extdata" folder. It is read
#               without a header row; each non-empty cell is treated as the
#               name of another file in the same "extdata" folder.
#   CSVappended File name under which the converted table is written, inside
#               the folder held by `tmp_dir`.
#
# Value: the converted data frame. The same table is also written to disk as a
# comma-separated file without row or column names.
#
# Note: `tmp_dir` is not an argument of this function; it must already exist in
# the calling environment (for example `tmp_dir <- tempdir()`).
fullPath <- function(CSVraw, CSVappended) {
  # mustWork = TRUE: fail immediately with a clear message when CSVraw is not
  # found, instead of passing the empty string "" on to read.csv().
  NamesCsv <- system.file("extdata", CSVraw, package = "correspondenceTables",
                          mustWork = TRUE)
  # stringsAsFactors = FALSE: on R < 4.0 text columns would otherwise be read
  # as factors, and assigning a path to a factor cell yields NA.
  A <- read.csv(NamesCsv, header = FALSE, sep = ",", stringsAsFactors = FALSE)
  # seq_len() keeps the loops from running when a dimension is zero.
  for (i in seq_len(nrow(A))) {
    for (j in seq_len(ncol(A))) {
      cell <- A[i, j]
      # Leave blank cells untouched. Missing cells (e.g. an all-blank column,
      # which read.csv() returns as NA) are skipped too rather than raising
      # "missing value where TRUE/FALSE needed".
      if (!is.na(cell) && nzchar(cell)) {
        A[i, j] <- system.file("extdata", cell, package = "correspondenceTables")
      }
    }
  }
  write.table(x = A, file = file.path(tmp_dir, CSVappended), row.names = FALSE,
              col.names = FALSE, sep = ",")
  A
}

Case 1

# Case 1: convert the file names listed in names1.csv to full paths and save
# them as names.csv in tmp_dir; the returned table is printed.
fullPath(CSVraw = "names1.csv", CSVappended = "names.csv")

Execute the following code to apply function newCorrespondenceTable() on these data.

# Case 1: time the call to newCorrespondenceTable(); its return value is kept
# in NCT.
system.time(
  NCT <- newCorrespondenceTable(
    file.path(tmp_dir, "names.csv"),
    file.path(tmp_dir, "newCorrespondenceTableCase1.csv"),
    "A",
    0.5
  )
)
#> Percentage of codes of ISIC Rev. 4 processed:
#> 
#> Percentage of codes of CPA 2.1 processed:
#> 
# First element of the result: show the first 10 rows and first 6 columns.
print(NCT[[1]][seq_len(10), seq_len(6)])
#>    ISIC Rev. 4 CPC 2.1 CPA 2.1 Review Redundancy Unmatched
#> 1                           01                 0         1
#> 2                         01.1                 0         1
#> 3                        01.11                 0         1
#> 4                      01.11.1                 0         1
#> 5                      01.11.2                 0         1
#> 6                      01.11.3                 0         1
#> 7                      01.11.4                 0         1
#> 8                      01.11.5                 0         1
#> 9                      01.11.6                 0         1
#> 10                     01.11.7                 0         1
# Second element of the result.
print(NCT[[2]])
#>   Classification: Name
#> 1       A: ISIC Rev. 4
#> 2          C1: CPC 2.1
#> 3           B: CPA 2.1

Case 2

# Case 2: convert the file names listed in names2.csv to full paths and save
# them as names.csv in tmp_dir; the returned table is printed.
fullPath(CSVraw = "names2.csv", CSVappended = "names.csv")

Execute the following code to apply function newCorrespondenceTable() on these data.

# Case 2: time the call to newCorrespondenceTable(); its return value is kept
# in NCT.
system.time(
  NCT <- newCorrespondenceTable(
    file.path(tmp_dir, "names.csv"),
    file.path(tmp_dir, "newCorrespondenceTableCase2.csv"),
    "B",
    0.5
  )
)
#> Percentage of codes of CN 2022 processed:
#> 
#> Percentage of codes of NACE Rev. 2 processed:
#> 

Case 3

# Case 3: convert the file names listed in names3.csv to full paths and save
# them as names.csv in tmp_dir; the returned table is printed.
fullPath(CSVraw = "names3.csv", CSVappended = "names.csv")

Execute the following code to apply function newCorrespondenceTable() on these data.

# Case 3: time the call to newCorrespondenceTable(); its return value is kept
# in NCT.
system.time(
  NCT <- newCorrespondenceTable(
    file.path(tmp_dir, "names.csv"),
    file.path(tmp_dir, "newCorrespondenceTableCase3.csv"),
    "B",
    0.5
  )
)
#> Percentage of codes of NACE Rev. 2 processed:
#> 
#> Percentage of codes of ISIC Rev. 4 processed:
#> 

Case 4

# Case 4: convert the file names listed in names4.csv to full paths and save
# them as names.csv in tmp_dir; the returned table is printed.
fullPath(CSVraw = "names4.csv", CSVappended = "names.csv")

Execute the following code to apply function newCorrespondenceTable() on these data.

# Case 4: time the call to newCorrespondenceTable(); its return value is kept
# in NCT.
system.time(
  NCT <- newCorrespondenceTable(
    file.path(tmp_dir, "names.csv"),
    file.path(tmp_dir, "newCorrespondenceTableCase4.csv"),
    "none",
    0.96
  )
)
#> Percentage of codes of NACE Rev. 2 processed:
#> 
#> Percentage of codes of SITC4 processed:
#>