This vignette demonstrates how to use the {samplezoo} package to generate datasets of varying sizes (small, medium, and large) with variables from multiple probability distributions.
Each dataset provides:
- Variables (columns) drawn from common distributions such as Normal, Binomial, Poisson, and others.
- An adjustable sample size ("small", "medium", or "large") to suit your needs.
data_small <- samplezoo("small")
head(data_small)
#> norm norm2 norm3 binom neg pois exp unif beta
#> 1 41.71856 67.81808 51.60422 0 0 4 8.238765 0.01567249 0.2862984
#> 2 31.96341 89.64689 50.78892 0 1 3 10.470693 0.71756647 0.4788767
#> 3 48.71022 56.99243 39.44686 0 0 2 9.908121 0.10850431 0.4231335
#> 4 38.95935 60.28838 89.35484 0 1 4 29.373273 0.10859841 0.1797686
#> 5 66.53657 70.18446 49.79334 0 0 3 14.571805 0.07231850 0.4307580
#> 6 29.90567 61.94870 62.33800 0 0 3 14.023376 0.96884193 0.2894295
#> gamma chisq t_dist
#> 1 2.0357656 3.8404966 0.1096145
#> 2 1.4998904 0.7520087 -1.1145953
#> 3 0.6304285 1.2776924 -0.9743322
#> 4 0.8154457 2.0482300 1.6333500
#> 5 2.8349630 0.2202916 -0.9734399
#> 6 5.2960556 0.8458899 -1.0400458
data_medium <- samplezoo("medium")
head(data_medium)
#> norm norm2 norm3 binom neg pois exp unif beta
#> 1 67.22159 61.14397 49.961968 0 2 5 1.9228950 0.89606497 0.2071879
#> 2 74.68645 48.87572 25.035572 0 0 0 17.2112242 0.16478069 0.2869354
#> 3 68.00435 55.81918 7.540233 0 0 3 10.6444322 0.62566623 0.5055417
#> 4 73.26883 78.62319 52.538824 0 3 3 28.5985018 0.56992118 0.1980817
#> 5 57.08670 58.15019 38.365797 0 3 2 12.7014979 0.19209306 0.2835067
#> 6 60.95354 53.66263 70.510219 0 2 4 0.4664658 0.05130417 0.1220230
#> gamma chisq t_dist
#> 1 3.347114 0.8901615 0.4740891
#> 2 3.067555 5.4367785 2.4862193
#> 3 4.274730 2.6444388 -1.0474002
#> 4 1.849901 9.1521045 1.8635388
#> 5 5.429515 5.9480054 0.1960331
#> 6 5.737468 3.2040122 -0.1397868
data_large <- samplezoo("large")
head(data_large)
#> norm norm2 norm3 binom neg pois exp unif beta
#> 1 68.01984 72.82804 35.643754 1 1 3 23.370730 0.8708623 0.40704110
#> 2 83.65588 66.43495 9.540925 0 1 3 35.770362 0.5868214 0.54959647
#> 3 59.74394 82.58383 34.650896 1 8 4 6.308334 0.1268779 0.17221645
#> 4 48.96896 61.84879 43.532524 0 0 5 18.686994 0.7612284 0.15184928
#> 5 72.53461 45.67266 35.700349 0 4 4 9.002839 0.8231389 0.09291141
#> 6 73.54867 79.28889 60.934396 0 0 2 21.668908 0.1522375 0.37017411
#> gamma chisq t_dist
#> 1 1.562766 5.798502 0.8114000
#> 2 24.752146 6.996460 0.3805892
#> 3 7.371747 8.172433 0.6062020
#> 4 3.762310 12.004274 1.9925462
#> 5 3.106423 16.208775 -0.4742934
#> 6 1.528204 10.242665 -0.4805331
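To make your results reproducible, call set.seed() before generating random data. Using the same seed always reproduces the same dataset, while changing the seed gives you a different (but equally reproducible) dataset, as the two examples below show.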
set.seed(123)
data_large <- samplezoo("large")
head(data_large)
#> norm norm2 norm3 binom neg pois exp unif beta
#> 1 41.59287 83.70725 23.274065 0 1 6 6.628373 0.5468223 0.08294255
#> 2 46.54734 58.33188 35.588540 0 0 5 21.305366 0.3900809 0.63544684
#> 3 73.38062 69.26961 -2.070295 0 2 4 0.189645 0.7262119 0.11520674
#> 4 51.05763 54.31848 6.643849 0 2 2 8.479098 0.5101462 0.38184206
#> 5 51.93932 62.25090 18.040743 0 0 2 11.885521 0.2964126 0.17196046
#> 6 75.72597 71.31986 6.687576 0 1 4 6.363993 0.1442317 0.35908460
#> gamma chisq t_dist
#> 1 6.9893762 10.286282 -0.3814568
#> 2 5.4087626 6.519658 -2.3409216
#> 3 1.2587867 8.011417 -0.4744159
#> 4 0.9871787 14.780626 0.4292511
#> 5 2.4021943 6.799788 -0.6692669
#> 6 4.2109032 17.858701 -0.3370763
set.seed(456)
data_large <- samplezoo("large")
head(data_large)
#> norm norm2 norm3 binom neg pois exp unif beta
#> 1 29.84718 68.13494 7.9885694 0 0 5 3.4417303 0.8866347 0.05413307
#> 2 59.32663 52.32066 21.2526086 0 3 3 0.8114356 0.7976466 0.07195440
#> 3 62.01312 62.47569 38.4789563 0 2 6 46.8038907 0.6469920 0.22555129
#> 4 29.16661 53.51086 -0.8656269 0 1 5 11.6955326 0.2036753 0.71455809
#> 5 39.28465 47.19406 47.7819258 1 1 1 0.3535625 0.3653401 0.34619912
#> 6 45.13908 63.33566 53.3620528 1 1 2 4.5592136 0.7628573 0.25880522
#> gamma chisq t_dist
#> 1 6.7914120 4.464348 -1.0150596
#> 2 3.0132520 8.062120 0.3262369
#> 3 4.7360954 10.969593 1.5141157
#> 4 5.1235878 6.249247 0.6432708
#> 5 6.6851637 4.358815 0.2025742
#> 6 0.3903841 20.019575 1.6257109