Multi-Language Survey Forms

During data collection, it is often necessary to translate survey forms into multiple languages. Forms in KoboToolbox allow users to easily add translations, which can be used to label questions and choice values in either single or multiple-choice questions.

robotoolbox gives you access to the labels used in the form. It relies on the labelled package to store question labels as variable labels and choice labels as value labels.

The following example showcases a project in KoboToolbox that employs three languages: English, French, and Arabic.

type name label::English (en) label::Francais (fr) label::Arabic (ar)
start start
end end
today today
text full_name What is your name? Quel est votre nom ? ما اسمك ؟
select_one yesno pet_yesno Do you have any pet? Avez-vous un animal de compagnie ? هل تمتلك حيوانا أليفا ؟
list_name name label::English (en) label::Francais (fr) label::Arabic (ar)
yesno 1 Yes Oui نعم
yesno 0 No Non لا

Loading the survey

To load this survey from your KoboToolbox server into your R session, you can use its unique identifier (uid = aYuTZn9vegi3Z49MXwKjep) with the kobo_asset function.

library(robotoolbox)
library(dplyr)
uid <- "aYuTZn9vegi3Z49MXwKjep"
asset <- kobo_asset(uid)
asset
#> <robotoolbox asset>  aYuTZn9vegi3Z49MXwKjep 
#>   Asset name: Multiple languages
#>   Asset type: survey
#>   Asset owner: dickoa
#>   Created: 2022-01-07 10:37:19
#>   Last modified: 2022-01-07 10:37:36
#>   Submissions: 5

Listing available languages

The kobo_lang function can be used to list all available languages for this survey.

kobo_lang(asset)
#> [1] "English (en)"  "Francais (fr)" "Arabic (ar)"

Reading data in each language

The kobo_data function allows you to specify the language to be loaded using the lang parameter. You can check the spelling of each language using kobo_lang.

df_en <- kobo_data(asset, lang = "English (en)")
df_fr <- kobo_data(asset, lang = "Francais (fr)")
df_ar <- kobo_data(asset, lang = "Arabic (ar)")
glimpse(df_en)
#> Rows: 5
#> Columns: 15
#> $ start                <dttm> 2022-01-07 10:39:47, 2022-01-07 10:39:37, 2022-0…
#> $ end                  <dttm> 2022-01-07 10:40:02, 2022-01-07 10:39:47, 2022-0…
#> $ today                <date> 2022-01-07, 2022-01-07, 2022-01-07, 2022-01-07, …
#> $ full_name            <chr> "Fatim", "Jean-Pierre", "إبراهيم", "Michelle", "A…
#> $ pet_yesno            <chr+lbl> "1", "0", "1", "1", "0"
#> $ `_id`                <int> 17735003, 17735002, 17735000, 17734995, 17734994
#> $ uuid                 <chr> "5388c680d19148828ea45913af820f30", "5388c680…
#> $ `__version__`        <chr> "vW6fMPTWKJmBfdSRM7jd4V", "vW6fMPTWKJmBfdSRM7jd4V…
#> $ instanceID           <chr> "uuid:03093de4-e32c-41b4-8f86-bd98f0cfd4e5", "uui…
#> $ `_xform_id_string`   <chr> "aYuTZn9vegi3Z49MXwKjep", "aYuTZn9vegi3Z49MXwKjep…
#> $ `_uuid`              <chr> "03093de4-e32c-41b4-8f86-bd98f0cfd4e5", "f3d0f43c…
#> $ `_status`            <chr> "submitted_via_web", "submitted_via_web", "submit…
#> $ `_submission_time`   <dttm> 2022-01-07 10:40:12, 2022-01-07 10:39:57, 2022-01…
#> $ `_validation_status` <chr> NA, NA, NA, NA, NA
#> $ `_submitted_by`      <lgl> NA, NA, NA, NA, NA

If the lang parameter is not specified, the default language is used. In this project, the default language is English (en).

df_default <- kobo_data(asset)
all.equal(df_default, df_en)
#> [1] TRUE

Accessing variable labels

You can use the var_label function from the labelled package to access question labels.

library(labelled)
var_label(df_en$full_name)
#> [1] "What is your name?"
var_label(df_fr$full_name)
#> [1] "Quel est votre nom ?"
var_label(df_ar$full_name)
#> [1] "ما اسمك ؟"
var_label(df_en$pet_yesno)
#> [1] "Do you have any pet?"
var_label(df_fr$pet_yesno)
#> [1] "Avez-vous un animal de compagnie ?"
var_label(df_ar$pet_yesno)
#> [1] "هل تمتلك حيوانا أليفا ؟"

Variable labels as column names

The kobo_data function has an additional parameter colnames_label (default is FALSE) that allows you to use the variable labels as column names. While it is not recommended for data analysis, it can be useful when exporting your data.frame to a spreadsheet. This feature is already available in the traditional KoboToolbox export tools.

kobo_data(asset_ml,
          colnames_label = TRUE, lang = "Arabic (ar)") |>
names()
#>  [1] "start"                   "end"                    
#>  [3] "today"                   "ما اسمك ؟"              
#>  [5] "هل تمتلك حيوانا أليفا ؟" "_id"                    
#>  [7] "uuid"                    "__version__"            
#>  [9] "instanceID"              "_xform_id_string"       
#> [11] "_uuid"                   "_status"                
#> [13] "_submission_time"        "_validation_status"     
#> [15] "_submitted_by"

Accessing labels from select_one question type

The to_factor function can convert the values of single-choice questions into labels.

table(to_factor(df_en$pet_yesno))
#> 
#> Yes  No 
#>   3   2
table(to_factor(df_fr$pet_yesno))
#> 
#> Oui Non 
#>   3   2
table(to_factor(df_ar$pet_yesno))
#> 
#> نعم  لا 
#>   3   2

If you prefer character values over factors, you can use to_character to get the labels as a character vector instead of a factor.

count(df_ar, pet_yesno_ar = to_character(pet_yesno))
#> # A tibble: 2 × 2
#>   pet_yesno_ar     n
#>   <chr>        <int>
#> 1 لا               2
#> 2 نعم              3

Accessing labels for select_multiple question type

Labels from select_multiple questions are also accessible using the kobo_data parameter select_multiple_label. Let’s demonstrate this new feature with the following form (the code below assumes that uid has been set to this form's unique identifier):

type name label::English (en)
start start
end end
today today
text full_name What is your name?
select_multiple pet pet_type What type of pet do you own ?
list_name name label::English (en)
pet 1 rabbit
pet 2 chicken
pet 3 dog
pet 4 cat
pet 5 turtle
data_sm <- kobo_data(uid)
glimpse(data_sm)
#> Rows: 5
#> Columns: 21
#> $ start                <dttm> 2022-05-09 18:31:40, 2022-05-09 18:31:53, 2022-0…
#> $ end                  <dttm> 2022-05-09 18:35:12, 2022-05-09 18:34:59, 2022-0…
#> $ today                <date> 2022-05-09, 2022-05-09, 2022-05-09, 2022-05-09, …
#> $ full_name            <chr> "Rufus", "Romulus", "Remus", "Joe", "Moh"
#> $ pet_type             <chr> "3 4", "4", "5", NA, "3 4 5"
#> $ pet_type_1           <int> 0, 0, 0, NA, 0
#> $ pet_type_2           <int> 0, 0, 0, NA, 0
#> $ pet_type_3           <int> 1, 0, 0, NA, 1
#> $ pet_type_4           <int> 1, 1, 0, NA, 1
#> $ pet_type_5           <int> 0, 0, 1, NA, 1
#> $ `_id`                <int> 20939261, 20939265, 20939278, 20939288, 20939301
#> $ instanceID           <chr> "uuid:147d4f30-7459-42f7-818f-b44f47b2cca7", "uui…
#> $ deprecatedID         <chr> "uuid:6840ad57-d9f7-4557-b1f2-11af21e5b0cd", "uui…
#> $ uuid                 <chr> "5c0d08e4deda4a7fbc9634f5e8aba62f", "5c0d08e4deda…
#> $ `__version__`        <chr> "vjPe5qiVxTmyviYSrQE3x4", "vjPe5qiVxTmyviYSrQE3x4…
#> $ `_xform_id_string`   <chr> "atbUaNGu5PWR2u4tNDsYaH", "atbUaNGu5PWR2u4tNDsYaH…
#> $ `_uuid`              <chr> "147d4f30-7459-42f7-818f-b44f47b2cca7", "6f67ede0…
#> $ `_status`            <chr> "submitted_via_web", "submitted_via_web", "submit…
#> $ `_submission_time`   <dttm> 2022-05-09 18:32:03, 2022-05-09 18:32:10, 2022-05…
#> $ `_validation_status` <chr> NA, NA, NA, NA, NA
#> $ `_submitted_by`      <lgl> NA, NA, NA, NA, NA

The column pet_type contains values (1 to 5) instead of the labels (dog, cat, etc.). Now, let’s set the new select_multiple_label parameter to TRUE and read the data again.

data_sm_label <- kobo_data(uid,
                           select_multiple_label = TRUE)
glimpse(data_sm_label)
#> Rows: 5
#> Columns: 21
#> $ start                <dttm> 2022-05-09 18:31:40, 2022-05-09 18:31:53, 2022-0…
#> $ end                  <dttm> 2022-05-09 18:35:12, 2022-05-09 18:34:59, 2022-0…
#> $ today                <date> 2022-05-09, 2022-05-09, 2022-05-09, 2022-05-09, …
#> $ full_name            <chr> "Rufus", "Romulus", "Remus", "Joe", "Moh"
#> $ pet_type             <chr> "dog cat", "cat", "turtle", NA, "dog cat turtle"
#> $ pet_type_1           <int> 0, 0, 0, NA, 0
#> $ pet_type_2           <int> 0, 0, 0, NA, 0
#> $ pet_type_3           <int> 1, 0, 0, NA, 1
#> $ pet_type_4           <int> 1, 1, 0, NA, 1
#> $ pet_type_5           <int> 0, 0, 1, NA, 1
#> $ `_id`                <int> 20939261, 20939265, 20939278, 20939288, 20939301
#> $ instanceID           <chr> "uuid:147d4f30-7459-42f7-818f-b44f47b2cca7", "uui…
#> $ deprecatedID         <chr> "uuid:6840ad57-d9f7-4557-b1f2-11af21e5b0cd", "uui…
#> $ uuid                 <chr> "5c0d08e4deda4a7fbc9634f5e8aba62f", "5c0d08e4deda…
#> $ `__version__`        <chr> "vjPe5qiVxTmyviYSrQE3x4", "vjPe5qiVxTmyviYSrQE3x4…
#> $ `_xform_id_string`   <chr> "atbUaNGu5PWR2u4tNDsYaH", "atbUaNGu5PWR2u4tNDsYaH…
#> $ `_uuid`              <chr> "147d4f30-7459-42f7-818f-b44f47b2cca7", "6f67ede0…
#> $ `_status`            <chr> "submitted_via_web", "submitted_via_web", "submit…
#> $ `_submission_time`   <dttm> 2022-05-09 18:32:03, 2022-05-09 18:32:10, 2022-05…
#> $ `_validation_status` <chr> NA, NA, NA, NA, NA
#> $ `_submitted_by`      <lgl> NA, NA, NA, NA, NA

We can now see the labels (dog, cat, etc.) instead of the values (1 to 5) for the pet_type question.

Variable labels have been improved for all the dummy variables related to the select_multiple question (whether or not you use the select_multiple_label parameter).

var_label(data_sm_label)
#> $start
#> [1] "start"
#> 
#> $end
#> [1] "end"
#> 
#> $today
#> [1] "today"
#> 
#> $full_name
#> [1] "What is your name?"
#> 
#> $pet_type
#> [1] "What type of pet do you own ?"
#> 
#> $pet_type_1
#> [1] "What type of pet do you own ?::rabbit"
#> 
#> $pet_type_2
#> [1] "What type of pet do you own ?::chicken"
#> 
#> $pet_type_3
#> [1] "What type of pet do you own ?::dog"
#> 
#> $pet_type_4
#> [1] "What type of pet do you own ?::cat"
#> 
#> $pet_type_5
#> [1] "What type of pet do you own ?::turtle"
#> 
#> $`_id`
#> [1] "_id"
#> 
#> $instanceID
#> [1] "instanceID"
#> 
#> $deprecatedID
#> [1] "deprecatedID"
#> 
#> $uuid
#> [1] "uuid"
#> 
#> $`__version__`
#> [1] "__version__"
#> 
#> $`_xform_id_string`
#> [1] "_xform_id_string"
#> 
#> $`_uuid`
#> [1] "_uuid"
#> 
#> $`_status`
#> [1] "_status"
#> 
#> $`_submission_time`
#> [1] "_submission_time"
#> 
#> $`_validation_status`
#> [1] "_validation_status"
#> 
#> $`_submitted_by`
#> [1] "_submitted_by"

Most of these functions come from the labelled package; you can explore this package further through its documentation.