Data preparation and formatting.

Data analysis for paper

Paul Melloy (The University of Queensland)

The influence of temperature and vapour pressure deficit on conidia germination and germ tube production in an Australian Podosphaera xanthii isolate.

Compendium sections

  1. Title page
  2. Data preparation and formatting
  3. The influence of temperatures on P. xanthii conidia germination
  4. The influence of VPD on P. xanthii conidia germination and germtube formation

The data analysed in this repository were provided by Dr Zaiton Sapak. The final formatted data can be found in the /cache folder of this compendium repository.

To clean the data we will need some R libraries for handling data and data manipulation.

library(readxl) # reading Microsoft excel files
library(dplyr) # Data manipulation and formatting
library(data.table) # Data manipulation and formatting
# Install the unpublished development version of the R package `epiphytoolR`
# when it is not already available. `requireNamespace()` is used for the check
# rather than scanning the full `installed.packages()` matrix.
# NOTE(review): `repo` is an empty string in this document, so the install
#  cannot succeed as written -- fill in the GitHub "<owner>/<repo>" before
#  running on a machine without the package.
if (!requireNamespace("epiphytoolR", quietly = TRUE)) {
  remotes::install_github(repo = "",
                          ref = "dev",
                          upgrade = "never")
}

Temperature experimental data

Temperature effect on germination experiment

We will begin by reading in data from experiments that assessed the effect of temperature on conidia germination.

# Import cells A1:N32 of the temperature x incubation-time germination workbook
germ_tm <- read_xlsx(
  path = "data/Chapter 5 (germination vs temperature vs time incubation at low VPD).xlsx",
  range = "A1:N32"
)

Next we will reformat the data to make it more machine readable then save it to the /cache folder, which contains the formatted machine readable data.

# Remove unnecessary columns and rows (selected by position)
germ_tm <- data.table(germ_tm)[-c(1, 2, 7:10, 15:18, 23:27),
                               -c(4, 6, 7, 9, 11, 13)]

# rename the incubation period and leaf number columns
setnames(germ_tm,
         old = c("Temperature", "...2"),
         new = c("i_period", "leaf"))

# fill incubation period for rows 13 to 16
germ_tm[13:16, i_period := "48h"]

# remove "h" and make column numeric; rows without a label become NA
germ_tm[, i_period := as.numeric(gsub("h", "", i_period))]

# fill in NAs with the previous non-NA value (last observation carried forward)
setnafill(germ_tm,
          type = "locf",
          cols = "i_period")

# reshape data to long format: the six temperature columns are stacked into a
#  `Tm` label column and a `germ_conidia` value column
germ_tm <- melt(germ_tm,
                id.vars = c("i_period", "leaf"),
                measure.vars = c("17oC", "19oC", "22oC", "25oC", "28oC", "31oC"),
                variable.name = "Tm",
                value.name = "germ_conidia")

# remove "oC" from temperature and make it numeric
# NOTE(review): this statement is incomplete as written -- four columns are
#  named on the left of `:=` but only two expressions are visible inside
#  `list()` (the `Tm` conversion and `100 - as.numeric(germ_conidia)`,
#  presumably for `non_germ_conidia`). The expressions for `leaf` and
#  `germ_conidia` need to be restored from the original script.
germ_tm[, c("Tm", "leaf", "germ_conidia","non_germ_conidia") := list(as.numeric(gsub("oC","",Tm)),
                                            100 - as.numeric(germ_conidia))]

# Change column order
setcolorder(germ_tm,
            neworder = c("Tm", "i_period", "leaf", "germ_conidia", "non_germ_conidia"))

While not recorded in the raw data, no germination was observed at temperatures 8\(^\circ\)C or 35\(^\circ\)C. This is important to include for the data analysis.

# Append the zero-germination records for 8 and 35 degrees C: 16 rows per
#  temperature (four rows for each incubation period of 12, 24, 36 and 48 h,
#  split as two rows per leaf), all with 0 germinated and 100 non-germinated.
germ_tm <- rbindlist(
   list(
      germ_tm,
      list(Tm = c(rep(8, 4 * 4),
                  rep(35, 4 * 4)),
           i_period = rep(c(12, 24, 36, 48),
                          each = 4,
                          times = 2),
           leaf = rep(1:2,
                      each = 2,
                      times = 2 * 2 * 2),
           germ_conidia = rep(0, 4 * 4 * 2),
           non_germ_conidia = rep(100, 4 * 4 * 2))
   ),
   use.names = TRUE
)

A quick check to look at column headers and the first 6 rows of data.

# inspect data
   Tm i_period leaf germ_conidia non_germ_conidia
1: 17       12    1            7               93
2: 17       12    1            9               91
3: 17       12    2            5               95
4: 17       12    2           10               90
5: 17       24    1           17               83
6: 17       24    1           14               86
# write the formatted data to the cache folder, sorted by temperature,
#  incubation period and leaf
fwrite(germ_tm[order(Tm, i_period, leaf)],
       "cache/germination_temperature.csv")

Effect of temperature on germtube development

Now to import and clean data observing the number of germtubes at different temperature treatments.

# The raw observations are embedded in a SAS file: keep line 3 and lines 5 to
#  40 of that file (line 4 is a non-data line)
br_tm <- readLines("data/tempvs.branching(results).sas")[c(3, 5:40)]

# Save the extracted lines as text so they can be parsed into a table instead
#  of being kept as a character vector
write(br_tm, file = "cache/Tm_germtubes.csv")

# Parse the cached text with fread and set the column names
br_tm <- fread("cache/Tm_germtubes.csv",
               col.names = c("obs", "Tm", "germtubes", "rep", "conidia"))

# Strip "oC" from the temperature labels and convert them to numeric
# NOTE(review): this statement is cut off -- `germtubes` and `obs` are named on
#  the left of `:=` but their expressions and the closing `)]` are missing;
#  restore them from the original script.
br_tm[ , c("Tm","germtubes", "obs") := list(as.numeric(gsub("oC","",Tm)),

# Build the non-germinated class: for every temperature and replicate, the
#  conidia not accounted for by the germtube counts (100 minus their sum) are
#  recorded as conidia with zero germtubes
zero_branch <- br_tm[, .(conidia = 100 - sum(conidia)), by = .(Tm, rep)]
zero_branch[, germtubes := 0]
setcolorder(zero_branch, c("Tm", "germtubes", "rep", "conidia"))

# Bind in the zero-germtube rows, then round the conidia values
br_tm <- rbind(br_tm, zero_branch)
br_tm[, conidia := round(conidia)]

While not recorded in the raw data, no germination was observed at temperatures 8\(^\circ\)C or 35\(^\circ\)C. This is important to include for the data analysis.

# Append the records for 8 and 35 degrees C: for each temperature and each of
#  the two replicates, 100 conidia in the zero-germtube class and 0 conidia in
#  the one-, two- and three-germtube classes.
br_tm <- rbindlist(
   list(
      br_tm,
      list(Tm = c(rep(8, 4 * 2),
                  rep(35, 4 * 2)),
           germtubes = rep(c(0, 1, 2, 3),
                           each = 2,
                           times = 2),
           rep = rep(1:2, 8),
           conidia = rep(c(100, 0, 0, 0),
                         each = 2,
                         times = 2))
   ),
   use.names = TRUE
)

A quick check to look at column headers and the first 6 rows of data.

# inspect data
   Tm germtubes rep conidia
1: 17         1   1      15
2: 17         1   2      12
3: 17         2   1      15
4: 17         2   2      18
5: 17         3   1       0
6: 17         3   2       0
# save data into cache, sorted by temperature, germtubes and replicate
fwrite(x = br_tm[order(Tm, germtubes, rep)],
       file = "cache/branching_temperature.csv")

Experimental data on VPD

Effect of VPD on germination

# Import
# The germination counts are read from three cell ranges of Sheet1 that cover
#  the same rows (63 to 158) and share one five-column layout; each range is
#  read with the same settings and the results are stacked.
# NOTE(review): this workbook is opened elsewhere in this document as
#  "data/Chapter 5 ..." (capital "C") -- confirm the file name's case, which
#  matters on case-sensitive file systems.
germ_vpd <- bind_rows(
  lapply(c("B63:F158", "I63:M158", "P63:T158"),
         function(cell_range) {
           read_xls("data/chapter 5 (germination vs VPD vs temp vs RH).xls",
                    sheet = "Sheet1",
                    range = cell_range,
                    col_names = c("RH", "Tm", "i_period", "rep", "germ_n"))
         })
)

# Convert to a data.table by reference, then: express RH as a percentage,
#  strip the "C" and "h" suffixes from temperature and incubation period, and
#  derive the non-germinated count as 100 minus the germinated count
setDT(germ_vpd)
germ_vpd[, `:=`(
  RH = RH * 100,
  Tm = as.numeric(gsub("C", "", Tm)),
  i_period = as.numeric(gsub("h", "", i_period)),
  non_germ_n = 100 - germ_n
)]

# add vpd column, rounded to three decimal places
# NOTE(review): the rounding precision is not visible in this document; 3 is
#  inferred from the values printed in the data check (e.g. 0.038) -- confirm.
germ_vpd[, vpd := round(epiphytoolR::calc_vpd(RH = RH,
                                              Tm = Tm),
                        digits = 3)]

# Change column order
setcolorder(germ_vpd,
            neworder = c("RH", "Tm", "vpd", "i_period", "rep", "germ_n", "non_germ_n"))

A quick check to look at column headers and the first 6 rows of data.

# inspect data
   RH Tm         vpd i_period rep germ_n non_germ_n
1: 99 28 0.038 [kPa]       12   1     56         44
2: 99 28 0.038 [kPa]       12   2     54         46
3: 99 28 0.038 [kPa]       12   3     52         48
4: 99 28 0.038 [kPa]       12   4     50         50
5: 99 28 0.038 [kPa]       24   1     64         36
6: 99 28 0.038 [kPa]       24   2     65         35
# write out clean data
# NOTE(review): the first argument is not visible in this document; `germ_vpd`
#  is presumed (any row ordering applied before writing is unknown) -- confirm.
fwrite(x = germ_vpd,
       file = "cache/germination_vpd.csv")

Vapour pressure deficit effect on branching

Read in the raw data.

# Read the first 29 rows of the raw data, taking column names from the header
br_vpd <- fread("data/220308_branching_vpd_temperature.csv",
                nrows = 29,
                header = TRUE)

# Remove unnecessary columns and rows by position, then drop row 13 of what
#  remains
br_vpd <- br_vpd[-c(5:10, 15:24), -c(1, 2, 9:12, 19:22, 29)]
br_vpd <- br_vpd[-13]

# rename the 18 remaining columns (addressed by position 1:18)
# NOTE(review): this call is cut off -- its opening line (presumably
#  `setnames(br_vpd,`) and the last 12 of the 18 new names are missing. The
#  six visible names are the "_b1" labels for RH 99, 95, 85, 75, 55 and 32;
#  presumably the same RH sequence repeats with "_b2" and "_b3" -- confirm
#  against the original script before restoring.
         old = 1:18,
         new = c("99_b1","95_b1","85_b1","75_b1","55_b1","32_b1",

# Add temperature treatment: four consecutive rows for each of 22, 25 and 28
set(br_vpd, j = "Tm", value = rep(c(22, 25, 28), each = 4))

# reshape data to long format: every non-id column is stacked, its name kept
#  as a character `variable` column and its value stored in `conidia`
br_vpd <- melt(br_vpd,
               id.vars = c("Tm"),
               value.name = "conidia",
               variable.factor = FALSE)

# split variable column into two columns, RH and germtubes
br_vpd[, c("RH", "germtubes") := tstrsplit(variable, "_b")]

# calculate vpd from RH and temperature, rounded to three decimal places
# NOTE(review): the rounding precision is not visible in this document; 3 is
#  inferred from the values printed in the data check (e.g. 0.026) -- confirm.
br_vpd[, vpd := round(epiphytoolR::calc_vpd(RH = as.numeric(RH),
                                            Tm = as.numeric(Tm)),
                      digits = 3)]

# remove variable column
br_vpd[, variable := NULL]

# Change column order
setcolorder(br_vpd,
            neworder = c("vpd", "RH", "Tm", "germtubes", "conidia"))

# set correct classes and add non-germinated
# NOTE(review): this statement is cut off -- the right-hand side of `:=`
#  (the new values for `RH` and `rep`) and the closing `]` are missing;
#  restore them from the original script.
br_vpd[, c("RH",
           "rep") :=

# Create data.table representing non-germinated conidia
## Assuming only 100 conidia were examined, the non-germinated count for each
## vpd, RH, temperature and replicate is 100 minus the summed germtube counts
zero_branch <- br_vpd[, .(conidia = 100 - sum(conidia)),
                      by = .(vpd, RH, Tm, rep)]
zero_branch[, germtubes := 0]
setcolorder(zero_branch, c("vpd", "RH", "Tm", "germtubes", "rep", "conidia"))

# some treatment reps contained greater than 100 conidia assessments, therefore
#  any negative number is converted to zero and no non-germinated conidia are
#  assumed to be observed
zero_branch[conidia < 0, conidia := 0]

# Bind the non-germinated conidia in as conidia with zero germtubes, then order
br_vpd <- rbind(br_vpd, zero_branch)
br_vpd <- br_vpd[order(Tm, germtubes, rep)]

Before saving the data, a quick check is made to look at the column headers and the first 6 rows of data.

# inspect data
           vpd RH Tm germtubes conidia rep
1: 0.026 [kPa] 99 22         0       2   1
2: 0.132 [kPa] 95 22         0      73   1
3: 0.397 [kPa] 85 22         0      69   1
4: 0.661 [kPa] 75 22         0      84   1
5: 1.190 [kPa] 55 22         0      81   1
6: 1.798 [kPa] 32 22         0      90   1
# write the formatted branching data to the cache folder
fwrite(br_vpd, "cache/branching_VPD.csv")

VPD branching dataset two

# Import the second branching data set from cells AF3:AI182 of Sheet2
br_vpd2 <- read_xls(
  path = "data/Chapter 5 (germination vs VPD vs temp vs RH).xls",
  sheet = "Sheet2",
  range = "AF3:AI182",
  col_names = c("vpd", "germtubes", "rep", "conidia")
)

# make data.table format and add temperature and humidity: RH changes every
#  36 rows and, within each RH, temperature changes every 12 rows
br_vpd2 <- data.table(br_vpd2)
br_vpd2[, `:=`(
  Tm = rep(c(22, 25, 28), each = 12, times = 5),
  RH = rep(c(95, 85, 75, 55, 32), each = 12 * 3)
)]

# remove "b" from germtubes and make it numeric
br_vpd2[, germtubes := as.numeric(gsub("b", "", germtubes))]

# use saturated branching data from temperature experiment
# (a copy is taken so the `:=` below does not modify br_tm by reference)
br_vpd99 <- copy(br_tm)
# NOTE(review): the statement below is cut off -- the `digits` argument of
#  `round()`, the expression assigned to `RH` and the closing `)]` are
#  missing; restore them from the original script.
br_vpd99[, RH := 99][, c("vpd",
                         "RH") := list(as.numeric(round(epiphytoolR::calc_vpd(RH = RH,
                                                                 Tm = Tm),

# Add the RH 99 records, keeping only the temperatures that occur in br_vpd2
#  and the classes with at least one germtube
# NOTE(review): the second argument of `rbind()` is not visible in this
#  document; `br_vpd2` is presumed -- confirm.
br_vpd2 <- rbind(br_vpd99[Tm %in% unique(br_vpd2$Tm) &
                             germtubes != 0, ],
                 br_vpd2)

# recalculate vpd from RH and temperature, rounded to three decimal places
# NOTE(review): the rounding precision is not visible in this document; 3 is
#  inferred from the values printed in the data check (e.g. 0.026) -- confirm.
br_vpd2[, vpd := round(epiphytoolR::calc_vpd(RH = RH,
                                             Tm = Tm),
                       digits = 3)]

# Create data.table representing non-germinated conidia
## Assuming only 100 conidia were examined
zero_branch <- br_vpd2[, (100 - sum(conidia)),
                       by = .(Tm, RH, vpd, rep)][, germtubes := 0][, .(vpd, RH, Tm, germtubes, rep, V1)]

# some treatment reps contained greater than 100 conidia assessments, therefore
#  any negative number will be converted to zero and no non-germinated conidia are
#  assumed to be observed
zero_branch[, V1 := fifelse(V1 < 0, 0, V1)]

# rename column
setnames(zero_branch, old = "V1", new = "conidia")

# Change column order so br_vpd2 has the same layout as zero_branch
# NOTE(review): the table being reordered is not visible in this document;
#  `br_vpd2` is presumed -- confirm.
setcolorder(br_vpd2,
            neworder = c("vpd", "RH", "Tm", "germtubes", "rep", "conidia"))

# Bind and order data containing non-germinated conidia as conidia with zero germtubes
br_vpd2 <- rbind(br_vpd2, zero_branch)[order(Tm, germtubes, rep)]

Before saving the data, a quick check is made to look at the column headers and the first 6 rows of data.

# inspect data
           vpd RH Tm germtubes rep conidia
1: 0.026 [kPa] 99 22         0   1      34
2: 0.132 [kPa] 95 22         0   1      86
3: 0.397 [kPa] 85 22         0   1      86
4: 0.661 [kPa] 75 22         0   1      92
5: 1.190 [kPa] 55 22         0   1      91
6: 1.798 [kPa] 32 22         0   1      93
# write the second branching data set to the cache folder
fwrite(br_vpd2, "cache/branching_VPD2.csv")

More data on germtube development at a range of VPD

# Take lines 3 to 236 of the raw file and cut out the non-data lines (the
#  second line and four four-line stretches further down)
dat <- readLines("data/vpd")[3:236][-c(2, 55:58, 111:114, 167:170, 223:226)]

# Save the remaining lines as text so they can be parsed into a table
write(dat, file = "cache/vpd_germtubes.csv")

# read in data using fread
br_vpd3 <- fread("cache/vpd_germtubes.csv")

# Add relative humidity and temperature data: RH changes every 36 rows and,
#  within each RH, temperature changes every 12 rows
br_vpd3[, `:=`(
  RH = rep(c(99, 95, 85, 75, 55, 32), each = 36),
  Tm = rep(c(22, 25, 28), each = 12, times = 6)
)]

# recalculate vpd (rounded to three decimal places) and convert germtubes
#  (`bran`) to numeric by removing the "b" prefix
# NOTE(review): the rounding precision is not visible in this document; 3 is
#  inferred from the values printed in the data check (e.g. 0.026) -- confirm.
br_vpd3[, c("vpd",
            "bran") :=
          list(round(epiphytoolR::calc_vpd(RH = RH,
                                           Tm = Tm),
                     digits = 3),
               as.numeric(gsub("b", "", bran)))]

# rearrange columns
setcolorder(br_vpd3,
            neworder = c("Obs", "RH", "Tm", "vpd", "bran", "rep", "numb"))

# rename the germtube and conidia count columns
setnames(br_vpd3,
         old = c("bran", "numb"),
         new = c("germtubes", "conidia"))

# Create data.table representing non-germinated conidia
## Assuming only 100 conidia were examined, the non-germinated count for each
## temperature, RH, vpd and replicate is 100 minus the summed germtube counts
zero_branch <- br_vpd3[, .(conidia = 100 - sum(conidia)),
                       by = .(Tm, RH, vpd, rep)]
zero_branch[, germtubes := 0]
setcolorder(zero_branch, c("vpd", "RH", "Tm", "germtubes", "rep", "conidia"))

# some treatment reps contained greater than 100 conidia assessments, therefore
#  any negative number is converted to zero and no non-germinated conidia are
#  assumed to be observed
zero_branch[conidia < 0, conidia := 0]

# Drop the observation index and put the columns in the same order as
#  zero_branch
br_vpd3[, Obs := NULL]
setcolorder(br_vpd3, c("vpd", "RH", "Tm", "germtubes", "rep", "conidia"))

# Bind the non-germinated conidia in as conidia with zero germtubes, then order
br_vpd3 <- rbind(br_vpd3, zero_branch)
br_vpd3 <- br_vpd3[order(Tm, germtubes, rep)]

Before saving the data, a quick check is made to look at the column headers and the first 6 rows of data.

# inspect data
           vpd RH Tm germtubes rep conidia
1: 0.026 [kPa] 99 22         0   1      19
2: 0.132 [kPa] 95 22         0   1      86
3: 0.397 [kPa] 85 22         0   1      91
4: 0.661 [kPa] 75 22         0   1      92
5: 1.190 [kPa] 55 22         0   1      93
6: 1.798 [kPa] 32 22         0   1      92
# save data into cache
# NOTE(review): the first argument is not visible in this document; `br_vpd3`
#  is presumed -- confirm.
fwrite(x = br_vpd3,
       file = "cache/branching_VPD3.csv")

To describe how the methods were undertaken we need to determine how many leaf discs were inoculated and in how many runs. We also need to know how many were destructively sampled in each experiment.

# Count, for each data set, the number of distinct sampling units: the unique
#  `leaf` or `rep` values are counted within each treatment group and the
#  group counts are then summed.
# NOTE(review): this statement is incomplete -- the call that wraps the six
#  named values after `n_dat <-` (and its closing parenthesis) is missing;
#  restore it from the original script.
# NOTE(review): in the four br_* lines `rep` is itself a grouping column, so
#  `length(unique(rep))` is 1 in every group and the sum is simply the number
#  of distinct (Tm or vpd, rep) combinations -- confirm this is intended.
n_dat <- 
   germ_tm = germ_tm[ , length(unique(leaf)), by = .(Tm, i_period)][,sum(V1)],
   germ_vpd = germ_vpd[ , length(unique(rep)), by = .(vpd, i_period)][,sum(V1)],
   br_tm = br_tm[, length(unique(rep)), by = .(Tm, rep)][,sum(V1)],
   br_vpd = br_vpd[, length(unique(rep)), by = .(vpd, rep)][,sum(V1)],
   br_vpd2 = br_vpd2[, length(unique(rep)), by = .(vpd, rep)][,sum(V1)],
   br_vpd3 = br_vpd3[, length(unique(rep)), by = .(vpd, rep)][,sum(V1)]

germ_tm    64
germ_vpd  288
br_tm      16
br_vpd     72
br_vpd2    66
br_vpd3    72
R session info
R version 4.2.0 (2022-04-22 ucrt)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 19043)

Matrix products: default

[1] LC_COLLATE=English_Australia.utf8 
[2] LC_CTYPE=English_Australia.utf8   
[3] LC_MONETARY=English_Australia.utf8
[4] LC_NUMERIC=C                      
[5] LC_TIME=English_Australia.utf8    

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods  
[7] base     

other attached packages:
[1] data.table_1.14.2 dplyr_1.0.9       readxl_1.4.0     
[4] devtools_2.4.3    usethis_2.1.5    

loaded via a namespace (and not attached):
 [1] tidyselect_1.1.2       xfun_0.31             
 [3] bslib_0.3.1            remotes_2.4.2         
 [5] purrr_0.3.4            vctrs_0.4.1           
 [7] generics_0.1.2         testthat_3.1.4        
 [9] htmltools_0.5.2        yaml_2.3.5            
[11] utf8_1.2.2             rlang_1.0.2           
[13] pkgbuild_1.3.1         jquerylib_0.1.4       
[15] pillar_1.7.0           glue_1.6.2            
[17] withr_2.5.0            sessioninfo_1.2.2     
[19] lifecycle_1.0.1        stringr_1.4.0         
[21] cellranger_1.1.0       evaluate_0.15         
[23] memoise_2.0.1          knitr_1.39            
[25] callr_3.7.0            fastmap_1.1.0         
[27] ps_1.7.0               fansi_1.0.3           
[29] Rcpp_1.0.8.3           cachem_1.0.6          
[31] desc_1.4.1             pkgload_1.2.4         
[33] jsonlite_1.8.0         fs_1.5.2              
[35] brio_1.1.3             distill_1.4           
[37] digest_0.6.29          stringi_1.7.6         
[39] processx_3.5.3         rprojroot_2.0.3       
[41] cli_3.3.0              tools_4.2.0           
[43] magrittr_2.0.3         sass_0.4.1            
[45] tibble_3.1.7           crayon_1.5.1          
[47] pkgconfig_2.0.3        downlit_0.4.0         
[49] ellipsis_0.3.2         prettyunits_1.1.1     
[51] rmarkdown_2.14         rematch_1.0.1         
[53] epiphytoolR_0.0.0.9001 rstudioapi_0.13       
[55] R6_2.5.1               units_0.8-0           
[57] compiler_4.2.0