Title: | Translation of Base R-Like Functions for 'data.table' Objects |
---|---|
Description: | Some heavily used base R functions are reconstructed to also be compliant to data.table objects. Also, some general helper functions that could be of interest for working with data.table objects are included. |
Authors: | Robin Van Oirbeek |
Maintainer: | Robin Van Oirbeek <[email protected]> |
License: | GPL-3 |
Version: | 0.2 |
Built: | 2025-02-25 06:01:33 UTC |
Source: | https://github.com/cran/R2DT |
Forcing the character/string data type on a selected set of columns of a data.table object
asCharacterDT(inputDT, colNamesToBeTransformed = NULL)
asCharacterDT(inputDT, colNamesToBeTransformed = NULL)
inputDT |
data.table object containing the data of interest. This is an obligatory argument, without default value. |
colNamesToBeTransformed |
Character vector containing potential column names of the 'inputDT' argument. The default value is NULL. |
No value is returned. Note that a valid value needs to be supplied to the 'colNamesToBeTransformed' argument in order to make this function work.
library(data.table) inputDT <- as.data.table(data.frame(x = seq(1, 20, 2), y = LETTERS[1:10])) asFactorDT(inputDT, c('y')) asCharacterDT(inputDT) asCharacterDT(inputDT, c('x', 'y')) # First looking at the result, followed by testing if the transformation worked! inputDT isCharacterDT(inputDT, c('x', 'y')) isFactorDT(inputDT, c('x', 'y'))
library(data.table) inputDT <- as.data.table(data.frame(x = seq(1, 20, 2), y = LETTERS[1:10])) asFactorDT(inputDT, c('y')) asCharacterDT(inputDT) asCharacterDT(inputDT, c('x', 'y')) # First looking at the result, followed by testing if the transformation worked! inputDT isCharacterDT(inputDT, c('x', 'y')) isFactorDT(inputDT, c('x', 'y'))
Forcing the factor data type on a selected set of columns of a data.table object
asFactorDT(inputDT, colNamesToBeTransformed = NULL)
asFactorDT(inputDT, colNamesToBeTransformed = NULL)
inputDT |
data.table object containing the data of interest. This is an obligatory argument, without default value. |
colNamesToBeTransformed |
Character vector containing potential column names of the 'inputDT' argument. The default value is NULL. |
No value is returned. Note that a valid value needs to be supplied to the 'colNamesToBeTransformed' argument in order to make this function work.
library(data.table) inputDT <- as.data.table(data.frame(x = seq(1, 20, 2), y = LETTERS[1:10])) asFactorDT(inputDT, c('y')) asCharacterDT(inputDT) asCharacterDT(inputDT, c('x', 'y')) # First looking at the result, followed by testing if the transformation worked! inputDT isCharacterDT(inputDT, c('x', 'y')) isFactorDT(inputDT, c('x', 'y'))
library(data.table) inputDT <- as.data.table(data.frame(x = seq(1, 20, 2), y = LETTERS[1:10])) asFactorDT(inputDT, c('y')) asCharacterDT(inputDT) asCharacterDT(inputDT, c('x', 'y')) # First looking at the result, followed by testing if the transformation worked! inputDT isCharacterDT(inputDT, c('x', 'y')) isFactorDT(inputDT, c('x', 'y'))
Forcing the integer data type on a selected set of columns of a data.table object
asIntegerDT(inputDT, colNamesToBeTransformed = NULL)
asIntegerDT(inputDT, colNamesToBeTransformed = NULL)
inputDT |
data.table object containing the data of interest. This is an obligatory argument, without default value. |
colNamesToBeTransformed |
Character vector containing potential column names of the 'inputDT' argument. The default value is NULL. |
No value is returned. Note that a valid value needs to be supplied to the 'colNamesToBeTransformed' argument in order to make this function work.
library(data.table) inputDT <- as.data.table(data.frame(x = seq(1, 20, 2), y = LETTERS[1:10])) asFactorDT(inputDT, c('y')) asIntegerDT(inputDT) asIntegerDT(inputDT, c('x', 'y')) # First looking at the result, followed by testing if the transformation worked! inputDT isIntegerDT(inputDT, c('x', 'y')) # Note the following behavior that also holds for the as.integer() base R function. isNumericDT(inputDT, c('x', 'y'))
library(data.table) inputDT <- as.data.table(data.frame(x = seq(1, 20, 2), y = LETTERS[1:10])) asFactorDT(inputDT, c('y')) asIntegerDT(inputDT) asIntegerDT(inputDT, c('x', 'y')) # First looking at the result, followed by testing if the transformation worked! inputDT isIntegerDT(inputDT, c('x', 'y')) # Note the following behavior that also holds for the as.integer() base R function. isNumericDT(inputDT, c('x', 'y'))
Forcing the logical/boolean data type on a selected set of columns of a data.table object
asLogicalDT(inputDT, colNamesToBeTransformed = NULL)
asLogicalDT(inputDT, colNamesToBeTransformed = NULL)
inputDT |
data.table object containing the data of interest. This is an obligatory argument, without default value. |
colNamesToBeTransformed |
Character vector containing potential column names of the 'inputDT' argument. The default value is NULL. |
No value is returned. Note that a valid value needs to be supplied to the 'colNamesToBeTransformed' argument in order to make this function work.
library(data.table) inputDT <- as.data.table(data.frame(x = seq(1, 20, 2), y = LETTERS[1:10])) asFactorDT(inputDT, c('y')) asLogicalDT(inputDT) asLogicalDT(inputDT, c('x', 'y')) # First looking at the result, followed by testing if the transformation worked! inputDT isLogicalDT(inputDT, c('x', 'y')) # Notice the 'funny' side effect for the 'F' character value of column y... # This behavior is also observed for the as.logical() base R function.
library(data.table) inputDT <- as.data.table(data.frame(x = seq(1, 20, 2), y = LETTERS[1:10])) asFactorDT(inputDT, c('y')) asLogicalDT(inputDT) asLogicalDT(inputDT, c('x', 'y')) # First looking at the result, followed by testing if the transformation worked! inputDT isLogicalDT(inputDT, c('x', 'y')) # Notice the 'funny' side effect for the 'F' character value of column y... # This behavior is also observed for the as.logical() base R function.
Forcing the numeric data type on a selected set of columns of a data.table object
asNumericDT(inputDT, colNamesToBeTransformed = NULL)
asNumericDT(inputDT, colNamesToBeTransformed = NULL)
inputDT |
data.table object containing the data of interest. This is an obligatory argument, without default value. |
colNamesToBeTransformed |
Character vector containing potential column names of the 'inputDT' argument. The default value is NULL. |
No value is returned. Note that a valid value needs to be supplied to the 'colNamesToBeTransformed' argument in order to make this function work.
library(data.table) inputDT <- as.data.table(data.frame(x = seq(1, 20, 2), y = LETTERS[1:10])) asNumericDT(inputDT) asNumericDT(inputDT, c('x', 'y')) # First looking at the result, followed by testing if the transformation worked! inputDT isNumericDT(inputDT, c('x', 'y')) isIntegerDT(inputDT, c('x', 'y'))
library(data.table) inputDT <- as.data.table(data.frame(x = seq(1, 20, 2), y = LETTERS[1:10])) asNumericDT(inputDT) asNumericDT(inputDT, c('x', 'y')) # First looking at the result, followed by testing if the transformation worked! inputDT isNumericDT(inputDT, c('x', 'y')) isIntegerDT(inputDT, c('x', 'y'))
Checking if an object is a data.table object and (optional) testing if some column names are valid for it
checkDT(inputDT, colNamesToBeChecked = NULL)
checkDT(inputDT, colNamesToBeChecked = NULL)
inputDT |
data.table object containing the data of interest. This is an obligatory argument, without default value. |
colNamesToBeChecked |
Character vector containing potential column names of the 'inputDT' argument. The default value is NULL. |
No value is returned if all elements in the 'colNamesToBeChecked' argument are valid column names of the 'inputDT' argument. In the absence of a value for the 'colNamesToBeChecked' argument, it is only tested whether the 'inputDT' argument is a data.table object (this is tested irrespective of the value of the 'colNamesToBeChecked' argument).
library(data.table) inputDT <- as.data.table(data.frame(x = seq(1, 20, 2), y = LETTERS[1:10])) asFactorDT(inputDT, c('y')) checkDT(inputDT) checkDT(inputDT, c('x', 'y')) checkDT(inputDT, c('x', 'y1')) checkDT(inputDT, c('x', 'y1', 'z1')) checkDT(inputDT, c('x1', 'y1', 'z1'))
library(data.table) inputDT <- as.data.table(data.frame(x = seq(1, 20, 2), y = LETTERS[1:10])) asFactorDT(inputDT, c('y')) checkDT(inputDT) checkDT(inputDT, c('x', 'y')) checkDT(inputDT, c('x', 'y1')) checkDT(inputDT, c('x', 'y1', 'z1')) checkDT(inputDT, c('x1', 'y1', 'z1'))
Detecting which levels of which factor of a data.table object contain non-alphanumeric characters (including whitespace)
detectWeirdLevelNamesDT(inputDT)
detectWeirdLevelNamesDT(inputDT)
inputDT |
data.table object containing the data of interest. This is an obligatory argument, without default value. |
No value is returned. Note that a valid data.table object needs to be supplied to the 'inputDT' argument in order to make this function work.
library(data.table) inputDT <- as.data.table(data.frame(x = seq(1, 20, 2))) detectWeirdLevelNamesDT(inputDT) inputDT <- as.data.table(data.frame(x = seq(1, 20, 2), y = LETTERS[1:10])) asFactorDT(inputDT, c('y')) detectWeirdLevelNamesDT(inputDT) inputDT <- as.data.table(data.frame(x = c(rep('test_', 5), rep('test@', 5)), y = c(rep('test_', 5), rep('test@', 5)))) asFactorDT(inputDT, c('x', 'y')) detectWeirdLevelNamesDT(inputDT)
library(data.table) inputDT <- as.data.table(data.frame(x = seq(1, 20, 2))) detectWeirdLevelNamesDT(inputDT) inputDT <- as.data.table(data.frame(x = seq(1, 20, 2), y = LETTERS[1:10])) asFactorDT(inputDT, c('y')) detectWeirdLevelNamesDT(inputDT) inputDT <- as.data.table(data.frame(x = c(rep('test_', 5), rep('test@', 5)), y = c(rep('test_', 5), rep('test@', 5)))) asFactorDT(inputDT, c('x', 'y')) detectWeirdLevelNamesDT(inputDT)
Extracting the levels of all or a selected set of the factor columns of a data.table object
extractLevelDT(inputDT, categoricalVar = NULL)
extractLevelDT(inputDT, categoricalVar = NULL)
inputDT |
data.table object containing the data of interest. This is an obligatory argument, without default value. |
categoricalVar |
Character vector containing potential column names of the 'inputDT' argument. The default value is NULL. |
A named list is returned, with as names the different valid factor column names, either of the whole 'inputDT' argument, or of the factor variables of which the names are listed in the 'categoricalVar' argument, containing a character vector with the different levels of the respective factor. In case the 'categoricalVar' argument contains column names that aren't factors, a warning is thrown. An empty list is returned when no valid factors (with or without the 'categoricalVar' selection turned on) are found.
library(data.table) inputDT <- as.data.table(data.frame(x = LETTERS[11:20], y = LETTERS[1:10])) asFactorDT(inputDT, c('x', 'y')) extractLevelDT(inputDT) extractLevelDT(inputDT, c('x', 'y')) extractLevelDT(inputDT, c('x', 'y1')) inputDT <- as.data.table(data.frame(x = seq(1, 20, 2), y = LETTERS[1:10])) asFactorDT(inputDT, c('y')) extractLevelDT(inputDT) inputDT <- as.data.table(data.frame(x = seq(1, 20, 2), y = seq(2, 21, 2))) extractLevelDT(inputDT) extractLevelDT(inputDT, c('x', 'y'))
library(data.table) inputDT <- as.data.table(data.frame(x = LETTERS[11:20], y = LETTERS[1:10])) asFactorDT(inputDT, c('x', 'y')) extractLevelDT(inputDT) extractLevelDT(inputDT, c('x', 'y')) extractLevelDT(inputDT, c('x', 'y1')) inputDT <- as.data.table(data.frame(x = seq(1, 20, 2), y = LETTERS[1:10])) asFactorDT(inputDT, c('y')) extractLevelDT(inputDT) inputDT <- as.data.table(data.frame(x = seq(1, 20, 2), y = seq(2, 21, 2))) extractLevelDT(inputDT) extractLevelDT(inputDT, c('x', 'y'))
Extracting the reference level of all or a selected set of the factor columns of a data.table object
extractRefLevelDT(inputDT, categoricalVar = NULL)
extractRefLevelDT(inputDT, categoricalVar = NULL)
inputDT |
data.table object containing the data of interest. This is an obligatory argument, without default value. |
categoricalVar |
Character vector containing potential column names of the 'inputDT' argument. The default value is NULL. |
A named list is returned, with as names the different valid factor column names, either of the whole 'inputDT' argument, or of the factor variables of which the names are listed in the 'categoricalVar' argument, containing a character vector of length 1 with the reference level of the respective factor. In case the 'categoricalVar' argument contains column names that aren't factors, a warning is thrown. An empty list is returned when no valid factors (with or without the 'categoricalVar' selection turned on) are found.
library(data.table) inputDT <- as.data.table(data.frame(x = LETTERS[11:20], y = LETTERS[1:10])) asFactorDT(inputDT, c('x', 'y')) extractRefLevelDT(inputDT) extractRefLevelDT(inputDT, c('x', 'y')) extractRefLevelDT(inputDT, c('x', 'y1')) inputDT <- as.data.table(data.frame(x = seq(1, 20, 2), y = LETTERS[1:10])) asFactorDT(inputDT, c('y')) extractRefLevelDT(inputDT) inputDT <- as.data.table(data.frame(x = seq(1, 20, 2), y = seq(2, 21, 2))) extractRefLevelDT(inputDT) extractRefLevelDT(inputDT, c('x', 'y'))
library(data.table) inputDT <- as.data.table(data.frame(x = LETTERS[11:20], y = LETTERS[1:10])) asFactorDT(inputDT, c('x', 'y')) extractRefLevelDT(inputDT) extractRefLevelDT(inputDT, c('x', 'y')) extractRefLevelDT(inputDT, c('x', 'y1')) inputDT <- as.data.table(data.frame(x = seq(1, 20, 2), y = LETTERS[1:10])) asFactorDT(inputDT, c('y')) extractRefLevelDT(inputDT) inputDT <- as.data.table(data.frame(x = seq(1, 20, 2), y = seq(2, 21, 2))) extractRefLevelDT(inputDT) extractRefLevelDT(inputDT, c('x', 'y'))
Testing if a set of columns of a data.table object corresponds to the character/string data type
isCharacterDT(inputDT, colNamesToBeChecked = NULL, returnNames = FALSE)
isCharacterDT(inputDT, colNamesToBeChecked = NULL, returnNames = FALSE)
inputDT |
data.table object containing the data of interest. This is an obligatory argument, without default value. |
colNamesToBeChecked |
Character vector containing potential column names of the 'inputDT' argument. The default value is NULL. |
returnNames |
Logical vector of length 1 indicating whether or not the column name of the selected strings should be returned. The default value is FALSE. |
A logical vector of length the size of the 'colNamesToBeChecked' argument, or in the absence of a value the number of columns of the 'inputDT' argument, that is TRUE if the corresponding column of the 'inputDT' argument is a string. If the 'returnNames' argument equals TRUE, then only those column names from the aforementioned selection of columns of the 'inputDT' argument are returned that are a string.
library(data.table) inputDT <- as.data.table(data.frame(x = rep(c(TRUE, FALSE), 5), y = LETTERS[1:10])) asFactorDT(inputDT, c('y')) isCharacterDT(inputDT) inputDT2 <- as.data.table(data.frame(y = LETTERS[1:10])) isCharacterDT(inputDT2) isCharacterDT(inputDT2, c('x', 'y')) isCharacterDT(inputDT2, returnNames = TRUE)
library(data.table) inputDT <- as.data.table(data.frame(x = rep(c(TRUE, FALSE), 5), y = LETTERS[1:10])) asFactorDT(inputDT, c('y')) isCharacterDT(inputDT) inputDT2 <- as.data.table(data.frame(y = LETTERS[1:10])) isCharacterDT(inputDT2) isCharacterDT(inputDT2, c('x', 'y')) isCharacterDT(inputDT2, returnNames = TRUE)
Testing if a set of columns of a data.table object corresponds to the factor data type
isFactorDT(inputDT, colNamesToBeChecked = NULL, returnNames = FALSE)
isFactorDT(inputDT, colNamesToBeChecked = NULL, returnNames = FALSE)
inputDT |
data.table object containing the data of interest. This is an obligatory argument, without default value. |
colNamesToBeChecked |
Character vector containing potential column names of the 'inputDT' argument. The default value is NULL. |
returnNames |
Logical vector of length 1 indicating whether or not the column name of the selected factors should be returned. The default value is FALSE. |
A logical vector of length the size of the 'colNamesToBeChecked' argument, or in the absence of a value the number of columns of the 'inputDT' argument, that is TRUE if the corresponding column of the 'inputDT' argument is a factor. If the 'returnNames' argument equals TRUE, then only those column names from the aforementioned selection of column of the 'inputDT' argument are returned that are a factor.
library(data.table) inputDT <- as.data.table(data.frame(x = seq(1, 20, 2), y = LETTERS[1:10])) asFactorDT(inputDT, c('y')) isFactorDT(inputDT) isFactorDT(inputDT, c('x', 'y')) isFactorDT(inputDT, returnNames = TRUE) isFactorDT(inputDT, 'y') isFactorDT(inputDT, c('x', 'y1'))
library(data.table) inputDT <- as.data.table(data.frame(x = seq(1, 20, 2), y = LETTERS[1:10])) asFactorDT(inputDT, c('y')) isFactorDT(inputDT) isFactorDT(inputDT, c('x', 'y')) isFactorDT(inputDT, returnNames = TRUE) isFactorDT(inputDT, 'y') isFactorDT(inputDT, c('x', 'y1'))
Testing if a set of columns of a data.table object corresponds to the integer data type
isIntegerDT(inputDT, colNamesToBeChecked = NULL, returnNames = FALSE)
isIntegerDT(inputDT, colNamesToBeChecked = NULL, returnNames = FALSE)
inputDT |
data.table object containing the data of interest. This is an obligatory argument, without default value. |
colNamesToBeChecked |
Character vector containing potential column names of the 'inputDT' argument. The default value is NULL. |
returnNames |
Logical vector of length 1 indicating whether or not the column name of the selected integers should be returned. The default value is FALSE. |
A logical vector of length the size of the 'colNamesToBeChecked' argument, or in the absence of a value the number of columns of the 'inputDT' argument, that is TRUE if the corresponding column of the 'inputDT' argument is an integer. If the 'returnNames' argument equals TRUE, then only those column names from the aforementioned selection of columns of the 'inputDT' argument are returned that are an integer.
library(data.table) inputDT <- as.data.table(data.frame(x = seq(1L, 20L, 2L), y = LETTERS[1:10])) asFactorDT(inputDT, c('y')) isIntegerDT(inputDT) isIntegerDT(inputDT, c('x', 'y')) isIntegerDT(inputDT, returnNames = TRUE) isIntegerDT(inputDT, 'x') isIntegerDT(inputDT, c('x', 'y1'))
library(data.table) inputDT <- as.data.table(data.frame(x = seq(1L, 20L, 2L), y = LETTERS[1:10])) asFactorDT(inputDT, c('y')) isIntegerDT(inputDT) isIntegerDT(inputDT, c('x', 'y')) isIntegerDT(inputDT, returnNames = TRUE) isIntegerDT(inputDT, 'x') isIntegerDT(inputDT, c('x', 'y1'))
Testing if a set of columns of a data.table object corresponds to the logical/boolean data type
isLogicalDT(inputDT, colNamesToBeChecked = NULL, returnNames = FALSE)
isLogicalDT(inputDT, colNamesToBeChecked = NULL, returnNames = FALSE)
inputDT |
data.table object containing the data of interest. This is an obligatory argument, without default value. |
colNamesToBeChecked |
Character vector containing potential column names of the 'inputDT' argument. The default value is NULL. |
returnNames |
Logical vector of length 1 indicating whether or not the column name of the selected booleans should be returned. The default value is FALSE. |
A logical vector of length the size of the 'colNamesToBeChecked' argument, or in the absence of a value the number of columns of the 'inputDT' argument, that is TRUE if the corresponding column of the 'inputDT' argument is a boolean. If the 'returnNames' argument equals TRUE, then only those column names from the aforementioned selection of column of the 'inputDT' argument are returned that are a boolean.
library(data.table) inputDT <- as.data.table(data.frame(x = rep(c(TRUE, FALSE), 5), y = LETTERS[1:10])) asFactorDT(inputDT, c('y')) isLogicalDT(inputDT) isLogicalDT(inputDT, c('x', 'y')) isLogicalDT(inputDT, returnNames = TRUE) isLogicalDT(inputDT, 'x') isLogicalDT(inputDT, c('x', 'y1'))
library(data.table) inputDT <- as.data.table(data.frame(x = rep(c(TRUE, FALSE), 5), y = LETTERS[1:10])) asFactorDT(inputDT, c('y')) isLogicalDT(inputDT) isLogicalDT(inputDT, c('x', 'y')) isLogicalDT(inputDT, returnNames = TRUE) isLogicalDT(inputDT, 'x') isLogicalDT(inputDT, c('x', 'y1'))
Testing if a set of columns of a data.table object corresponds to the numeric data type
isNumericDT(inputDT, colNamesToBeChecked = NULL, returnNames = FALSE)
isNumericDT(inputDT, colNamesToBeChecked = NULL, returnNames = FALSE)
inputDT |
data.table object containing the data of interest. This is an obligatory argument, without default value. |
colNamesToBeChecked |
Character vector containing potential column names of the 'inputDT' argument. The default value is NULL. |
returnNames |
Logical vector of length 1 indicating whether or not the column name of the selected numerics should be returned. The default value is FALSE. |
A logical vector of length the size of the 'colNamesToBeChecked' argument, or in the absence of a value the number of columns of the 'inputDT' argument, that is TRUE if the corresponding column of the 'inputDT' argument is a numeric. If the 'returnNames' argument equals TRUE, then only those column names from the aforementioned selection of column of the 'inputDT' argument are returned that are a numeric.
library(data.table) inputDT <- as.data.table(data.frame(x = seq(1, 20, 2), y = LETTERS[1:10])) asFactorDT(inputDT, c('y')) isNumericDT(inputDT) isNumericDT(inputDT, c('x', 'y')) isNumericDT(inputDT, returnNames = TRUE) isNumericDT(inputDT, 'x') isNumericDT(inputDT, c('x', 'y1'))
library(data.table) inputDT <- as.data.table(data.frame(x = seq(1, 20, 2), y = LETTERS[1:10])) asFactorDT(inputDT, c('y')) isNumericDT(inputDT) isNumericDT(inputDT, c('x', 'y')) isNumericDT(inputDT, returnNames = TRUE) isNumericDT(inputDT, 'x') isNumericDT(inputDT, c('x', 'y1'))
Transform levels of all the factor columns of a data.table object to missing if too few observations pertain to a given level.
lowFreqLevel2MissingDT(inputDT, minNumberLevel = NULL)
lowFreqLevel2MissingDT(inputDT, minNumberLevel = NULL)
inputDT |
data.table object containing the data of interest. This is an obligatory argument, without default value. |
minNumberLevel |
Numeric vector of length 1 that indicates the minimal number of observations that should be observed for a given level in order to keep it. A level with fewer observations is deleted from the list of possible levels of that factor, and the values of its observations are turned into missing values. |
No value is returned. Each level that was underpopulated is also removed from the levels of the respective categorical variable.
library(data.table) inputDT <- as.data.table(data.frame(x = seq(1, 20, 2), y = LETTERS[1:10])) asFactorDT(inputDT, c('y')) levels(inputDT$y) lowFreqLevel2MissingDT(inputDT, 2) levels(inputDT$y) inputDT <- as.data.table(data.frame(x = seq(1, 40, 2), y = c(LETTERS[1:10], LETTERS[1:10]))) asFactorDT(inputDT, c('y')) levels(inputDT$y) lowFreqLevel2MissingDT(inputDT, 1) levels(inputDT$y)
library(data.table) inputDT <- as.data.table(data.frame(x = seq(1, 20, 2), y = LETTERS[1:10])) asFactorDT(inputDT, c('y')) levels(inputDT$y) lowFreqLevel2MissingDT(inputDT, 2) levels(inputDT$y) inputDT <- as.data.table(data.frame(x = seq(1, 40, 2), y = c(LETTERS[1:10], LETTERS[1:10]))) asFactorDT(inputDT, c('y')) levels(inputDT$y) lowFreqLevel2MissingDT(inputDT, 1) levels(inputDT$y)
Glueing, not merging, two data.table objects together, by matching column names
rbindDT(topDT, bottomDT)
rbindDT(topDT, bottomDT)
topDT |
data.table object 1. Its values will be placed at the top of the returned data.table object. This is an obligatory argument, without default value. |
bottomDT |
data.table object 2. Its values will be placed at the bottom of the returned data.table object. This is an obligatory argument, without default value. |
The glued data.table object. Matching column names of 'topDT' and 'bottomDT' will be identified and their values will be placed in one column in the returned data.table object, the values of the 'topDT' argument on top of the values of the 'bottomDT' argument. Non-matching columns will have missing values for the rows in the returned data.table object that correspond to the input data.table object in which the column name was not found.
Remove empty levels from all the factor columns of a data.table object
removeEmptyLevelsDT(inputDT)
removeEmptyLevelsDT(inputDT)
inputDT |
data.table object containing the data of interest. This is an obligatory argument, without default value. |
No value is returned.
library(data.table) inputDT <- as.data.table(data.frame(x = seq(1, 20, 2), y = LETTERS[1:10])) asFactorDT(inputDT, c('y')) levels(inputDT$y) removeEmptyLevelsDT(inputDT) levels(inputDT$y) removeEmptyLevelsDT(inputDT[x < 10]) levels(inputDT$y) # You need to define a new data.table object # in order to make the 'removeEmptyLevelsDT' function work. reducedDT <- inputDT[x < 10] levels(reducedDT$y) removeEmptyLevelsDT(reducedDT) levels(reducedDT$y)
library(data.table) inputDT <- as.data.table(data.frame(x = seq(1, 20, 2), y = LETTERS[1:10])) asFactorDT(inputDT, c('y')) levels(inputDT$y) removeEmptyLevelsDT(inputDT) levels(inputDT$y) removeEmptyLevelsDT(inputDT[x < 10]) levels(inputDT$y) # You need to define a new data.table object # in order to make the 'removeEmptyLevelsDT' function work. reducedDT <- inputDT[x < 10] levels(reducedDT$y) removeEmptyLevelsDT(reducedDT) levels(reducedDT$y)
Setting the reference level of all or a selected set of the factor columns of a data.table object
setRefLevelDT(inputDT, categoricalVar, referenceLevel)
setRefLevelDT(inputDT, categoricalVar, referenceLevel)
inputDT |
data.table object containing the data of interest. This is an obligatory argument, without default value. |
categoricalVar |
Character vector containing potential column names of the 'inputDT' argument. This is an obligatory argument, without default value. |
referenceLevel |
Character vector containing the new reference levels. This is an obligatory argument, without default value. |
No value is returned. Note that the 'categoricalVar' and 'referenceLevel' should match up, meaning that they should be of the same length and the ith element should refer to the same variable.
library(data.table) inputDT <- as.data.table(data.frame(x = LETTERS[11:20], y = LETTERS[1:10])) asFactorDT(inputDT, c('x', 'y')) setRefLevelDT(inputDT) levels(inputDT$x)[1] levels(inputDT$y)[1] setRefLevelDT(inputDT, c('x', 'y'), c('L', 'C')) levels(inputDT$x)[1] levels(inputDT$y)[1] setRefLevelDT(inputDT, c('x', 'y'), c('bla', 'bla')) inputDT <- as.data.table(data.frame(x = seq(1, 20, 2), y = LETTERS[1:10])) asFactorDT(inputDT, c('y')) levels(inputDT$y)[1] setRefLevelDT(inputDT, 'y', 'E') levels(inputDT$y)[1]
library(data.table) inputDT <- as.data.table(data.frame(x = LETTERS[11:20], y = LETTERS[1:10])) asFactorDT(inputDT, c('x', 'y')) setRefLevelDT(inputDT) levels(inputDT$x)[1] levels(inputDT$y)[1] setRefLevelDT(inputDT, c('x', 'y'), c('L', 'C')) levels(inputDT$x)[1] levels(inputDT$y)[1] setRefLevelDT(inputDT, c('x', 'y'), c('bla', 'bla')) inputDT <- as.data.table(data.frame(x = seq(1, 20, 2), y = LETTERS[1:10])) asFactorDT(inputDT, c('y')) levels(inputDT$y)[1] setRefLevelDT(inputDT, 'y', 'E') levels(inputDT$y)[1]
Order the rows of a data.table object by index
sortByRowIndexDT(inputDT, rowIndices)
sortByRowIndexDT(inputDT, rowIndices)
inputDT |
data.table object containing the data of interest. This is an obligatory argument, without default value. |
rowIndices |
Integer vector that contains the row indices according to which the 'inputDT' object should be ordered. This is an obligatory argument, without default value. |
The 'inputDT' data.table object, ordered according to the 'rowIndices' argument. This function assumes that the length of the 'rowIndices' argument corresponds to the number of rows of the 'inputDT' argument. If the length of the 'rowIndices' argument is smaller than the number of rows of the 'inputDT' argument, the values of the 'rowIndices' argument are recycled until as many indices as the number of rows of the 'inputDT' argument are obtained.
library(data.table) inputDT <- as.data.table(data.frame(x = 10:1, y = LETTERS[1:10])) asFactorDT(inputDT, c('y')) inputDT sortByRowIndexDT(inputDT, 10:1) inputDT
library(data.table) inputDT <- as.data.table(data.frame(x = 10:1, y = LETTERS[1:10])) asFactorDT(inputDT, c('y')) inputDT sortByRowIndexDT(inputDT, 10:1) inputDT