R Script

Ekkart Kleinod  • 

stop execution

just stop

# Abort the script at this point by raising an error with the message 'debug stop'.
stop('debug stop')

stop if row count is 0

# Guard against an empty data frame: print it, then abort with an error.
if (nrow(df) < 1) {
    print(df)
    stop('No data left.')
}

NA-Test

# Keep only the rows where `name` is missing while `vorname` is present.
df |>
    filter(is.na(name) & !is.na(vorname))

Werte aus Spaltennamen in Zeilen umwandeln

pivot_longer

Spalten heißen set_1, set_2, ...

# Reshape the set_* columns into long form. Column names are split at '_':
# the first part names the value column (`.value`), the second part is stored
# in `setnumber10plus`. Rows whose value is NA are dropped.
df |>
    pivot_longer(
        starts_with('set'),
        names_sep = '_',
        names_to = c('.value', 'setnumber10plus'),
        values_drop_na = TRUE
    )

Wenn names_sep mehr Namensteile erzeugt, als in names_to benannt sind (z. B. bei set_1_10plus, set_2_10plus, ...), die überzähligen Teile mit NA in names_to ignorieren:

# Same reshape as for set_1, set_2, ..., but the column names split into three
# parts at '_'. The NA entry in `names_to` discards the third part.
df |>
    pivot_longer(
        starts_with('set'),
        names_sep = '_',
        names_to = c('.value', 'setnumber10plus', NA),
        values_drop_na = TRUE
    )

Spalten umbenennen

eine

# Rename one column; the syntax is new_name = old_name.
df |> rename(neuername = altername)

mehrere: entweder kommagetrennt direkt in rename() oder, wie hier, über einen benannten Vektor mit any_of():

# Rename several columns at once from a named character vector:
# names are the new column names, values are the current column names.
column_renames <- c(
    date      = 'Date',
    home_game = 'H.A',
    set_1_won = 'S1'
)
# any_of() applies only those renames whose current column exists in df.
rename(df, any_of(column_renames))

Spalten auswählen

positiv

# Variant 1: list the columns to keep directly.
df |>
    select(
        spalte1,
        spalte2
    )

# Variant 2: keep the columns named in a character vector.
selcols <- c(
    'spalte1',
    'spalte2'
)
# any_of() takes the vector defined above and ignores names that are not
# columns of df. (The snippet previously referenced an undefined
# `columns_select` here.)
df |>
    select(
        any_of(selcols)
    )

negativ (Spalten entfernen)

# Keep every column except `spalte1`.
df |> select(!spalte1)

Variablen in einen String einsetzen (String-Interpolation, analog zu f-Strings in Python)

# The value of `variable` is inserted where {variable} appears in the template.
variable <- ' - '
str_glue('text{variable}text')

for-Schleife

# Loop over a numeric range.
# Values are only auto-printed at top level, not inside a for body, so the
# glued strings have to be printed explicitly to become visible.
for (set_count in 3:5) {
    print(str_glue('{set_count} Sätze'))
}

# Loop over the elements of a character vector.
for (name in c('Tom', 'Tim', 'Tam')) {
    print(str_glue('{name} Peter'))
}

distinct values of a column for loop

(maybe possible with distinct too?)

# Loop over the distinct values of `season`, preceded by an extra 'overall' entry.
season_overall <- 'overall'

# One row per season together with its row count `n`.
# If the count is not needed, `df |> distinct(season)` yields the same
# season values.
seasons <- df |>
    group_by(
        season
    ) |>
    summarize(
        n = n()
    )

# as.character() keeps the labels when `season` is a factor and gives a plain
# character vector as loop domain (instead of a list mixing element types).
filter_seasons <- c(season_overall, as.character(seasons$season))

for (filter_season in filter_seasons) {
    # ... per-season processing goes here
}

Neue Spalte mit berechneten Werten

Spalte aus Werten anderer Spalten berechnen

# Add a logical column that is TRUE where `other_livepz` exceeds `own_livepz`.
results <- mutate(
    results,
    stronger_opponent = other_livepz > own_livepz
)

Werte in einer Zeile für Summen berücksichtigen, nicht komplette Spalte summieren

ungroup schaltet rowwise wieder aus

# Per-row totals: rowwise() makes sum() operate on the values of one row
# instead of whole columns; ungroup() switches the row-wise mode off again.
df |>
    rowwise() |>
    mutate(
        won_sets = sum(
            set_1_win, set_2_win, set_3_win, set_4_win, set_5_win,
            na.rm = TRUE
        ),
        lost_sets = sum(
            set_1_lost, set_2_lost, set_3_lost, set_4_lost, set_5_lost,
            na.rm = TRUE
        ),
        # both partial sums were computed with na.rm = TRUE, so plain
        # addition gives the same total as sum(won_sets, lost_sets)
        all_sets = won_sets + lost_sets
    ) |>
    ungroup()

Zeilen zählen

# Store the number of rows of df.
rowcount <- nrow(df)

Dateien einlesen

# Read a CSV file into df. `season` is parsed as a factor; the remaining
# columns are not specified here. show_col_types = TRUE asks readr to
# report the column types.
df <- read_csv(
    file = filename,
    show_col_types = TRUE,
    col_types = list(season = col_factor())
)

Dateien speichern

# Write a data frame to '<the_path>/<the_filename>.csv'.
#
# df:           data frame to save; it is printed before saving.
# the_path:     output directory; created (including parents) if missing.
# the_filename: file name without the '.csv' extension.
#
# Progress messages are written with cat(). Row names are not written.
save_data <- function(df, the_path, the_filename) {

    print(df)

    target_dir_missing <- !dir.exists(the_path)
    if (target_dir_missing) {
        cat('Create output directory:', the_path, '\n')
        dir.create(the_path, recursive = TRUE)
    }

    csv_name <- paste0(the_filename, '.csv')
    csv_path <- file.path(the_path, csv_name)
    cat('Saving:', csv_path, '\n')
    write.csv(df, file = csv_path, row.names = FALSE)

}

Spaltentyp umwandeln

# Convert column types in one mutate(); the expressions are evaluated in order.
df |>
    mutate(
        # logical -> factor: per the original note, factors (unlike logical
        # values) can be filled with missing values when grouping
        across(where(is.logical), as.factor),
        # the order of `levels` sets the factor level order
        fct_season = factor(season, levels = c('2015', '2016', '2019'))
    )

Geschachtelte Listen

Listen (list(...)) sind hier besser geeignet als Vektoren (c(...)), da Listen unterschiedliche Typen mischen können

# Nested named list: one entry per location, each holding a logical filter
# value and a file suffix.
home_off <- list(
    'Heim' = list('filter_value' = TRUE, 'file_suffix' = 'home'),
    'Auswärts' = list('filter_value' = FALSE, 'file_suffix' = 'off')
)

# Iterate over the entry names and look up the nested values with [[ ]].
# Values are not auto-printed inside a for body, so print explicitly.
for (location in names(home_off)) {
    print(str_glue('location: {location}'))
    print(str_glue('filter value: {home_off[[location]][["filter_value"]]}'))
    print(str_glue('file suffix: {home_off[[location]][["file_suffix"]]}'))
}