R Script
Ekkart Kleinod
•
Auf dieser Seite
- stop execution
- NA-Test
- Werte aus Spaltennamen in Zeilen umwandeln
- Spalten umbenennen
- Spalten auswählen
- Text in String ersetzen (f-Funktion)
- for-Schleife
- distinct values of a column for loop
- Neue Spalte mit berechneten Werten
- Zeilen zählen
- Dateien einlesen
- Dateien speichern
- Spaltentyp umwandeln
- Geschachtelte Listen
stop execution
just stop
# Raise an error at this point so the script stops here (temporary debugging aid).
stop('debug stop')
stop if row count is 0
# Abort as soon as the data frame has no rows left.
# The (empty) frame is printed first so its columns are visible in the log.
if (nrow(df) == 0) {
  print(df)
  stop('No data left.')
}
NA-Test
# Keep rows where `name` is missing while `vorname` is present.
df |>
  filter(is.na(name) & !is.na(vorname))
Werte aus Spaltennamen in Zeilen umwandeln
pivot_longer
Spalten heißen set_1, set_2, ...
# Turn the set_* columns into rows.
# Each column name is split at '_': the first part ('.value') names the output
# column, the second part is stored in `setnumber10plus`.
df |>
  pivot_longer(
    cols = starts_with('set'),
    names_sep = '_',
    names_to = c('.value', 'setnumber10plus'),
    values_drop_na = TRUE
  )
Wenn die Spaltennamen durch den Separator in mehr Teile zerfallen als benötigt (z. B. set_1_10plus, set_2_10plus, ...), die überzähligen Teile mit NA in names_to ignorieren:
# Same reshaping, but the column names split into three parts at '_'.
# The NA entry in `names_to` discards the third part instead of creating a
# column for it.
df |>
  pivot_longer(
    cols = starts_with('set'),
    names_sep = '_',
    names_to = c('.value', 'setnumber10plus', NA),
    values_drop_na = TRUE
  )
Spalten umbenennen
eine
# Rename a single column; the syntax is new_name = old_name.
df |>
  rename(
    neuername = altername
  )
mehrere: entweder kommagetrennt direkt in rename() oder über einen benannten Vektor (neuer Name = alter Name) mit any_of():
# Lookup vector for renaming: names are the new column names,
# values are the existing column names.
column_renames <- c(
  date = 'Date',
  home_game = 'H.A',
  set_1_won = 'S1'
)

# any_of() renames only those columns that actually exist in df.
df |>
  rename(any_of(column_renames))
Spalten auswählen
positiv
# Select columns by writing their names directly.
df |>
  select(spalte1, spalte2)

# Select columns whose names are stored in a character vector.
# any_of() silently skips names that are not present in df.
selcols <- c('spalte1', 'spalte2')

df |>
  select(any_of(selcols))
negativ (Spalten entfernen)
# Keep every column except spalte1.
df |>
  select(
    !spalte1
  )
Text in String ersetzen (f-Funktion)
# str_glue() replaces {variable} with the value of the variable of that name.
variable <- ' - '

str_glue('text{variable}text')
for-Schleife
# Loop over a numeric range. 3:5 is already a vector, so no c() is needed.
# Values are not auto-printed inside a for loop, hence the explicit print().
for (set_count in 3:5) {
  print(str_glue('{set_count} Sätze'))
}

# Loop over the elements of a character vector.
for (name in c('Tom', 'Tim', 'Tam')) {
  print(str_glue('{name} Peter'))
}
distinct values of a column for loop
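(also possible with `distinct(season)` or `unique(df$season)`; these keep the order of first appearance, whereas `group_by()` + `summarize()` returns the seasons ordered by the grouping key)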
# Extra label that is looped over in addition to the individual seasons.
season_overall <- 'overall'

# One row per distinct season, with the number of rows of each season in `n`.
seasons <- df |>
  group_by(season) |>
  summarize(n = n())

# c() with a list argument returns a list, so the label and the season values
# are kept as separate elements instead of being coerced to one atomic type.
filter_seasons <- c(season_overall, as.list(seasons$season))

for (filter_season in filter_seasons) {
  # ...
}
Neue Spalte mit berechneten Werten
Spalte aus Werten anderer Spalten berechnen
# Add a logical column: TRUE where the opponent's value exceeds one's own.
results <- results |>
  mutate(
    stronger_opponent = other_livepz > own_livepz
  )
Werte in einer Zeile für Summen berücksichtigen, nicht komplette Spalte summieren
ungroup schaltet rowwise wieder aus
# rowwise() makes sum() work across the listed columns of each single row
# instead of summing whole columns; ungroup() switches row-wise mode off again.
df |>
  rowwise() |>
  mutate(
    won_sets = sum(
      set_1_win, set_2_win, set_3_win, set_4_win, set_5_win,
      na.rm = TRUE
    ),
    lost_sets = sum(
      set_1_lost, set_2_lost, set_3_lost, set_4_lost, set_5_lost,
      na.rm = TRUE
    ),
    all_sets = sum(won_sets, lost_sets)
  ) |>
  ungroup()
Zeilen zählen
# Store the number of rows of df.
rowcount <- nrow(df)
Dateien einlesen
# Read a CSV file, parsing the `season` column as a factor.
# show_col_types = TRUE prints the column specification that was used.
df <- read_csv(
  filename,
  col_types = list(
    season = col_factor()
  ),
  show_col_types = TRUE
)
Dateien speichern
#' Print a data frame and write it to a CSV file
#'
#' Prints `df`, creates `the_path` (including parent directories) if it does
#' not exist yet, and writes `df` to `<the_path>/<the_filename>.csv` without
#' row names. Progress messages are written with `cat()`.
#'
#' @param df Data frame to save.
#' @param the_path Output directory.
#' @param the_filename File name without the `.csv` extension.
#' @return The value of `write.csv()`; the function is called for its side
#'   effects.
save_data <- function(df, the_path, the_filename) {
  print(df)

  if (!dir.exists(the_path)) {
    cat('Create output directory:', the_path, '\n')
    dir.create(the_path, recursive = TRUE)
  }

  filename <- file.path(the_path, paste0(the_filename, '.csv'))
  cat('Saving:', filename, '\n')
  write.csv(df, filename, row.names = FALSE)
}
Spaltentyp umwandeln
df |>
  mutate(
    # factors, unlike logical values, can be filled with missing values
    # when grouping
    across(where(is.logical), as.factor)
  ) |>
  mutate(
    # the order of `levels` sets the factor level order
    fct_season = factor(season, levels = c('2015', '2016', '2019'))
  )
Geschachtelte Listen
lieber als Vektoren (c(...)), da Listen Typen mischen können
# Nested list: one named entry per location, each holding its own settings.
home_off <- list(
  'Heim' = list('filter_value' = TRUE, 'file_suffix' = 'home'),
  'Auswärts' = list('filter_value' = FALSE, 'file_suffix' = 'off')
)

# Iterate over the entry names and look up the inner values with [[ ]].
# Values are not auto-printed inside a for loop, hence the explicit print().
for (location in names(home_off)) {
  print(str_glue('location: {location}'))
  print(str_glue('filter value: {home_off[[location]][["filter_value"]]}'))
  print(str_glue('file suffix: {home_off[[location]][["file_suffix"]]}'))
}