Проблемы преобразования фрейма данных в числовой формат в R
У меня есть фрейм данных с числовыми и символьными значениями. Значения NA в нём хранятся не как настоящие NA, определённые в R, а как символьные значения. Как я могу преобразовать символьные NA в числовое значение 0? Я хочу, чтобы в этих столбцах стояли 0, потому что не хочу удалять их из моего фрейма данных.
Я пробовал
df[is.na(df)] <-0
Но это даёт только "0" в виде символьных строк, а не числовых значений.
df <- as.numeric(as.character(df))
Дает мне предупреждающее сообщение:
NAs introduced by coercion
Есть ли другие доступные решения? Спасибо.
Вот небольшой воспроизводимый пример:
structure(list(DNB = c(2.05, 2.05, 2.06, 2.32, 2.32, 2.32), `NORSK HYDRO` =
c(2.59,
2.59, 2.65, 2.81, 2.63, 2.63), ORKLA = c(2.29, 2.29, 2.18, 2.31,
2.25, 2.25), STOREBRAND = c(2.28, 2.28, 2.56, 2.88, 2.94, 2.94
), ATEA = c(2.25, 2.25, 2, 2, 2, 2), `SCHIBSTED A` = c(3.23,
3.23, 3.08, 2.92, 2.92, 2.92), BONHEUR = c(2, 2, 2, 2, 2, 2),
EKORNES = c(2.25, 2.25, 2.25, 2.25, 2.25, 2.25), `KONGSBERG GRUPPEN` =
c(2.8,
2.8, 2.5, 2.5, 2.5, 2.5), `TOMRA SYSTEMS` = c(2.43, 2.43,
2.29, 2.29, 2.29, 2.29), VEIDEKKE = c(2.33, 2.33, 2.5, 2.5,
2.33, 2.33), `ARENDALS FOSSEKOMPANI` = c(NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_
), `OLAV THON EIEP.` = c(3, 3, 2.8, 2.8, 2.8, 2.8), `PETROLEUM GEO SERVICES` = c(3.13,
3.13, 2.86, 2.63, 2.63, 2.63), `SPAREBANK 1 SR BANK` = c(3,
3, 3, 3, 3, 3), `STOLT-NIELSEN` = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_), `ODFJELL 'A'` = c(2.45, 2.45,
2.4, 2.6, 2.4, 2.4), `SPAREBANK 1 NORD-NORGE` = c(3, 3, 3,
3, 3, 3), `SPAREBANK 1 SMN` = c(3, 3, 3, 3, 3, 3), `WILHS.WILHELMSEN HDG.'A'` = c(2.67,
2.67, 2.78, 2.67, 2.67, 2.67), `NORDEA BANK (~NK)` = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), `ATLAS COPCO 'A' (~NK)` = c(3.08,
3.08, 3.1, 2.95, 2.95, 2.95), `VOLVO 'B' (~NK)` = c(3.13,
3.13, 3.17, 2.79, 2.59, 2.59), `SANDVIK (~NK)` = c(3, 3,
2.75, 3.04, 3.09, 3.09), `SWEDBANK 'A' (~NK)` = c(2.29, 2.29,
2.21, 2.05, 2.1, 2.1), `ERICSSON 'B' (~NK)` = c(2.33, 2.33,
2.38, 2.52, 2.44, 2.44), `SVENSKA HANDBKN.'A' (~NK)` = c(2.32,
2.32, 2.33, 2.55, 2.55, 2.55), `HENNES & MAURITZ 'B' (~NK)` = c(3.35,
3.35, 3.42, 3.17, 3.06, 3.06), `SEB 'A' (~NK)` = c(2.9, 2.9,
2.9, 3, 3.09, 3.09), `INVESTOR 'B' (~NK)` = c(2.47, 2.47,
2.38, 2.69, 2.62, 2.62), `SWEDISH MATCH (~NK)` = c(2.08,
2.08, 1.83, 1.69, 1.69, 1.69), `ELECTROLUX 'B' (~NK)` = c(3.38,
3.38, 3.23, 3.13, 3.13, 3.13), `SKANSKA 'B' (~NK)` = c(2.5,
2.5, 2.43, 2.85, 2.86, 2.86), `SCA 'B' (~NK)` = c(2.96, 2.96,
2.87, 2.64, 2.55, 2.55), `SECURITAS 'B' (~NK)` = c(3.64,
3.64, 3.78, 4, 4, 4), `HOLMEN 'B' (~NK)` = c(3.16, 3.16,
3.26, 3.05, 3.24, 3.24), `SSAB 'A' (~NK)` = c(2.33, 2.33,
2.29, 2.41, 2.41, 2.41), `ERICSSON 'A' (~NK)` = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_), `INVESTOR 'A' (~NK)` = c(NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_
), `VOLVO 'A' (~NK)` = c(NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), `NOVO NORDISK 'B' (~NK)` = c(2.52,
2.52, 2.55, 2.64, 2.55, 2.55), `DANSKE BANK (~NK)` = c(2.12,
2.12, 2.38, 2.53, 2.58, 2.58), `COLOPLAST 'B' (~NK)` = c(3.8,
3.8, 4.13, 4.13, 4.13, 4.13), `CARLSBERG 'B' (~NK)` = c(3.11,
3.11, 3.06, 3.24, 3.24, 3.24), `A P MOLLER - MAERSK 'B' (~NK)` = c(2.89,
2.89, 2.75, 2.63, 2.75, 2.75), `TDC (~NK)` = c(2.93, 2.93,
2.96, 2.96, 3.04, 3.04), `TOPDANMARK (~NK)` = c(2.78, 2.78,
2.56, 2.8, 2.8, 2.8), `WILLIAM DEMANT HLDG. (~NK)` = c(4,
4, 3.78, 4, 3.78, 3.78), `JYSKE BANK (~NK)` = c(1.5, 1.5,
1.5, 1.5, 1.5, 1.5), `KOBENHAVNS LUFTHAVNE (~NK)` = c(2.56,
2.56, 2.47, 2.75, 2.56, 2.56), `NKT (~NK)` = c(2.25, 2.25,
2.25, 2.25, 2.25, 2.25), `ROCKWOOL 'B' (~NK)` = c(3.25, 3.25,
3, 3, 3, 3), `SYDBANK (~NK)` = c(3.6, 3.6, 3.2, 4, 4, 4),
`FLSMIDTH & CO.'B' (~NK)` = c(2.6, 2.6, 2.4, 2.4, 2.4, 2.4
), `GN STORE NORD (~NK)` = c(3, 3, 2.78, 2.89, 3.11, 3.11
), `ALK-ABELLO (~NK)` = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), `BANG & OLUFSEN 'B' (~NK)` = c(4, 4,
3.67, 3.22, 3.22, 3.22), `SANTA FE GROUP (~NK)` = c(3.5,
3.5, 3.4, 3.22, 3.44, 3.44), `CARLSBERG 'A' (~NK)` = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), `ROCKWOOL 'A' (~NK)` = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), `NOKIA (~NK)` = c(1.89,
1.89, 2.04, 1.86, 1.81, 1.81), `SAMPO 'A' (~NK)` = c(2.08,
2.08, 2, 2.36, 2.36, 2.36), `KONE 'B' (~NK)` = c(3.71, 3.71,
3.77, 3.67, 3.64, 3.64), `UPM-KYMMENE (~NK)` = c(2.43, 2.43,
2.45, 2.09, 2.04, 2.04), `WARTSILA (~NK)` = c(2.13, 2.13,
2.07, 2.07, 2.07, 2.07), `METSO (~NK)` = c(2.41, 2.41, 2.41,
2.47, 2.47, 2.47), `STORA ENSO 'R' (~NK)` = c(2.76, 2.76,
2.95, 2.74, 2.57, 2.57), `HUHTAMAKI (~NK)` = c(2.33, 2.33,
2.13, 2.25, 2.25, 2.25), `FINNAIR (~NK)` = c(3, 3, 3, 2.92,
2.92, 2.92), `KEMIRA (~NK)` = c(2.4, 2.4, 2.4, 2.67, 2.8,
2.8), `UPONOR (~NK)` = c(2, 2, 2, 1.8, 1.8, 1.8), `KESKO 'B' (~NK)` = c(2.45,
2.45, 3.09, 2.58, 2.67, 2.67), `ORION 'B' (~NK)` = c(2.57,
2.57, 2.57, 2.63, 2.63, 2.63), `OUTOKUMPU 'A' (~NK)` = c(3.31,
3.31, 3.31, 3, 2.63, 2.63), `RAISIO (~NK)` = c(2.91, 2.91,
3.09, 3.08, 3, 3), `TIETO OYJ (~NK)` = c(2, 2, 2.11, 2.4,
2.4, 2.4), `METSA BOARD 'B' (~NK)` = c(3.26, 3.26, 3.32,
3.14, 2.84, 2.84), `ORION 'A' (~NK)` = c(NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_
), `STOCKMANN 'A' (~NK)` = c(NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_
), `STORA ENSO 'A' (~NK)` = c(NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_
)), .Names = c("DNB", "NORSK HYDRO", "ORKLA", "STOREBRAND",
"ATEA", "SCHIBSTED A", "BONHEUR", "EKORNES", "KONGSBERG GRUPPEN",
"TOMRA SYSTEMS", "VEIDEKKE", "ARENDALS FOSSEKOMPANI", "OLAV THON EIEP.",
"PETROLEUM GEO SERVICES", "SPAREBANK 1 SR BANK", "STOLT-NIELSEN",
"ODFJELL 'A'", "SPAREBANK 1 NORD-NORGE", "SPAREBANK 1 SMN",
"WILHS.WILHELMSEN HDG.'A'",
"NORDEA BANK (~NK)", "ATLAS COPCO 'A' (~NK)", "VOLVO 'B' (~NK)",
"SANDVIK (~NK)", "SWEDBANK 'A' (~NK)", "ERICSSON 'B' (~NK)",
"SVENSKA HANDBKN.'A' (~NK)", "HENNES & MAURITZ 'B' (~NK)", "SEB 'A' (~NK)",
"INVESTOR 'B' (~NK)", "SWEDISH MATCH (~NK)", "ELECTROLUX 'B' (~NK)",
"SKANSKA 'B' (~NK)", "SCA 'B' (~NK)", "SECURITAS 'B' (~NK)",
"HOLMEN 'B' (~NK)", "SSAB 'A' (~NK)", "ERICSSON 'A' (~NK)", "INVESTOR 'A'
(~NK)",
"VOLVO 'A' (~NK)", "NOVO NORDISK 'B' (~NK)", "DANSKE BANK (~NK)",
"COLOPLAST 'B' (~NK)", "CARLSBERG 'B' (~NK)", "A P MOLLER - MAERSK 'B'
(~NK)",
"TDC (~NK)", "TOPDANMARK (~NK)", "WILLIAM DEMANT HLDG. (~NK)",
"JYSKE BANK (~NK)", "KOBENHAVNS LUFTHAVNE (~NK)", "NKT (~NK)",
"ROCKWOOL 'B' (~NK)", "SYDBANK (~NK)", "FLSMIDTH & CO.'B' (~NK)",
"GN STORE NORD (~NK)", "ALK-ABELLO (~NK)", "BANG & OLUFSEN 'B' (~NK)",
"SANTA FE GROUP (~NK)", "CARLSBERG 'A' (~NK)", "ROCKWOOL 'A' (~NK)",
"NOKIA (~NK)", "SAMPO 'A' (~NK)", "KONE 'B' (~NK)", "UPM-KYMMENE (~NK)",
"WARTSILA (~NK)", "METSO (~NK)", "STORA ENSO 'R' (~NK)", "HUHTAMAKI (~NK)",
"FINNAIR (~NK)", "KEMIRA (~NK)", "UPONOR (~NK)", "KESKO 'B' (~NK)",
"ORION 'B' (~NK)", "OUTOKUMPU 'A' (~NK)", "RAISIO (~NK)", "TIETO OYJ (~NK)",
"METSA BOARD 'B' (~NK)", "ORION 'A' (~NK)", "STOCKMANN 'A' (~NK)",
"STORA ENSO 'A' (~NK)"), row.names = c(NA, 6L), class = "data.frame")
1 ответ:
Мы можем пройти в цикле по столбцам набора данных, заменить (replace) значения NA на 0 и преобразовать каждый столбец в numeric (так как некоторые столбцы имеют тип character):
df[] <- lapply(df, function(x) as.numeric(replace(x, is.na(x), 0)))
Метод автора вопроса (сначала заменить NA на 0) тоже должен работать, но столбцы character остаются character, если мы не преобразуем их в numeric:
df[is.na(df)] <-0
df[] <- lapply(df, as.numeric)
Здесь у нас нет никаких столбцов типа factor, поэтому as.character не требуется. Обратите внимание, что as.character/as.numeric применяются к векторам/столбцам, а не ко всему набору данных.