web-dev-qa-db-ja.com

dplyrでの転置

次のdata.frameがあります

df = structure(list(HEADER = c("HOME_TRPM", "AWAY_TRPM", "HOME_TEAM","AWAY_TEAM"),
                     price = c("0.863104076023855", "-0.845186446996287","CHA", "NOP")),
               .Names = c("HEADER", "price"), row.names = c(NA, 4L), class = "data.frame")

df
#>      HEADER              price
#> 1 HOME_TRPM  0.863104076023855
#> 2 AWAY_TRPM -0.845186446996287
#> 3 HOME_TEAM                CHA
#> 4 AWAY_TEAM                NOP

転置したいt()を使用せずにdplyrでそれを行うにはどうすればよいですか?私は試した

df %>% tidyr::spread(HEADER , price)

しかし、それはフラットな構造を与えませんが、代わりにこれを行います:

structure(list(AWAY_TEAM = c(NA, NA, NA, "NOP"),
     AWAY_TRPM = c(NA, "-0.845186446996287", NA, NA), 
     HOME_TEAM = c(NA, NA, "CHA", NA),
     HOME_TRPM = c("0.863104076023855", NA, NA, NA)),
 .Names = c("AWAY_TEAM", "AWAY_TRPM", "HOME_TEAM", "HOME_TRPM"),
 class = "data.frame", row.names = c(NA, 4L))

結果のdata.frameは次のようになります。

structure(list(HOME_TRPM = "0.863104076023855",
    AWAY_TRPM = "-0.845186446996287",
    HOME_TEAM = "CHA", 
    AWAY_TEAM = "NOP"), 
.Names = c("HOME_TRPM", "AWAY_TRPM", "HOME_TEAM", "AWAY_TEAM"), 
row.names = c(NA, -1L), class = "data.frame"))
10
geodex

tidyrではなくdplyrが必要だと思います。

_library(tidyr)
library(dplyr)
df %>% mutate(group = 1) %>%
       spread(HEADER, price)

  group AWAY_TEAM          AWAY_TRPM HOME_TEAM         HOME_TRPM
1     1       NOP -0.845186446996287       CHA 0.863104076023855
_

これを使用して、グループを指定できます-select(-group)を追加して後で削除できます。

17
jeremycg

これはもともと投稿されたものなので、彼らはtidyrを更新したに違いありません。

> library(dplyr)
> library(tidyr)
Warning message:
package ‘tidyr’ was built under R version 3.4.4 
> df
         HEADER              price
    1 HOME_TRPM  0.863104076023855
    2 AWAY_TRPM -0.845186446996287
    3 HOME_TEAM                CHA
    4 AWAY_TEAM                NOP

    > tidyr::spread(df, HEADER, price)
      AWAY_TEAM          AWAY_TRPM HOME_TEAM         HOME_TRPM
    1       NOP -0.845186446996287       CHA 0.863104076023855

あなたがより大きなデータフレームを持っているなら、あなたはいつでも集めて、そして広げることができます:

> mdf <- data.frame(Things = c("Cookies","Cake","Knives","Kittens", "Politics"), Darkness = sample(1:5), Despair = sample(1:5), Defeat = sample(1:5))> mdf 
    Things Darkness Despair Defeat
1  Cookies        3       4      1
2     Cake        2       2      5
3   Knives        1       3      2
4  Kittens        5       5      3
5 Politics        4       1      4
> mdf %>% tidyr::gather(Idea, Warning_Level, Darkness:Defeat)
     Things     Idea Warning_Level
1   Cookies Darkness             3
2      Cake Darkness             2
3    Knives Darkness             1
4   Kittens Darkness             5
5  Politics Darkness             4
6   Cookies  Despair             4
7      Cake  Despair             2
8    Knives  Despair             3
9   Kittens  Despair             5
10 Politics  Despair             1
11  Cookies   Defeat             1
12     Cake   Defeat             5
13   Knives   Defeat             2
14  Kittens   Defeat             3
15 Politics   Defeat             4
> mdf %>% tidyr::gather(Idea, Warning_Level, Darkness:Defeat) %>% tidyr::spread(Things, Warning_Level)
      Idea Cake Cookies Kittens Knives Politics
1 Darkness    2       3       5      1        4
2   Defeat    5       1       3      2        4
3  Despair    2       4       5      3        1
4
KeelyD

tibbleファミリーの)tidyverseパッケージを使用すると、t()の望ましくない影響を排除できます。

 df_t = as_tibble(t(df), rownames = "row_names")
0
Samuel