This function provides descriptive statistics for exploratory data analysis.

describe(dt)

Arguments

dt

A data frame.

Examples

library(data.table)

data("germancredit")
dat = rbind(
  setDT(germancredit),
  data.table(creditability=sample(c("good","bad"),100,replace=TRUE)),
  fill=TRUE)

eda = describe(dat)
eda
#>                                                     variable     class count
#>                                                       <char>    <char> <int>
#>  1:                      status.of.existing.checking.account    factor  1100
#>  2:                                        duration.in.month   numeric  1100
#>  3:                                           credit.history    factor  1100
#>  4:                                                  purpose character  1100
#>  5:                                            credit.amount   numeric  1100
#>  6:                                savings.account.and.bonds    factor  1100
#>  7:                                 present.employment.since    factor  1100
#>  8:      installment.rate.in.percentage.of.disposable.income   numeric  1100
#>  9:                                  personal.status.and.sex    factor  1100
#> 10:                              other.debtors.or.guarantors    factor  1100
#> 11:                                  present.residence.since   numeric  1100
#> 12:                                                 property    factor  1100
#> 13:                                             age.in.years   numeric  1100
#> 14:                                  other.installment.plans    factor  1100
#> 15:                                                  housing    factor  1100
#> 16:                  number.of.existing.credits.at.this.bank   numeric  1100
#> 17:                                                      job    factor  1100
#> 18: number.of.people.being.liable.to.provide.maintenance.for   numeric  1100
#> 19:                                                telephone    factor  1100
#> 20:                                           foreign.worker    factor  1100
#> 21:                                            creditability    factor  1100
#>                                                     variable     class count
#>     missing_rate unique_count identical_rate   min    p25    p50     p75   max
#>            <num>        <int>          <num> <num>  <num>  <num>   <num> <num>
#>  1:       0.0909            4         0.3940    NA     NA     NA      NA    NA
#>  2:       0.0909           33         0.1840     4   12.0   18.0   24.00    72
#>  3:       0.0909            5         0.5300    NA     NA     NA      NA    NA
#>  4:       0.0909           10         0.2800    NA     NA     NA      NA    NA
#>  5:       0.0909          921         0.0030   250 1365.5 2319.5 3972.25 18424
#>  6:       0.0909            5         0.6030    NA     NA     NA      NA    NA
#>  7:       0.0909            5         0.3390    NA     NA     NA      NA    NA
#>  8:       0.0909            4         0.4760     1    2.0    3.0    4.00     4
#>  9:       0.0909            4         0.5480    NA     NA     NA      NA    NA
#> 10:       0.0909            3         0.9070    NA     NA     NA      NA    NA
#> 11:       0.0909            4         0.4130     1    2.0    3.0    4.00     4
#> 12:       0.0909            4         0.3320    NA     NA     NA      NA    NA
#> 13:       0.0909           53         0.0510    19   27.0   33.0   42.00    75
#> 14:       0.0909            3         0.8140    NA     NA     NA      NA    NA
#> 15:       0.0909            3         0.7130    NA     NA     NA      NA    NA
#> 16:       0.0909            4         0.6330     1    1.0    1.0    2.00     4
#> 17:       0.0909            4         0.6300    NA     NA     NA      NA    NA
#> 18:       0.0909            2         0.8450     1    1.0    1.0    1.00     2
#> 19:       0.0909            2         0.5960    NA     NA     NA      NA    NA
#> 20:       0.0909            2         0.9630    NA     NA     NA      NA    NA
#> 21:       0.0000            2         0.6827    NA     NA     NA      NA    NA
#>     missing_rate unique_count identical_rate   min    p25    p50     p75   max
#>         mean        sd     cv
#>        <num>     <num>  <num>
#>  1:       NA        NA     NA
#>  2:   20.903   12.0588 0.5769
#>  3:       NA        NA     NA
#>  4:       NA        NA     NA
#>  5: 3271.258 2822.7369 0.8629
#>  6:       NA        NA     NA
#>  7:       NA        NA     NA
#>  8:    2.973    1.1187 0.3763
#>  9:       NA        NA     NA
#> 10:       NA        NA     NA
#> 11:    2.845    1.1037 0.3880
#> 12:       NA        NA     NA
#> 13:   35.546   11.3755 0.3200
#> 14:       NA        NA     NA
#> 15:       NA        NA     NA
#> 16:    1.407    0.5777 0.4106
#> 17:       NA        NA     NA
#> 18:    1.155    0.3621 0.3135
#> 19:       NA        NA     NA
#> 20:       NA        NA     NA
#> 21:       NA        NA     NA
#>         mean        sd     cv