This function provides descriptive statistics for exploratory data analysis.
describe(dt)
A data frame.
library(data.table)
# Load the example credit dataset.
data("germancredit")

# Stack the dataset with 100 extra rows that carry only a randomly drawn
# creditability label; fill = TRUE pads every other column of those rows
# with NA, so the summary below has missing values to report.
dat <- rbind(
  setDT(germancredit),
  data.table(
    creditability = sample(x = c("good", "bad"), size = 100, replace = TRUE)
  ),
  fill = TRUE
)

# Summarise every column and print the resulting table.
eda <- describe(dat)
eda
#> variable class count
#> <char> <char> <int>
#> 1: status.of.existing.checking.account factor 1100
#> 2: duration.in.month numeric 1100
#> 3: credit.history factor 1100
#> 4: purpose character 1100
#> 5: credit.amount numeric 1100
#> 6: savings.account.and.bonds factor 1100
#> 7: present.employment.since factor 1100
#> 8: installment.rate.in.percentage.of.disposable.income numeric 1100
#> 9: personal.status.and.sex factor 1100
#> 10: other.debtors.or.guarantors factor 1100
#> 11: present.residence.since numeric 1100
#> 12: property factor 1100
#> 13: age.in.years numeric 1100
#> 14: other.installment.plans factor 1100
#> 15: housing factor 1100
#> 16: number.of.existing.credits.at.this.bank numeric 1100
#> 17: job factor 1100
#> 18: number.of.people.being.liable.to.provide.maintenance.for numeric 1100
#> 19: telephone factor 1100
#> 20: foreign.worker factor 1100
#> 21: creditability factor 1100
#> variable class count
#> missing_rate unique_count identical_rate min p25 p50 p75 max
#> <num> <int> <num> <num> <num> <num> <num> <num>
#> 1: 0.0909 4 0.3940 NA NA NA NA NA
#> 2: 0.0909 33 0.1840 4 12.0 18.0 24.00 72
#> 3: 0.0909 5 0.5300 NA NA NA NA NA
#> 4: 0.0909 10 0.2800 NA NA NA NA NA
#> 5: 0.0909 921 0.0030 250 1365.5 2319.5 3972.25 18424
#> 6: 0.0909 5 0.6030 NA NA NA NA NA
#> 7: 0.0909 5 0.3390 NA NA NA NA NA
#> 8: 0.0909 4 0.4760 1 2.0 3.0 4.00 4
#> 9: 0.0909 4 0.5480 NA NA NA NA NA
#> 10: 0.0909 3 0.9070 NA NA NA NA NA
#> 11: 0.0909 4 0.4130 1 2.0 3.0 4.00 4
#> 12: 0.0909 4 0.3320 NA NA NA NA NA
#> 13: 0.0909 53 0.0510 19 27.0 33.0 42.00 75
#> 14: 0.0909 3 0.8140 NA NA NA NA NA
#> 15: 0.0909 3 0.7130 NA NA NA NA NA
#> 16: 0.0909 4 0.6330 1 1.0 1.0 2.00 4
#> 17: 0.0909 4 0.6300 NA NA NA NA NA
#> 18: 0.0909 2 0.8450 1 1.0 1.0 1.00 2
#> 19: 0.0909 2 0.5960 NA NA NA NA NA
#> 20: 0.0909 2 0.9630 NA NA NA NA NA
#> 21: 0.0000 2 0.6827 NA NA NA NA NA
#> missing_rate unique_count identical_rate min p25 p50 p75 max
#> mean sd cv
#> <num> <num> <num>
#> 1: NA NA NA
#> 2: 20.903 12.0588 0.5769
#> 3: NA NA NA
#> 4: NA NA NA
#> 5: 3271.258 2822.7369 0.8629
#> 6: NA NA NA
#> 7: NA NA NA
#> 8: 2.973 1.1187 0.3763
#> 9: NA NA NA
#> 10: NA NA NA
#> 11: 2.845 1.1037 0.3880
#> 12: NA NA NA
#> 13: 35.546 11.3755 0.3200
#> 14: NA NA NA
#> 15: NA NA NA
#> 16: 1.407 0.5777 0.4106
#> 17: NA NA NA
#> 18: 1.155 0.3621 0.3135
#> 19: NA NA NA
#> 20: NA NA NA
#> 21: NA NA NA
#> mean sd cv