This function provides descriptive statistics for exploratory data analysis.
describe(dt)
library(data.table)
# Load the example data set into the workspace.
data("germancredit")

# Stack the data set (converted to a data.table by reference) on top of
# 100 extra rows that carry only a randomly drawn `creditability` label.
# `fill = TRUE` pads every column absent from the extra rows with NA,
# so the combined table contains missing values for describe() to report.
dat <- rbind(
  setDT(germancredit),
  data.table(
    creditability = sample(c("good", "bad"), 100, replace = TRUE)
  ),
  fill = TRUE
)

# Compute the per-variable summary table and print it.
eda <- describe(dat)
eda
#> variable class count
#> <char> <char> <int>
#> 1: age.in.years numeric 1100
#> 2: credit.amount numeric 1100
#> 3: credit.history factor 1100
#> 4: creditability factor 1100
#> 5: duration.in.month numeric 1100
#> 6: foreign.worker factor 1100
#> 7: housing factor 1100
#> 8: installment.rate.in.percentage.of.disposable.income numeric 1100
#> 9: job factor 1100
#> 10: number.of.existing.credits.at.this.bank numeric 1100
#> 11: number.of.people.being.liable.to.provide.maintenance.for numeric 1100
#> 12: other.debtors.or.guarantors factor 1100
#> 13: other.installment.plans factor 1100
#> 14: personal.status.and.sex factor 1100
#> 15: present.employment.since factor 1100
#> 16: present.residence.since numeric 1100
#> 17: property factor 1100
#> 18: purpose character 1100
#> 19: savings.account.and.bonds factor 1100
#> 20: status.of.existing.checking.account factor 1100
#> 21: telephone factor 1100
#> variable class count
#> <char> <char> <int>
#> missing_rate unique_count identical_rate sd Min. 1st Qu. Median
#> <num> <int> <num> <num> <num> <num> <num>
#> 1: 0.0909 53 0.0510 11.3755 19 27.0 33.0
#> 2: 0.0909 921 0.0030 2822.7369 250 1365.5 2319.5
#> 3: 0.0909 5 0.5300 NA NA NA NA
#> 4: 0.0000 2 0.6809 NA NA NA NA
#> 5: 0.0909 33 0.1840 12.0588 4 12.0 18.0
#> 6: 0.0909 2 0.9630 NA NA NA NA
#> 7: 0.0909 3 0.7130 NA NA NA NA
#> 8: 0.0909 4 0.4760 1.1187 1 2.0 3.0
#> 9: 0.0909 4 0.6300 NA NA NA NA
#> 10: 0.0909 4 0.6330 0.5777 1 1.0 1.0
#> 11: 0.0909 2 0.8450 0.3621 1 1.0 1.0
#> 12: 0.0909 3 0.9070 NA NA NA NA
#> 13: 0.0909 3 0.8140 NA NA NA NA
#> 14: 0.0909 4 0.5480 NA NA NA NA
#> 15: 0.0909 5 0.3390 NA NA NA NA
#> 16: 0.0909 4 0.4130 1.1037 1 2.0 3.0
#> 17: 0.0909 4 0.3320 NA NA NA NA
#> 18: 0.0909 10 0.2800 NA NA NA NA
#> 19: 0.0909 5 0.6030 NA NA NA NA
#> 20: 0.0909 4 0.3940 NA NA NA NA
#> 21: 0.0909 2 0.5960 NA NA NA NA
#> missing_rate unique_count identical_rate sd Min. 1st Qu. Median
#> <num> <int> <num> <num> <num> <num> <num>
#> Mean 3rd Qu. Max. NA's
#> <num> <num> <num> <num>
#> 1: 35.546 42.00 75 100
#> 2: 3271.258 3972.25 18424 100
#> 3: NA NA NA NA
#> 4: NA NA NA NA
#> 5: 20.903 24.00 72 100
#> 6: NA NA NA NA
#> 7: NA NA NA NA
#> 8: 2.973 4.00 4 100
#> 9: NA NA NA NA
#> 10: 1.407 2.00 4 100
#> 11: 1.155 1.00 2 100
#> 12: NA NA NA NA
#> 13: NA NA NA NA
#> 14: NA NA NA NA
#> 15: NA NA NA NA
#> 16: 2.845 4.00 4 100
#> 17: NA NA NA NA
#> 18: NA NA NA NA
#> 19: NA NA NA NA
#> 20: NA NA NA NA
#> 21: NA NA NA NA
#> Mean 3rd Qu. Max. NA's
#> <num> <num> <num> <num>