report
Creates a scorecard modeling report and saves it as an xlsx file.
report(dt, y, x, breaks_list, x_name = NULL, special_values = NULL,
seed = 618, save_report = "report", positive = "bad|1", ...)
A data frame or a list of data frames that have both x (predictor/feature) and y (response/label) variables. If multiple data frames are provided, only the first data frame is used for training, and the others are used for testing/validation.
Name of y variable.
Name of x variables. Defaults to NULL. If x is NULL, then all columns except y are counted as x variables.
A list of break points. It can be extracted from woebin
and woebin_adj
via the argument save_breaks_list.
A vector of x variables' names.
The values specified in special_values will be in separate bins. Defaults to NULL.
A random seed used to split the input data frame. Defaults to 618. If it is NULL, the input dt will not be split into two datasets.
The name of xlsx file where the report is to be saved. Defaults to 'report'.
Value of positive class, default "bad|1".
Additional parameters.
# Example usage of report(). The whole script is wrapped in `if (FALSE)` so it
# is never executed automatically: every call below writes an xlsx file.
if (FALSE) {
  data("germancredit")

  # Response variable and the predictors used to build the scorecard.
  y <- "creditability"
  x <- c(
    "status.of.existing.checking.account",
    "duration.in.month",
    "credit.history",
    "purpose",
    "credit.amount",
    "savings.account.and.bonds",
    "present.employment.since",
    "installment.rate.in.percentage.of.disposable.income",
    "personal.status.and.sex",
    "property",
    "age.in.years",
    "other.installment.plans",
    "housing"
  )

  # No values are forced into separate bins in these examples.
  special_values <- NULL

  # Break points per predictor. In the categorical entries, "%,%" joins
  # several levels inside a single string.
  breaks_list <- list(
    status.of.existing.checking.account = c(
      "... < 0 DM%,%0 <= ... < 200 DM",
      "... >= 200 DM / salary assignments for at least 1 year",
      "no checking account"
    ),
    duration.in.month = c(8, 16, 34, 44),
    credit.history = c(
      "no credits taken/ all credits paid back duly%,%all credits at this bank paid back duly",
      "existing credits paid back duly till now",
      "delay in paying off in the past",
      "critical account/ other credits existing (not at this bank)"
    ),
    purpose = c(
      "retraining%,%car (used)",
      "radio/television",
      "furniture/equipment%,%domestic appliances%,%business%,%repairs",
      "car (new)%,%others%,%education"
    ),
    credit.amount = c(1400, 1800, 4000, 9200),
    savings.account.and.bonds = c(
      "... < 100 DM",
      "100 <= ... < 500 DM",
      "500 <= ... < 1000 DM%,%... >= 1000 DM%,%unknown/ no savings account"
    ),
    present.employment.since = c(
      "unemployed%,%... < 1 year",
      "1 <= ... < 4 years",
      "4 <= ... < 7 years",
      "... >= 7 years"
    ),
    installment.rate.in.percentage.of.disposable.income = c(2, 3),
    personal.status.and.sex = c(
      "male : divorced/separated",
      "female : divorced/separated/married",
      "male : single",
      "male : married/widowed"
    ),
    property = c(
      "real estate",
      "building society savings agreement/ life insurance",
      "car or other, not in attribute Savings account/bonds",
      "unknown / no property"
    ),
    age.in.years = c(26, 28, 35, 37),
    other.installment.plans = c("bank%,%stores", "none"),
    housing = c("rent", "own", "for free")
  )

  # NOTE: `special_values` is passed by name in every call below. The fifth
  # positional parameter of report() is `x_name`, so passing it positionally
  # would bind it to the wrong argument.

  # Example I
  # input dt is a data frame
  # split input data frame into two
  report(
    germancredit, y, x, breaks_list,
    special_values = special_values,
    seed = 618,
    save_report = "report1",
    show_plot = c("ks", "lift", "gain", "roc", "lz", "pr", "f1", "density")
  )

  # do not split input data
  report(
    germancredit, y, x, breaks_list,
    special_values = special_values,
    seed = NULL,
    save_report = "report2"
  )

  # Example II
  # input dt is a list
  # only one dataset
  report(
    list(dt = germancredit), y, x, breaks_list,
    special_values = special_values,
    seed = NULL,
    save_report = "report3"
  )

  # multiple datasets (two)
  # The trailing comma in `[rows, ]` is required to sample rows; without it a
  # data frame treats the indices as column positions.
  report(
    list(
      dt1 = germancredit[sample(1000, 500), ],
      dt2 = germancredit[sample(1000, 500), ]
    ),
    y, x, breaks_list,
    special_values = special_values,
    seed = NULL,
    save_report = "report4"
  )

  # multiple datasets (three)
  report(
    list(
      dt1 = germancredit[sample(1000, 500), ],
      dt2 = germancredit[sample(1000, 500), ],
      dt3 = germancredit[sample(1000, 500), ]
    ),
    y, x, breaks_list,
    special_values = special_values,
    seed = NULL,
    save_report = "report5"
  )
}