report creates a scorecard modeling report and saves it as an xlsx file.

report(dt, y, x, breaks_list, x_name = NULL, special_values = NULL,
  seed = 618, save_report = "report", positive = "bad|1", ...)

Arguments

dt

A data frame or a list of data frames that have both x (predictor/feature) and y (response/label) variables. If multiple data frames are provided, only the first data frame is used for training, and the others are used for testing/validation.

y

Name of y variable.

x

Name of x variables. Defaults to NULL. If x is NULL, then all columns except y are counted as x variables.

breaks_list

A list of break points. It can be extracted from woebin and woebin_adj via the argument save_breaks_list.

x_name

A vector of the x variables' names.

special_values

The values specified in special_values will be in separate bins. Defaults to NULL.

seed

A random seed used to split the input data frame. Defaults to 618. If it is NULL, the input dt will not be split into two datasets.

save_report

The name of xlsx file where the report is to be saved. Defaults to 'report'.

positive

Value of the positive class. Defaults to "bad|1".

...

Additional parameters.

Examples

if (FALSE) {
# Example script for report(): builds scorecard modeling reports from the
# germancredit data and writes each one to an xlsx file (report1 ... report5).
data("germancredit")

# response variable and the predictors used in the scorecard
y <- "creditability"
x <- c(
  "status.of.existing.checking.account",
  "duration.in.month",
  "credit.history",
  "purpose",
  "credit.amount",
  "savings.account.and.bonds",
  "present.employment.since",
  "installment.rate.in.percentage.of.disposable.income",
  "personal.status.and.sex",
  "property",
  "age.in.years",
  "other.installment.plans",
  "housing"
)

special_values <- NULL
# break points per predictor; "%,%" joins several categories into one bin
breaks_list <- list(
 status.of.existing.checking.account=c("... < 0 DM%,%0 <= ... < 200 DM",
   "... >= 200 DM / salary assignments for at least 1 year", "no checking account"),
 duration.in.month=c(8, 16, 34, 44),
 credit.history=c(
   "no credits taken/ all credits paid back duly%,%all credits at this bank paid back duly",
   "existing credits paid back duly till now", "delay in paying off in the past",
   "critical account/ other credits existing (not at this bank)"),
 purpose=c("retraining%,%car (used)", "radio/television",
   "furniture/equipment%,%domestic appliances%,%business%,%repairs",
   "car (new)%,%others%,%education"),
 credit.amount=c(1400, 1800, 4000, 9200),
 savings.account.and.bonds=c("... < 100 DM", "100 <= ... < 500 DM",
   "500 <= ... < 1000 DM%,%... >= 1000 DM%,%unknown/ no savings account"),
 present.employment.since=c("unemployed%,%... < 1 year", "1 <= ... < 4 years",
   "4 <= ... < 7 years", "... >= 7 years"),
 installment.rate.in.percentage.of.disposable.income=c(2, 3),
 personal.status.and.sex=c("male : divorced/separated", "female : divorced/separated/married",
   "male : single", "male : married/widowed"),
 property=c("real estate", "building society savings agreement/ life insurance",
   "car or other, not in attribute Savings account/bonds", "unknown / no property"),
 age.in.years=c(26, 28, 35, 37),
 other.installment.plans=c("bank%,%stores", "none"),
 housing=c("rent", "own", "for free")
 )

# NOTE: special_values is always passed by name below. The fifth positional
# parameter of report() is x_name, so passing it positionally would bind it
# to the wrong argument.

# Example I
# input dt is a data frame
# split the input data frame into two datasets
report(germancredit, y, x, breaks_list, special_values = special_values,
  seed = 618, save_report = "report1",
  show_plot = c("ks", "lift", "gain", "roc", "lz", "pr", "f1", "density"))

# do not split the input data
report(germancredit, y, x, breaks_list, special_values = special_values,
  seed = NULL, save_report = "report2")

# Example II
# input dt is a list
# only one dataset
report(list(dt = germancredit), y, x, breaks_list,
  special_values = special_values, seed = NULL, save_report = "report3")

# Row samples use `[i, ]`: a single index `[i]` on a data frame selects
# columns, not rows.
n_obs <- nrow(germancredit)

# two datasets: the first is used for training, the second for testing
report(list(dt1 = germancredit[sample(n_obs, 500), ],
            dt2 = germancredit[sample(n_obs, 500), ]), y, x, breaks_list,
  special_values = special_values, seed = NULL, save_report = "report4")

# three datasets: the first is used for training, the others for testing
report(list(dt1 = germancredit[sample(n_obs, 500), ],
            dt2 = germancredit[sample(n_obs, 500), ],
            dt3 = germancredit[sample(n_obs, 500), ]), y, x, breaks_list,
  special_values = special_values, seed = NULL, save_report = "report5")

}