Returns the subset of the transaction data stored within the given clv.data
object that meets the given conditions.
The given expressions are forwarded to the data.table
of transactions.
Possible columns to subset on and to select are Id, Date, and Price (if present).
# S3 method for class 'clv.data'
subset(x, subset, select, sample = c("full", "estimation", "holdout"), ...)
clv.data
to subset
logical expression indicating rows to keep
expression indicating columns to keep
Name of sample for which transactions should be extracted: "full", "estimation", or "holdout".
further arguments passed to data.table::subset
A copy of the data.table of selected transactions. May contain columns Id, Date, and Price.
data.table's subset
# don't test because of the ncpu=2 limit on CRAN (too fast)
library(data.table) # for between()
data(cdnow)
clv.cdnow <- clvdata(cdnow,
date.format="ymd",
time.unit = "week",
estimation.split = "1997-09-30")
# all transactions of customer "1"
subset(clv.cdnow, Id=="1")
#> Key: <Id, Date>
#> Id Date Price
#> <char> <Date> <num>
#> 1: 1 1997-01-01 29.33
#> 2: 1 1997-01-18 29.73
#> 3: 1 1997-08-02 14.96
#> 4: 1 1997-12-12 26.48
subset(clv.cdnow, subset = Id=="1")
#> Key: <Id, Date>
#> Id Date Price
#> <char> <Date> <num>
#> 1: 1 1997-01-01 29.33
#> 2: 1 1997-01-18 29.73
#> 3: 1 1997-08-02 14.96
#> 4: 1 1997-12-12 26.48
# all transactions of customer "111" in the estimation period...
subset(clv.cdnow, Id=="111", sample="estimation")
#> Key: <Id, Date>
#> Id Date Price
#> <char> <Date> <num>
#> 1: 111 1997-01-06 9.97
#> 2: 111 1997-01-08 12.97
# ... and in the holdout period
subset(clv.cdnow, Id=="111", sample="holdout")
#> Key: <Id, Date>
#> Id Date Price
#> <char> <Date> <num>
#> 1: 111 1998-06-04 12.58
# all transactions of customers "1", "2", and "999"
subset(clv.cdnow, Id %in% c("1","2","999"))
#> Key: <Id, Date>
#> Id Date Price
#> <char> <Date> <num>
#> 1: 1 1997-01-01 29.33
#> 2: 1 1997-01-18 29.73
#> 3: 1 1997-08-02 14.96
#> 4: 1 1997-12-12 26.48
#> 5: 2 1997-01-01 63.34
#> 6: 2 1997-01-13 11.77
#> 7: 999 1997-02-07 10.78
#> 8: 999 1997-02-19 10.78
# all transactions on "1997-02-16"
subset(clv.cdnow, Date == "1997-02-16")
#> Key: <Id, Date>
#> Id Date Price
#> <char> <Date> <num>
#> 1: 1154 1997-02-16 14.96
#> 2: 1207 1997-02-16 46.29
#> 3: 1230 1997-02-16 15.36
#> 4: 1231 1997-02-16 52.87
#> 5: 1232 1997-02-16 14.37
#> 6: 1233 1997-02-16 86.52
#> 7: 1234 1997-02-16 29.33
#> 8: 1235 1997-02-16 14.37
#> 9: 1236 1997-02-16 34.31
#> 10: 1237 1997-02-16 24.90
#> 11: 1238 1997-02-16 15.96
#> 12: 1239 1997-02-16 62.65
#> 13: 1240 1997-02-16 12.77
#> 14: 1241 1997-02-16 47.98
#> 15: 1242 1997-02-16 12.49
#> 16: 1243 1997-02-16 32.37
#> 17: 1244 1997-02-16 39.91
#> 18: 1245 1997-02-16 46.49
#> 19: 1246 1997-02-16 51.67
#> 20: 1247 1997-02-16 15.36
#> 21: 1248 1997-02-16 39.35
#> 22: 1249 1997-02-16 30.72
#> 23: 1250 1997-02-16 28.76
#> 24: 1251 1997-02-16 11.77
#> 25: 1252 1997-02-16 39.91
#> 26: 1253 1997-02-16 36.31
#> 27: 1254 1997-02-16 17.97
#> 28: 1255 1997-02-16 13.97
#> 29: 1256 1997-02-16 21.76
#> 30: 1257 1997-02-16 10.77
#> 31: 1258 1997-02-16 32.92
#> 32: 1259 1997-02-16 104.72
#> 33: 1260 1997-02-16 11.77
#> 34: 1261 1997-02-16 14.79
#> 35: 1262 1997-02-16 46.36
#> 36: 1263 1997-02-16 15.36
#> 37: 1264 1997-02-16 11.77
#> 38: 441 1997-02-16 13.97
#> 39: 552 1997-02-16 14.99
#> 40: 572 1997-02-16 84.39
#> 41: 641 1997-02-16 39.32
#> 42: 761 1997-02-16 78.01
#> 43: 793 1997-02-16 50.30
#> 44: 845 1997-02-16 88.18
#> Id Date Price
# all transactions between "1997-02-01" and "1997-02-16"
subset(clv.cdnow, Date >= "1997-02-01" & Date <= "1997-02-16")
#> Key: <Id, Date>
#> Id Date Price
#> <char> <Date> <num>
#> 1: 1000 1997-02-07 9.97
#> 2: 1001 1997-02-07 38.77
#> 3: 1002 1997-02-07 11.77
#> 4: 1003 1997-02-07 59.34
#> 5: 1004 1997-02-07 27.94
#> ---
#> 627: 995 1997-02-07 29.73
#> 628: 996 1997-02-07 45.68
#> 629: 997 1997-02-07 11.77
#> 630: 998 1997-02-07 28.76
#> 631: 999 1997-02-07 10.78
# same using data.table's between
subset(clv.cdnow, between(Date, "1997-02-01","1997-02-16"))
#> Key: <Id, Date>
#> Id Date Price
#> <char> <Date> <num>
#> 1: 1000 1997-02-07 9.97
#> 2: 1001 1997-02-07 38.77
#> 3: 1002 1997-02-07 11.77
#> 4: 1003 1997-02-07 59.34
#> 5: 1004 1997-02-07 27.94
#> ---
#> 627: 995 1997-02-07 29.73
#> 628: 996 1997-02-07 45.68
#> 629: 997 1997-02-07 11.77
#> 630: 998 1997-02-07 28.76
#> 631: 999 1997-02-07 10.78
# all transactions with a value between 50 and 100
subset(clv.cdnow, Price >= 50 & Price <= 100)
#> Key: <Id, Date>
#> Id Date Price
#> <char> <Date> <num>
#> 1: 1003 1997-02-07 59.34
#> 2: 1003 1997-08-13 68.31
#> 3: 101 1997-01-06 62.45
#> 4: 1010 1997-03-26 98.61
#> 5: 1011 1997-05-17 64.01
#> ---
#> 1016: 990 1997-11-07 97.92
#> 1017: 990 1998-03-30 75.53
#> 1018: 990 1998-06-10 60.49
#> 1019: 994 1997-02-07 59.96
#> 1020: 994 1998-02-25 62.96
# same using data.table's between
subset(clv.cdnow, between(Price, 50, 100))
#> Key: <Id, Date>
#> Id Date Price
#> <char> <Date> <num>
#> 1: 1003 1997-02-07 59.34
#> 2: 1003 1997-08-13 68.31
#> 3: 101 1997-01-06 62.45
#> 4: 1010 1997-03-26 98.61
#> 5: 1011 1997-05-17 64.01
#> ---
#> 1016: 990 1997-11-07 97.92
#> 1017: 990 1998-03-30 75.53
#> 1018: 990 1998-06-10 60.49
#> 1019: 994 1997-02-07 59.96
#> 1020: 994 1998-02-25 62.96
# only keep Id of transactions on "1997-02-16"
subset(clv.cdnow, Date == "1997-02-16", "Id")
#> Key: <Id>
#> Id
#> <char>
#> 1: 1154
#> 2: 1207
#> 3: 1230
#> 4: 1231
#> 5: 1232
#> 6: 1233
#> 7: 1234
#> 8: 1235
#> 9: 1236
#> 10: 1237
#> 11: 1238
#> 12: 1239
#> 13: 1240
#> 14: 1241
#> 15: 1242
#> 16: 1243
#> 17: 1244
#> 18: 1245
#> 19: 1246
#> 20: 1247
#> 21: 1248
#> 22: 1249
#> 23: 1250
#> 24: 1251
#> 25: 1252
#> 26: 1253
#> 27: 1254
#> 28: 1255
#> 29: 1256
#> 30: 1257
#> 31: 1258
#> 32: 1259
#> 33: 1260
#> 34: 1261
#> 35: 1262
#> 36: 1263
#> 37: 1264
#> 38: 441
#> 39: 552
#> 40: 572
#> 41: 641
#> 42: 761
#> 43: 793
#> 44: 845
#> Id