forked from EvonX/TimeChangingDecisionTree
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathexample.py
More file actions
73 lines (46 loc) · 1.42 KB
/
example.py
File metadata and controls
73 lines (46 loc) · 1.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# coding: utf-8
# In[1]:
import pandas as pd
import numpy as np
import collections
from collections import deque
from pandas import Timestamp
from tcdt import TCDT
# In[2]:
# Load the sample records from the CSV file that sits next to this example.
fname = "sample.csv"
sampleData = pd.read_csv(fname, encoding="utf-8")

# Parse the timestamp column into pandas datetime values; this is the column
# later handed to the tree as its time column.
raw_timestamps = sampleData["b'timestamp'"]
sampleData["b'timestamp'"] = pd.to_datetime(raw_timestamps)

# Notebook-style display of the frame (has no effect when run as a script).
sampleData
# In[3]:
# Features used for training. Each inner list is one feature group; the last
# group lists the URL columns in order, which indicates the hierarchy of the
# URL features.
urlHierarchy = [
    "b'prefix0'",
    "b'prefix1'",
    "b'prefix2'",
    "b'request_url_path'",
]
trainFeatures = [
    ["b'remote_host'"],
    ["b'remote_user'"],
    ["b'request_method'"],
    urlHierarchy,
]
labelCol = "b'status'"
timeCol = "b'timestamp'"

# Train a decision tree on the first 10 rows of the sample data.
trainData = sampleData.iloc[:10]
tcdt = TCDT().fit(trainData, trainFeatures, labelCol, timeCol)

# Print out the fitted tree as text.
treeText = tcdt.export_text()
print(treeText)
# In[4]:
# Make predictions on the rows that were not used for training (row 10
# onwards), converted to a NumPy array so each row is passed to predict()
# as a plain array. Per the original author's note, this test data contains
# a new change, so the predictions printed here are expected to be wrong.
testData = sampleData[10:].to_numpy()
for row in testData:
    # Both prints belong to the loop body: show the raw row, then the
    # tree's prediction for that same row.
    print(row)
    print('predict: ', tcdt.predict(row))
# In[5]:
# Update the tree with the first row of the test data, then predict on every
# test row again. Per the original author's note, the predictions are
# expected to become correct after this update.
tcdt.update(testData[0])
for row in testData:
    # Both prints belong to the loop body: show the raw row, then the
    # tree's prediction for that same row.
    print(row)
    print('predict: ', tcdt.predict(row))
# In[6]:
# Print every leaf of the tree: first the leaf's `ipath` attribute, then its
# `timeSeries` attribute.
for leaf in tcdt.get_allleaves():
    print(leaf.ipath)
    print(leaf.timeSeries)