8
8
import pandas as pd
9
9
import numpy as np
10
10
11
- import knowledge_tracing .utils .visualize as visualize
12
- import knowledge_tracing .utils .utils as utils
13
-
14
- import matplotlib .pyplot as plt
15
-
16
11
17
12
def parse_args(parser):
    """Register the causal-support evaluation options on ``parser``.

    Args:
        parser: An ``argparse.ArgumentParser`` (or any object exposing a
            compatible ``add_argument``) that is extended in place.

    Returns:
        The same ``parser`` object that was passed in.
    """
    parser.add_argument(
        "--base_pth",
        type=str,
        default="../data",
        help="path to the directory containing the dataset",
    )
    parser.add_argument("--dataset", type=str, help="Name of dataset")
    parser.add_argument(
        "--gap",
        type=int,
        default=1,
        help="the order of transition in evaluation causal support",
    )
    parser.add_argument(
        "--test",
        type=int,
        default=1,
        help="whether using test data in evaluation causal support",
    )
    parser.add_argument(
        "--num_sample",
        type=int,
        # argparse applies `type` only to string defaults, so a float literal
        # such as 1e6 would be handed to the caller unconverted and later
        # break integer-only uses (array shapes, repeat counts).
        default=1_000_000,
        help="number of samples in estimating causal support",
    )

    return parser
@@ -48,31 +45,47 @@ def parse_args(parser):
48
45
parser = parse_args (parser )
49
46
args , extras = parser .parse_known_args ()
50
47
51
- # assistment12
48
+ # Load interaction data and skill corpus from files
49
+ # Read interactions CSV file
52
50
inter = pd .read_csv (
53
51
f"{ args .base_pth } /{ args .dataset } /multi_skill/interactions.csv" , sep = "\t "
54
52
)
53
+ # Load corpus object from pickle file
55
54
with open (f"{ args .base_pth } /{ args .dataset } /multi_skill/Corpus.pkl" , "rb" ) as f :
56
55
corpus = pickle .load (f )
57
56
57
# Distinct skill ids present in the interaction log; their count is used as
# the number of nodes for everything computed afterwards.
skill_id = list(inter["skill_id"].unique())
num_node = len(skill_id)

# One text label per skill. The loop probes the integers 0..num_node-1 as
# skill ids and takes the text of the first matching interaction row.
skill_list = []
for i in range(num_node):
    text = inter.loc[inter["skill_id"] == i, "skill_text"].tolist()[0]
    skill_list.append(text)
65
66
66
- # ----- Calculate transition matrix -----
67
+ # Calculate transition matrix for skill sequences
68
+ # The gap and start variables define the range of transitions to consider
67
69
gap = args .gap
68
- start = 10 if args .test else 0
69
- T = np .zeros ((num_node , num_node , 4 )) # 0-1, 0-0, 1-1, 1-0
70
- N = np .zeros ((num_node , num_node ))
70
+ start = (
71
+ 10 if args .test else 0
72
+ ) # Start index for considering transitions, based on whether it's a test run
73
+ T = np .zeros (
74
+ (num_node , num_node , 4 )
75
+ ) # Transition counts for each pair of skills and outcomes (0-1, 0-0, 1-1, 1-0)
76
+ N = np .zeros (
77
+ (num_node , num_node )
78
+ ) # Total transition counts between each pair of skills
79
+
80
+ # Iterate through each user sequence in the corpus
71
81
for l in range (len (corpus .user_seq_df )):
72
82
correct = corpus .user_seq_df ["correct_seq" ][l ]
73
83
index = corpus .user_seq_df ["skill_seq" ][l ]
74
84
75
- for i in range (start , start + 10 - gap ):
85
+ # Count transitions and outcomes for each sequence, considering the defined gap
86
+ for i in range (
87
+ start , start + 10 - gap
88
+ ): # Ensure transition between different skills
76
89
if index [i + gap ] != index [i ]:
77
90
if correct [i ] == 0 :
78
91
if correct [i + gap ] == 1 :
@@ -84,25 +97,29 @@ def parse_args(parser):
84
97
T [index [i ], index [i + gap ], 2 ] += 1
85
98
else :
86
99
T [index [i ], index [i + gap ], 3 ] += 1
87
- N [index [i ], index [i + gap ]] += 1
100
+ N [index [i ], index [i + gap ]] += 1 # Increment total transition count
101
+
102
# Marginal counts over the last axis of T. Per the comment where T is
# allocated, its four slots are the outcome pairs 0-1, 0-0, 1-1, 1-0
# (first answer - later answer).
Nc_minus = T[..., 0] + T[..., 1]  # first answer was 0
Nc_plus = T[..., 2] + T[..., 3]  # first answer was 1
Ne_minus = T[..., 1] + T[..., 3]  # later answer was 0
Ne_plus = T[..., 0] + T[..., 2]  # later answer was 1

# Share of 1-1 outcomes among pairs whose first answer was 1; the small
# constant keeps the division defined where no such pair was counted.
success_transition = np.abs(T[..., 2]) / (Nc_plus + 1e-6)

# True for skill pairs with more than one counted transition in total.
mask = (Nc_plus + Nc_minus) > 1
95
112
96
- # ----- Compute causal support -----
97
- # P(D|G0)
113
# Compute causal support for the transitions.
# P(D|G0): likelihood of the data given no causal relationship, evaluated on
# a grid of num_sample evenly spaced values w0 in [0, 1).
# int() guards against a float reaching this point (argparse does not convert
# non-string defaults), since the value is used as an array dimension.
num_sample = int(args.num_sample)
w0 = np.arange(0, num_sample, 1) / num_sample
# Keep w0 as (num_sample, 1, 1) and let NumPy broadcast it against the
# (num_node, num_node) count matrices; this yields the same
# (num_sample, num_node, num_node) result as tiling every operand with
# repeat(), without materialising the tiled copies.
w0 = w0.reshape(num_sample, 1, 1)
p0 = np.power(w0, Ne_plus) * np.power(1 - w0, Ne_minus)
104
121
105
- # P(D|G1)
122
+ # Probability of data given a causal relationship ( P(D|G1) )
106
123
w0 = np .arange (0 , num_sample , 1 ) / num_sample
107
124
w0 = w0 .reshape (num_sample , 1 , 1 ).repeat (num_node , 1 ).repeat (num_node , - 1 )
108
125
w0 = w0 .repeat (num_sample , 0 )
@@ -119,5 +136,5 @@ def parse_args(parser):
119
136
120
137
# Likelihood under G1: elementwise product of the two factors computed above.
p1 = np.multiply(p_e1_c1, p_e1_c0)

# Causal support: difference of the log mean likelihoods (mean taken over
# axis 0) under G1 and G0. The small constant avoids log(0).
eps = 1e-6
log_evidence_g1 = np.log(p1.mean(axis=0) + eps)
log_evidence_g0 = np.log(p0.mean(axis=0) + eps)
support = log_evidence_g1 - log_evidence_g0
0 commit comments