Skip to content

Commit 860038c

Browse files
committed
Commented out impossible vote casting edge. Not tracked.
1 parent 929c3e3 commit 860038c

File tree

1 file changed

+22
-22
lines changed

1 file changed

+22
-22
lines changed

graphml_class/motif.py

+22-22
Original file line number | Diff line number | Diff line change
@@ -59,8 +59,8 @@ def print_memory_usage():
5959
# Lets the Id:(Stack Overflow int) and id:(GraphFrames ULID) coexist
6060
.config("spark.sql.caseSensitive", True)
6161
# Single node mode - 128GB machine
62-
.config("spark.driver.memory", "48g")
63-
.config("spark.executor.memory", "48g")
62+
.config("spark.driver.memory", "16g")
63+
.config("spark.executor.memory", "8g")
6464
.getOrCreate()
6565
)
6666
sc: SparkContext = spark.sparkContext
@@ -210,26 +210,26 @@ def add_missing_columns(df, all_cols):
210210
#
211211
# Create a [User]--Cast-->[Vote] edge
212212
#
213-
user_voted_df: DataFrame = users_df.select(
214-
F.col("id").alias("src"),
215-
F.col("Id").alias("UserId"),
216-
# Everything has all the fields - should build from base records but need UUIDs
217-
F.col("PostId").alias("VotePostId"),
218-
)
219-
user_voted_edge_df: DataFrame = (
220-
user_voted_df.join(votes_df, user_voted_df.UserId == votes_df.Id)
221-
.select(
222-
# 'src' comes from the users' 'id'
223-
"src",
224-
# 'dst' comes from the votes' 'id'
225-
F.col("id").alias("dst"),
226-
# All edges have a 'relationship' field
227-
F.lit("Cast").alias("relationship"),
228-
)
229-
.cache()
230-
)
231-
print(f"Total VotedFor edges: {voted_for_edge_df.count():,}")
232-
print(f"Percentage of linked votes: {voted_for_edge_df.count() / votes_df.count():.2%}\n")
213+
# user_voted_df: DataFrame = users_df.select(
214+
# F.col("id").alias("src"),
215+
# F.col("Id").alias("UserId"),
216+
# # Everything has all the fields - should build from base records but need UUIDs
217+
# F.col("PostId").alias("VotePostId"),
218+
# )
219+
# user_voted_edge_df: DataFrame = (
220+
# user_voted_df.join(votes_df, user_voted_df.UserId == votes_df.Id)
221+
# .select(
222+
# # 'src' comes from the users' 'id'
223+
# "src",
224+
# # 'dst' comes from the votes' 'id'
225+
# F.col("id").alias("dst"),
226+
# # All edges have a 'relationship' field
227+
# F.lit("Cast").alias("relationship"),
228+
# )
229+
# .cache()
230+
# )
231+
# print(f"Total VotedFor edges: {voted_for_edge_df.count():,}")
232+
# print(f"Percentage of linked votes: {voted_for_edge_df.count() / votes_df.count():.2%}\n")
233233

234234

235235
#

0 commit comments

Comments
 (0)