# Load (installing first if missing) every package used in this exercise.
pacman::p_load(tidyverse, jsonlite, SmartEDA, tidygraph, ggraph)
In-Class Exercise 05
MC 01
# Parse the knowledge-graph JSON file into an R list.
kg <- fromJSON("data/MC1_graph.json")
Inspect structure
# Print only the top level of the parsed list to see its components.
str(object = kg, max.level = 1)
List of 5
$ directed : logi TRUE
$ multigraph: logi TRUE
$ graph :List of 2
$ nodes :'data.frame': 17412 obs. of 10 variables:
$ links :'data.frame': 37857 obs. of 4 variables:
Extract and inspect
# Pull the node and link tables out of the parsed list as tibbles.
nodes_tbl <- as_tibble(kg$nodes)
edges_tbl <- as_tibble(kg$links)
Initial EDA
# Horizontal bar chart: number of edges for each value of `Edge Type`.
edges_tbl %>%
  ggplot(aes(y = `Edge Type`)) +
  geom_bar()
Creating knowledge graph
Step 1: Mapping from node id to row index
# Lookup table pairing each node id with its row position in `nodes_tbl`.
id_map <- tibble(id = nodes_tbl$id, index = seq_len(nrow(nodes_tbl)))
Step 2: Map source and target IDs to row indices
# Translate each edge's `source` and `target` node id into the row index of
# that node in `nodes_tbl`, stored as `from` and `to`. An id with no match in
# `id_map` yields NA, because these are left joins.
edges_tbl <- edges_tbl %>%
  left_join(id_map, by = c("source" = "id"), suffix = c("", "_source")) %>%
  rename(from = index) %>%
  left_join(id_map, by = c("target" = "id"), suffix = c("", "_target")) %>%
  rename(to = index)
Step 3: Filter out edges with an unmatched source or target
# Keep only edges whose two endpoints were both resolved to a node index.
edges_tbl <- edges_tbl %>%
  filter(!is.na(from), !is.na(to))
Step 4: Creating the graph
# Assemble the tidygraph object from the node and edge tables; directedness
# is taken from the flag stored in the JSON itself.
graph <- tbl_graph(
  nodes = nodes_tbl,
  edges = edges_tbl,
  directed = kg$directed
)
Visualizing the knowledge graph
# Fix the random seed (presumably so the graph layouts below are
# reproducible between runs).
set.seed(seed = 1234)
Visualizing the whole Graph
# Draw the full graph with the "fr" layout: faint gray edges underneath,
# nodes coloured by `Node Type`, and a repelled text label per node.
# NOTE(review): assumes the node table has a `name` column - confirm.
graph %>%
  ggraph(layout = "fr") +
  geom_edge_link(colour = "gray", alpha = 0.3) +
  geom_node_point(mapping = aes(colour = `Node Type`), size = 4) +
  geom_node_text(mapping = aes(label = name), size = 2.5, repel = TRUE) +
  theme_void()
Step 1: Filter edges to only “MemberOf”
# Sub-graph holding only the edges whose `Edge Type` is "MemberOf".
# All nodes are still present at this point.
graph_memberof <- graph %>%
  activate(edges) %>%
  filter(`Edge Type` == "MemberOf")
Step 2: Extract only connected nodes (i.e., those used in these edges)
# Collect the distinct node indices that appear as either endpoint
# (`from` or `to`) of a remaining edge.
used_node_indices <- graph_memberof %>%
  activate(edges) %>%
  as_tibble() %>%
  select(from, to) %>%
  unlist() %>%
  unique()
Step 3: Keep only those nodes
# Drop every node that is not an endpoint of a remaining edge. A temporary
# `row_id` holds each node's row position so it can be compared against the
# collected indices.
graph_memberof <- graph_memberof %>%
  activate(nodes) %>%
  mutate(row_id = row_number()) %>%
  filter(row_id %in% used_node_indices) %>%
  select(-row_id) # optional cleanup
Plot the sub-graph
# Draw the "MemberOf" sub-graph with the "fr" layout: gray edges, small
# nodes coloured by `Node Type`, and a repelled text label per node.
# NOTE(review): assumes the node table has a `name` column - confirm.
graph_memberof %>%
  ggraph(layout = "fr") +
  geom_edge_link(colour = "gray", alpha = 0.5) +
  geom_node_point(mapping = aes(colour = `Node Type`), size = 1) +
  geom_node_text(mapping = aes(label = name), size = 2.5, repel = TRUE) +
  theme_void()