We will explore ways to visualize network data using R in this post.
+ network: Classes for Relational Data.
+ igraph: Network Analysis and Visualization. tidygraph is a wrapper around it.
+ tidygraph: A Tidy API for Graph Manipulation.
+ ggraph and visNetwork: to visualize graphs.
Use the code below to install and load the packages.
# Packages used in this post.
packages <- c("igraph", "tidygraph",
              "ggraph", "visNetwork",
              "lubridate", "clock",
              "tidyverse")

# Install every package that is not yet available, then attach it.
for (p in packages) {
  # requireNamespace() only checks availability: unlike require() it neither
  # attaches the package nor warns when the package is missing.
  if (!requireNamespace(p, quietly = TRUE)) {
    install.packages(p)
  }
  # library() stops with an error if the package still cannot be loaded.
  library(p, character.only = TRUE)
}
# Import the node table and the edge table of the GAStech email data.
GAStech_nodes <- read_csv(file = "data/GAStech_email_node.csv")
GAStech_edges <- read_csv(file = "data/GAStech_email_edge-v2.csv")

# Show the columns and column types of the imported edge table.
glimpse(x = GAStech_edges)
Rows: 9,063
Columns: 8
$ source <dbl> 43, 43, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, …
$ target <dbl> 41, 40, 51, 52, 53, 45, 44, 46, 48, 49, 47, 54, …
$ SentDate <chr> "6/1/2014", "6/1/2014", "6/1/2014", "6/1/2014", …
$ SentTime <time> 08:39:00, 08:39:00, 08:58:00, 08:58:00, 08:58:0…
$ Subject <chr> "GT-SeismicProcessorPro Bug Report", "GT-Seismic…
$ MainSubject <chr> "Work related", "Work related", "Work related", …
$ sourceLabel <chr> "Sven.Flecha", "Sven.Flecha", "Kanon.Herrero", "…
$ targetLabel <chr> "Isak.Baza", "Lucas.Alcazar", "Felix.Resumir", "…
dmy() transforms SentDate to the Date data type. wday() returns the day of the week as a decimal number, or as an ordered factor if label is TRUE.
# Parse the day/month/year strings in SentDate, then derive the unabbreviated
# weekday label of every email from the parsed date.
GAStech_edges <- GAStech_edges %>%
  mutate(
    SentDate = dmy(SentDate),
    Weekday = wday(SentDate, label = TRUE, abbr = FALSE)
  )
# Aggregate work-related emails into a weighted edge list: one row per
# (source, target, Weekday) combination, with the email count as Weight.
GAStech_edges_aggregated <- GAStech_edges %>%
  filter(MainSubject == "Work related") %>%
  group_by(source, target, Weekday) %>%
  summarize(Weight = n()) %>%
  # Drop emails people sent to themselves and edges with a weight of 1 or less.
  filter(source != target, Weight > 1) %>%
  ungroup()

# Show the structure of the aggregated edge list.
glimpse(GAStech_edges_aggregated)
Rows: 1,456
Columns: 4
$ source <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
$ target <dbl> 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6…
$ Weekday <ord> Monday, Tuesday, Wednesday, Friday, Monday, Tuesday,…
$ Weight <int> 4, 3, 5, 8, 4, 3, 5, 8, 4, 3, 5, 8, 4, 3, 5, 8, 4, 3…
# Combine the node table and the aggregated edge list into a directed
# tbl_graph object.
GAStech_graph <- tbl_graph(
  nodes = GAStech_nodes,
  edges = GAStech_edges_aggregated,
  directed = TRUE
)

# Print the graph to inspect its node and edge data.
GAStech_graph
# A tbl_graph: 54 nodes and 1456 edges
#
# A directed multigraph with 1 component
#
# Node Data: 54 x 4 (active)
id label Department Title
<dbl> <chr> <chr> <chr>
1 1 Mat.Bramar Administration Assistant to CEO
2 2 Anda.Ribera Administration Assistant to CFO
3 3 Rachel.Pantanal Administration Assistant to CIO
4 4 Linda.Lagos Administration Assistant to COO
5 5 Ruscella.Mies.H… Administration Assistant to Engineering Grou…
6 6 Carla.Forluniau Administration Assistant to IT Group Manager
# … with 48 more rows
#
# Edge Data: 1,456 x 4
from to Weekday Weight
<int> <int> <ord> <int>
1 1 2 Monday 4
2 1 2 Tuesday 3
3 1 2 Wednesday 5
# … with 1,453 more rows
# Print the graph object again; the output repeats the node and edge data.
GAStech_graph
# A tbl_graph: 54 nodes and 1456 edges
#
# A directed multigraph with 1 component
#
# Node Data: 54 x 4 (active)
id label Department Title
<dbl> <chr> <chr> <chr>
1 1 Mat.Bramar Administration Assistant to CEO
2 2 Anda.Ribera Administration Assistant to CFO
3 3 Rachel.Pantanal Administration Assistant to CIO
4 4 Linda.Lagos Administration Assistant to COO
5 5 Ruscella.Mies.H… Administration Assistant to Engineering Grou…
6 6 Carla.Forluniau Administration Assistant to IT Group Manager
# … with 48 more rows
#
# Edge Data: 1,456 x 4
from to Weekday Weight
<int> <int> <ord> <int>
1 1 2 Monday 4
2 1 2 Tuesday 3
3 1 2 Wednesday 5
# … with 1,453 more rows
# Minimal network plot: one edge layer and one node layer, with no layout
# specified.
ggraph(graph = GAStech_graph) +
  geom_edge_link() +
  geom_node_point()
# Same plot as before, stored in g so that a theme can be added when drawing.
g <- ggraph(graph = GAStech_graph) +
  geom_edge_link(mapping = aes()) +
  geom_node_point(mapping = aes())

# theme_graph() removes the grey background colour.
g + theme_graph()
# Draw the network with grey edges and grey nodes.
# The colours are constants, so they are set as layer parameters outside
# aes(). Inside aes() the strings are treated as data values: they get mapped
# through the default colour scale and produce a legend instead of grey marks.
g <- ggraph(GAStech_graph) +
  geom_edge_link(edge_colour = "grey50") +
  geom_node_point(colour = "grey40")

g + theme_graph()
# Draw the network with the "fr" layout instead of the default one.
g <- ggraph(graph = GAStech_graph, layout = "fr") +
  geom_edge_link(mapping = aes()) +
  geom_node_point(mapping = aes())

g + theme_graph()
# Colour every node by its Department, using the "nicely" layout.
# Department varies per node, so it is mapped inside aes(). The point size is
# a constant, so it is set outside aes(); inside aes() it would be mapped
# through the size scale and add a meaningless "3" legend.
g <- ggraph(GAStech_graph, layout = "nicely") +
  geom_edge_link(aes()) +
  geom_node_point(aes(colour = Department), size = 3)

g + theme_graph()
# Map edge width to Weight (semi-transparent edges, widths scaled to the
# range 0.1-5) and colour the nodes by Department.
g <- ggraph(graph = GAStech_graph, layout = "nicely") +
  geom_edge_link(mapping = aes(width = Weight), alpha = 0.2) +
  geom_node_point(mapping = aes(colour = Department), size = 3) +
  scale_edge_width(range = c(0.1, 5))

g + theme_graph()
# Apply ggraph's graph style to the plots that follow.
set_graph_style()

# Weighted network with nodes coloured by Department.
g <- ggraph(graph = GAStech_graph, layout = "nicely") +
  geom_edge_link(mapping = aes(width = Weight), alpha = 0.2) +
  geom_node_point(mapping = aes(colour = Department), size = 2) +
  scale_edge_width(range = c(0.1, 5))

# Facet the edges by Weekday.
g + facet_edges(~Weekday)
# Apply ggraph's graph style to the plots that follow.
set_graph_style()

# Weighted network with nodes coloured by Department; the legend is moved
# below the plot.
g <- ggraph(graph = GAStech_graph, layout = "nicely") +
  geom_edge_link(mapping = aes(width = Weight), alpha = 0.2) +
  geom_node_point(mapping = aes(colour = Department), size = 2) +
  scale_edge_width(range = c(0.1, 5)) +
  theme(legend.position = "bottom")

# Facet the edges by Weekday.
g + facet_edges(~Weekday)
# Apply ggraph's graph style to the plots that follow.
set_graph_style()

# Weighted network with nodes coloured by Department.
g <- ggraph(graph = GAStech_graph, layout = "nicely") +
  geom_edge_link(mapping = aes(width = Weight), alpha = 0.2) +
  geom_node_point(mapping = aes(colour = Department), size = 2) +
  scale_edge_width(range = c(0.1, 5))

# Facet the edges by Weekday, add a grey80 foreground with a border via
# th_foreground(), and place the legend at the bottom.
g +
  facet_edges(~Weekday) +
  th_foreground(foreground = "grey80", border = TRUE) +
  theme(legend.position = "bottom")
# Apply ggraph's graph style to the plots that follow.
set_graph_style()

# Weighted network with nodes coloured by Department.
g <- ggraph(graph = GAStech_graph, layout = "nicely") +
  geom_edge_link(mapping = aes(width = Weight), alpha = 0.2) +
  geom_node_point(mapping = aes(colour = Department), size = 2) +
  scale_edge_width(range = c(0.1, 5))

# Facet the nodes by Department, add a grey80 foreground with a border via
# th_foreground(), and place the legend at the bottom.
g +
  facet_nodes(~Department) +
  th_foreground(foreground = "grey80", border = TRUE) +
  theme(legend.position = "bottom")
# Store betweenness centrality as a node attribute, then map it to node size.
g <- GAStech_graph %>%
  mutate(betweenness_centrality = centrality_betweenness()) %>%
  ggraph(layout = "fr") +
  geom_edge_link(mapping = aes(width = Weight), alpha = 0.2) +
  geom_node_point(
    mapping = aes(colour = Department, size = betweenness_centrality)
  ) +
  scale_edge_width(range = c(0.1, 5))

g + theme_graph()
# Same plot, but betweenness centrality is computed directly inside aes()
# rather than being stored as a node attribute first.
g <- GAStech_graph %>%
  ggraph(layout = "fr") +
  geom_edge_link(mapping = aes(width = Weight), alpha = 0.2) +
  geom_node_point(
    mapping = aes(colour = Department, size = centrality_betweenness())
  ) +
  scale_edge_width(range = c(0.1, 5))

g + theme_graph()
# Assign every node to a community with group_edge_betweenness() (weighted by
# Weight, directed), store the result as a factor and colour the nodes by it.
g <- GAStech_graph %>%
  mutate(
    community = as.factor(
      group_edge_betweenness(weights = Weight, directed = TRUE)
    )
  ) %>%
  ggraph(layout = "fr") +
  geom_edge_link(mapping = aes(width = Weight), alpha = 0.2) +
  geom_node_point(mapping = aes(colour = community)) +
  scale_edge_width(range = c(0.1, 5))

g + theme_graph()
# Rebuild the aggregated edge list with `from` and `to` columns for
# visNetwork. The sender and recipient labels are looked up in the node table
# to obtain their ids, then work-related emails are counted per (from, to).
GAStech_edges_aggregated <- GAStech_edges %>%
  left_join(GAStech_nodes, by = c("sourceLabel" = "label")) %>%
  rename(from = id) %>%
  left_join(GAStech_nodes, by = c("targetLabel" = "label")) %>%
  rename(to = id) %>%
  filter(MainSubject == "Work related") %>%
  group_by(from, to) %>%
  summarise(weight = n()) %>%
  # Drop self-addressed emails and pairs with a weight of 1 or less.
  filter(from != to, weight > 1) %>%
  ungroup()
# Interactive network plot; the layout name can be any igraph layout function.
visNetwork(nodes = GAStech_nodes, edges = GAStech_edges_aggregated) %>%
  visIgraphLayout(layout = "layout_with_fr")
# Rename the Department column to `group` in the node table.
GAStech_nodes <- rename(GAStech_nodes, group = Department)
# Interactive network plot with a legend.
# The node coordinates are computed by visIgraphLayout(), so the seed is
# passed to its randomSeed argument to make the layout reproducible. The
# original visLayout(randomSeed = 123) call is kept unchanged.
visNetwork(GAStech_nodes,
           GAStech_edges_aggregated) %>%
  visIgraphLayout("layout_with_fr", randomSeed = 123) %>%
  visLegend() %>%
  visLayout(randomSeed = 123)
# Interactive network plot with a legend, nearest-neighbour highlighting and
# a drop-down selector for node ids.
# The node coordinates are computed by visIgraphLayout(), so the seed is
# passed to its randomSeed argument to make the layout reproducible. The
# original visLayout(randomSeed = 123) call is kept unchanged.
visNetwork(GAStech_nodes,
           GAStech_edges_aggregated) %>%
  visIgraphLayout("layout_with_fr", randomSeed = 123) %>%
  visOptions(highlightNearest = TRUE,
             nodesIdSelection = TRUE) %>%
  visLegend() %>%
  visLayout(randomSeed = 123)