Network Data Visualisation and Analysis

We will explore ways to visualize network data using R in this post.

Published

July 10, 2021

DOI

R graph objects

+ network: Classes for Relational Data.
+ igraph: Network Analysis and Visualization. tidygraph is a wrapper around it.
+ tidygraph: A Tidy API for Graph Manipulation.
+ ggraph and visNetwork: to visualize graphs. 

Installing and loading the required libraries

Use the code below to install (if necessary) and load the packages.

# Packages used throughout this post.
packages <- c("igraph", "tidygraph",
              "ggraph", "visNetwork",
              "lubridate", "clock",
              "tidyverse")

# Install every package that is not available yet, then attach it.
for (p in packages) {
  # requireNamespace() only checks whether the package can be loaded;
  # attaching is left to library() below, which stops with an error if
  # the installation did not succeed.
  if (!requireNamespace(p, quietly = TRUE)) {
    install.packages(p)
  }
  library(p, character.only = TRUE)
}

Importing network data from files

# Read the node file and the edge file from the data/ folder.
GAStech_nodes <- read_csv(file = "data/GAStech_email_node.csv")
GAStech_edges <- read_csv(file = "data/GAStech_email_edge-v2.csv")

Reviewing the imported data

Note that SentDate is stored as character data rather than as a date.
# Show every column of the edge table with its type and first values.
glimpse(GAStech_edges)
Rows: 9,063
Columns: 8
$ source      <dbl> 43, 43, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, …
$ target      <dbl> 41, 40, 51, 52, 53, 45, 44, 46, 48, 49, 47, 54, …
$ SentDate    <chr> "6/1/2014", "6/1/2014", "6/1/2014", "6/1/2014", …
$ SentTime    <time> 08:39:00, 08:39:00, 08:58:00, 08:58:00, 08:58:0…
$ Subject     <chr> "GT-SeismicProcessorPro Bug Report", "GT-Seismic…
$ MainSubject <chr> "Work related", "Work related", "Work related", …
$ sourceLabel <chr> "Sven.Flecha", "Sven.Flecha", "Kanon.Herrero", "…
$ targetLabel <chr> "Isak.Baza", "Lucas.Alcazar", "Felix.Resumir", "…

Wrangling time

dmy() transforms SentDate to the Date data type. wday() returns the day of the week as a decimal number, or as an ordered factor if label is TRUE.

# Parse the day/month/year strings in SentDate into Date values.
GAStech_edges[["SentDate"]] <- dmy(GAStech_edges[["SentDate"]])

# Derive the weekday of each email as a full (unabbreviated) label.
GAStech_edges[["Weekday"]] <- wday(
  GAStech_edges[["SentDate"]],
  label = TRUE,
  abbr = FALSE
)

Wrangling attributes

# Aggregate the work-related emails into weighted edges: one row per
# source/target/Weekday combination, where Weight is the number of emails.
GAStech_edges_aggregated <- GAStech_edges %>%
  filter(MainSubject == "Work related") %>%
  group_by(source, target, Weekday) %>%
  # .groups = "drop" returns an ungrouped tibble directly, so no trailing
  # ungroup() is needed and summarise() emits no regrouping message.
  summarise(Weight = n(), .groups = "drop") %>%
  filter(source != target) %>% # exclude people who wrote to themselves
  filter(Weight > 1) # exclude edges with a weight less than or equal to 1

Reviewing the new data set

# Show every column of the aggregated edge table with its type and first values.
glimpse(GAStech_edges_aggregated)
Rows: 1,456
Columns: 4
$ source  <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
$ target  <dbl> 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6…
$ Weekday <ord> Monday, Tuesday, Wednesday, Friday, Monday, Tuesday,…
$ Weight  <int> 4, 3, 5, 8, 4, 3, 5, 8, 4, 3, 5, 8, 4, 3, 5, 8, 4, 3…

Using tbl_graph() to build tidygraph data model

# Combine the node table and the aggregated edge table into a single
# directed tidygraph object.
GAStech_graph <- tbl_graph(
  nodes = GAStech_nodes,
  edges = GAStech_edges_aggregated,
  directed = TRUE
)

# Auto-print the graph to get a summary of its nodes and edges.
GAStech_graph
# A tbl_graph: 54 nodes and 1456 edges
#
# A directed multigraph with 1 component
#
# Node Data: 54 x 4 (active)
     id label            Department     Title                         
  <dbl> <chr>            <chr>          <chr>                         
1     1 Mat.Bramar       Administration Assistant to CEO              
2     2 Anda.Ribera      Administration Assistant to CFO              
3     3 Rachel.Pantanal  Administration Assistant to CIO              
4     4 Linda.Lagos      Administration Assistant to COO              
5     5 Ruscella.Mies.H… Administration Assistant to Engineering Grou…
6     6 Carla.Forluniau  Administration Assistant to IT Group Manager 
# … with 48 more rows
#
# Edge Data: 1,456 x 4
   from    to Weekday   Weight
  <int> <int> <ord>      <int>
1     1     2 Monday         4
2     1     2 Tuesday        3
3     1     2 Wednesday      5
# … with 1,453 more rows

Reviewing the output tidygraph’s graph object

# Auto-print the graph object to review its node data and edge data.
GAStech_graph
# A tbl_graph: 54 nodes and 1456 edges
#
# A directed multigraph with 1 component
#
# Node Data: 54 x 4 (active)
     id label            Department     Title                         
  <dbl> <chr>            <chr>          <chr>                         
1     1 Mat.Bramar       Administration Assistant to CEO              
2     2 Anda.Ribera      Administration Assistant to CFO              
3     3 Rachel.Pantanal  Administration Assistant to CIO              
4     4 Linda.Lagos      Administration Assistant to COO              
5     5 Ruscella.Mies.H… Administration Assistant to Engineering Grou…
6     6 Carla.Forluniau  Administration Assistant to IT Group Manager 
# … with 48 more rows
#
# Edge Data: 1,456 x 4
   from    to Weekday   Weight
  <int> <int> <ord>      <int>
1     1     2 Monday         4
2     1     2 Tuesday        3
3     1     2 Wednesday      5
# … with 1,453 more rows

Plotting network graphs

Plotting a basic network graph

# Minimal network plot: an edge layer drawn first, then a node layer,
# all with default settings.
ggraph(graph = GAStech_graph) +
  geom_edge_link() +
  geom_node_point()

Changing the default network graph theme

# Keep the basic plot in g so that a theme can be added afterwards.
g <- ggraph(graph = GAStech_graph) +
  geom_edge_link(mapping = aes()) +
  geom_node_point(mapping = aes())

# theme_graph() removes the grey background colour.
g + theme_graph()

Changing the coloring of the plot

# Draw edges and nodes in fixed shades of grey.
# The colours are constants, so they are set as layer parameters outside
# aes(). Inside aes() the strings would be treated as a data variable:
# the layers would get default palette colours plus a legend entry
# instead of the requested greys.
g <- ggraph(GAStech_graph) +
  geom_edge_link(edge_colour = "grey50") +
  geom_node_point(colour = "grey40")
g + theme_graph()

Fruchterman and Reingold layout

# Same basic plot, but with the "fr" layout requested explicitly.
g <- ggraph(graph = GAStech_graph, layout = "fr") +
  geom_edge_link(mapping = aes()) +
  geom_node_point(mapping = aes())

g + theme_graph()

Modifying network nodes

# Colour the nodes by Department and draw them at a fixed size.
# The size is a constant, so it is set outside aes(); inside aes() the
# value 3 would be mapped through the size scale (not drawn at size 3)
# and would add a meaningless "size" legend.
g <- ggraph(GAStech_graph,
            layout = "nicely") +
  geom_edge_link(aes()) +
  geom_node_point(aes(colour = Department),
                  size = 3)
g + theme_graph()

Modifying network edges

# Edge width follows Weight (scaled to the range 0.1-5) and edges are
# drawn semi-transparent; nodes are coloured by Department.
g <- ggraph(graph = GAStech_graph, layout = "nicely") +
  geom_edge_link(mapping = aes(width = Weight), alpha = 0.2) +
  scale_edge_width(range = c(0.1, 5)) +
  geom_node_point(mapping = aes(colour = Department), size = 3)

g + theme_graph()

Working with facet_edges()

# Apply ggraph's graph style before building the plot.
set_graph_style()

# Weighted, semi-transparent edges with nodes coloured by Department.
g <- ggraph(graph = GAStech_graph, layout = "nicely") +
  geom_edge_link(mapping = aes(width = Weight), alpha = 0.2) +
  scale_edge_width(range = c(0.1, 5)) +
  geom_node_point(mapping = aes(colour = Department), size = 2)

# Facet the edges by Weekday.
g + facet_edges(~Weekday)

Changing the legend position

# Apply ggraph's graph style before building the plot.
set_graph_style()

# Same faceted plot as before; the legend is moved to the bottom through
# a theme() call stored in g itself.
g <- ggraph(graph = GAStech_graph, layout = "nicely") +
  geom_edge_link(mapping = aes(width = Weight), alpha = 0.2) +
  scale_edge_width(range = c(0.1, 5)) +
  geom_node_point(mapping = aes(colour = Department), size = 2) +
  theme(legend.position = "bottom")

# Facet the edges by Weekday.
g + facet_edges(~Weekday)

A framed facet graph

# Apply ggraph's graph style before building the plot.
set_graph_style()

# Weighted, semi-transparent edges with nodes coloured by Department.
g <- ggraph(graph = GAStech_graph, layout = "nicely") +
  geom_edge_link(mapping = aes(width = Weight), alpha = 0.2) +
  scale_edge_width(range = c(0.1, 5)) +
  geom_node_point(mapping = aes(colour = Department), size = 2)

# Facet the edges by Weekday, add a bordered grey80 foreground to frame
# the panels, and place the legend at the bottom.
g +
  facet_edges(~Weekday) +
  th_foreground(foreground = "grey80", border = TRUE) +
  theme(legend.position = "bottom")

Working with facet_nodes()

# Apply ggraph's graph style before building the plot.
set_graph_style()

# Weighted, semi-transparent edges with nodes coloured by Department.
g <- ggraph(graph = GAStech_graph, layout = "nicely") +
  geom_edge_link(mapping = aes(width = Weight), alpha = 0.2) +
  scale_edge_width(range = c(0.1, 5)) +
  geom_node_point(mapping = aes(colour = Department), size = 2)

# Facet the nodes by Department, add a bordered grey80 foreground to
# frame the panels, and place the legend at the bottom.
g +
  facet_nodes(~Department) +
  th_foreground(foreground = "grey80", border = TRUE) +
  theme(legend.position = "bottom")

Network Metrics Analysis

Computing centrality indices

# Store betweenness centrality as a node attribute first, then map it
# to the node size; node colour follows Department.
g <- ggraph(
  mutate(GAStech_graph, betweenness_centrality = centrality_betweenness()),
  layout = "fr"
) +
  geom_edge_link(mapping = aes(width = Weight), alpha = 0.2) +
  scale_edge_width(range = c(0.1, 5)) +
  geom_node_point(
    mapping = aes(colour = Department, size = betweenness_centrality)
  )

g + theme_graph()

Visualising network metrics

# Betweenness centrality is computed directly inside aes() here, without
# adding it to the graph as a node attribute beforehand.
g <- ggraph(GAStech_graph, layout = "fr") +
  geom_edge_link(mapping = aes(width = Weight), alpha = 0.2) +
  scale_edge_width(range = c(0.1, 5)) +
  geom_node_point(
    mapping = aes(colour = Department, size = centrality_betweenness())
  )

g + theme_graph()

Visualising Community

# Assign each node to a community with group_edge_betweenness() (using
# Weight as edge weights on the directed graph), store the result as a
# factor, and colour the nodes by it.
g <- ggraph(
  mutate(
    GAStech_graph,
    community = as.factor(
      group_edge_betweenness(weights = Weight, directed = TRUE)
    )
  ),
  layout = "fr"
) +
  geom_edge_link(mapping = aes(width = Weight), alpha = 0.2) +
  scale_edge_width(range = c(0.1, 5)) +
  geom_node_point(mapping = aes(colour = community))

g + theme_graph()

Building an Interactive Network Graph with visNetwork

Data preparation

# Rebuild the aggregated edge list with the column names this post's
# visNetwork calls use (from, to, weight). The node ids are looked up by
# joining the sender and receiver labels against the node table.
GAStech_edges_aggregated <- GAStech_edges %>%
  left_join(GAStech_nodes, by = c("sourceLabel" = "label")) %>%
  rename(from = id) %>%
  left_join(GAStech_nodes, by = c("targetLabel" = "label")) %>%
  rename(to = id) %>%
  filter(MainSubject == "Work related") %>%
  group_by(from, to) %>%
  # .groups = "drop" returns an ungrouped tibble directly, so no trailing
  # ungroup() is needed and summarise() emits no regrouping message.
  summarise(weight = n(), .groups = "drop") %>%
  filter(from != to) %>% # exclude people who wrote to themselves
  filter(weight > 1) # exclude edges with a weight less than or equal to 1

Plotting the first interactive network graph

# Interactive graph with node positions taken from an igraph layout;
# other igraph layout names can be used instead.
visNetwork(nodes = GAStech_nodes, edges = GAStech_edges_aggregated) %>%
  visIgraphLayout(layout = "layout_with_fr")

Working with visual attributes - Nodes

# Rename Department to group, the node column visNetwork uses for
# grouping and for the legend.
GAStech_nodes <- GAStech_nodes %>%
  rename(group = Department)

visNetwork(GAStech_nodes,
           GAStech_edges_aggregated) %>%
  # The node positions are computed in R by igraph, so the seed has to be
  # given here for the layout to be reproducible; the seed in visLayout()
  # only affects vis.js's own initial placement.
  visIgraphLayout("layout_with_fr", randomSeed = 123) %>%
  visLegend() %>%
  visLayout(randomSeed = 123)

Interactivity

# Interactive graph with nearest-neighbour highlighting and a drop-down
# for selecting a node by id.
visNetwork(GAStech_nodes,
           GAStech_edges_aggregated) %>%
  # The node positions are computed in R by igraph, so the seed has to be
  # given here for the layout to be reproducible; the seed in visLayout()
  # only affects vis.js's own initial placement.
  visIgraphLayout("layout_with_fr", randomSeed = 123) %>%
  visOptions(highlightNearest = TRUE,
             nodesIdSelection = TRUE) %>%
  visLegend() %>%
  visLayout(randomSeed = 123)

Footnotes