已部署到shinyapps.io,详见SHMetro
0.配置环境和加载包
## Encoding
options(encoding = "UTF-8") ## for Chinese text
## Use getOption("encoding") to check whether the setting was changed
## Return the locale string to pass to Sys.setlocale().
##
## os:       operating-system name; "Windows" selects the Windows locale
##           names, any other value selects the POSIX-style names.
## language: "english" (default) or "chinese". Any other value matches no
##           branch of switch(), so the function returns NULL.
loc <- function(os, language = "english") {
  # Evaluated lazily, only inside the branch that switch() selects.
  pick <- function(on_windows, elsewhere) {
    ifelse(os == "Windows", on_windows, elsewhere)
  }
  switch(language,
    english = pick("English_United States.1252", "en_US.UTF-8"),
    chinese = pick("Chinese", "zh_CN.utf-8")
  )
}
## Set the locale for all categories to the Chinese locale string that
## loc() returns for the current operating system
Sys.setlocale(category = "LC_ALL", loc(Sys.info()[["sysname"]], "chinese"))
## Set the working directory; the required packages are loaded right after
## NOTE(review): hard-coded absolute path -- adjust before running elsewhere
setwd("/Users/jeevanyue/Rproject/map/SHMetro")
library(data.table)
library(bit64)
library(dplyr)
library(tidyr)
library(scales)
library(lubridate) #日期处理包
library(shiny)
library(leaflet)
library(lattice)
library(plotly)
library(chorddiag) #绘制chord
1. 地铁数据
# Per-station entry counts and exit counts
shmetro_in <- fread("data/shmetro_in.csv", encoding = "UTF-8")
shmetro_out <- fread("data/shmetro_out.csv", encoding = "UTF-8")
## Trip counts per (entry line, exit line) pair
shmetro_line_in_out <- fread("data/shmetro_line_in_out.csv", encoding = "UTF-8")
## Entry-line x exit-line table: one row per line_in, one column per line_out;
## pairs with no trips become 0
in_out <- shmetro_line_in_out %>%
  spread(line_out, count)
in_out[is.na(in_out)] <- 0
## Station coordinates: keep the first five columns, ordered by line and
## position on the line
stations <- fread("data/stations.csv", encoding = "UTF-8")
stations <- stations %>%
  select(c(1:5)) %>%
  arrange(line, line_id)
stations_no <- nrow(stations)
## For every row, list all lines that serve the same station, joined by "/".
## ave() does this per station in one vectorized pass; unlike a
## `for (i in 1:stations_no)` loop it is also safe when the table is empty.
stations$lines <- ave(
  as.character(stations$line),
  stations$station,
  FUN = function(x) paste(x, collapse = "/")
)
1.1 地铁站经纬度
## Station coordinates: keep the first five columns, ordered by line and
## position on the line
stations <- fread("data/stations.csv", encoding = "UTF-8")
stations <- stations %>%
  select(c(1:5)) %>%
  arrange(line, line_id)
stations_no <- nrow(stations)
## For every row, list all lines that serve the same station, joined by "/".
## ave() does this per station in one vectorized pass; unlike a
## `for (i in 1:stations_no)` loop it is also safe when the table is empty.
stations$lines <- ave(
  as.character(stations$line),
  stations$station,
  FUN = function(x) paste(x, collapse = "/")
)
invisible(gc())
1.2 交通卡交易数据
交通卡的交易信息有7个字段,分别是:卡号、交易日期、交易时间、站点名称、行业名称、交易金额、交易性质。
卡号:交通卡卡号
交易日期:日期格式yyyy-mm-dd
交易时间:时间格式hh:mm:ss
站点名称:内容包括线路和站名,如:"1号线莘庄"
行业名称:都是"地铁"
交易金额:0和大于0的值,0表示进站,大于0的值表示出站
交易性质:"优惠"和"非优惠"
# Read the raw trade file (variant used on macOS).
# stringsAsFactors = FALSE keeps text columns as character on every R
# version, matching the alternative readers commented out below.
system.time(trade <- read.csv("/Users/jeevanyue/Desktop/SPTCC-20150401.csv",
                              header = FALSE, sep = ",",
                              fileEncoding = "GB2312",
                              stringsAsFactors = FALSE))
# On Windows, read with one of the following instead
#system.time(trade <- fread("SPTCC-20150401/SPTCC-20150401.csv",integer64='character',stringsAsFactors=F))
#trade <- read.csv('data/SPTCC-20150401_Sample.txt',header=T,encoding='UTF-8',stringsAsFactors = F)
# Name the seven columns
names(trade) <- c('card_id','date','time','station','vehicle','money','property')
# Keep only the metro records
trade_metro <- trade %>%
  filter(vehicle=='地铁')
rm(trade)
invisible(gc())
# Split "station" (originally line + station name) into "line" and "station"
trade_metro <- trade_metro %>%
  separate(station, c('line', 'station'), sep = '号线')
invisible(gc())
# 5-minute bucket index: seconds since midnight / 300, rounded up
trade_metro <- trade_metro %>%
  mutate(M5 = ceiling(period_to_seconds(hms(time)) / 300))
invisible(gc())
# Drop the columns that are no longer needed
trade_metro <- trade_metro %>%
  select(-vehicle, -property, -date)
invisible(gc())
# Time format (disabled)
#trade_metro$time <- strptime(paste("2015-04-01", trade_metro$time, sep=' '), "%Y-%m-%d %H:%M:%S", tz = "GMT")
1.3 处理异常值
## Rename trade station names that differ from the names used in `stations`.
## Assigning through a logical index on the column is a no-op when nothing
## matches, whereas `df[mask, ]$station <- value` raises an error on a
## data frame when the mask selects zero rows (e.g. on a sample file).
trade_metro$station[trade_metro$station == "淞浜路"] <- "淞滨路"
trade_metro$station[trade_metro$station == "大木桥路 "] <- "大木桥路"
trade_metro$station[trade_metro$station == "上海大学站"] <- "上海大学"
1.4 进/出站数据
## Entry records: zero-amount transactions, columns renamed with an `_in` suffix
trade_metro_in <- trade_metro %>%
  filter(money == 0) %>%
  select(
    card_id,
    time_in = time,
    line_in = line,
    station_in = station,
    M5_in = M5
  )
## Exit records: positive-amount transactions, columns renamed with an `_out`
## suffix (the fare amount is kept)
trade_metro_out <- trade_metro %>%
  filter(money > 0) %>%
  select(
    card_id,
    time_out = time,
    line_out = line,
    station_out = station,
    money,
    M5_out = M5
  )
1.5 虚拟换乘
上海火车站为虚拟换乘,删除半小时内3/4换1和1换3/4的数据
3/4换1的数据
## Transfers from line 3/4 to line 1 at 上海火车站. Author's observation from
## the data: exits on the line 3/4 side are recorded under line 3.
trade_metro_out_34 <- trade_metro_out %>%
  filter(station_out=='上海火车站') %>%
  filter(line_out==3 | line_out==4)
trade_metro_in_1 <- trade_metro_in %>%
  filter(station_in=='上海火车站', line_in==1)
## Pair each exit with the entries of the same card (merge joins on the shared
## card_id column) and keep pairs where the entry follows the exit by at most
## 30 minutes. Unmatched exits get an NA duration and are dropped by filter().
trade_metro_out34_in1 <- merge(trade_metro_out_34, trade_metro_in_1, all.x = TRUE) %>%
  mutate(duration=period_to_seconds(hms(time_in)) - period_to_seconds(hms(time_out))) %>%
  filter(duration>0,duration<=60*30)
### With this much data anything shows up: a few cards exit line 3/4 and then
### enter line 1 several times within half an hour
#trade_metro_out34_in1 <- na.omit(trade_metro_out34_in1)
## Per card and entry 5-minute bucket, keep only the pair with the smallest gap
trade_metro_out34_in1 <- data.table(trade_metro_out34_in1)
trade_metro_out34_in1[, duration_min := min(duration), by=list(card_id, M5_in)]
trade_metro_out34_in1 <- trade_metro_out34_in1 %>%
  filter(duration==duration_min) %>%
  select(-duration_min)
## Author's observation: most people complete the transfer within 10 minutes
#histogram(ceiling(trade_metro_out34_in1$duration/60))
## Remove the matched transfer exits: append them again (columns 1-6 are the
## exit-side columns of the merged table) ...
trade_metro_out <- rbind(trade_metro_out, trade_metro_out34_in1[,c(1:6)])
## ... then drop every row that occurs more than once, all copies included
trade_metro_out <- trade_metro_out[!(duplicated(trade_metro_out) | duplicated(trade_metro_out, fromLast = TRUE)), ]
## Same for the matched transfer entries (card_id plus columns 7-10 are the
## entry-side columns of the merged table)
trade_metro_in <- rbind(trade_metro_in, trade_metro_out34_in1[,c(1,7:10)])
## Drop every entry row that occurs more than once, all copies included
trade_metro_in <- trade_metro_in[!(duplicated(trade_metro_in) | duplicated(trade_metro_in, fromLast = TRUE)), ]
1换3/4的数据
## Transfers from line 1 to line 3/4 at 上海火车站. Author's observation from
## the data: entries on the line 3/4 side are recorded under line 3.
trade_metro_out_1 <- trade_metro_out %>%
  filter(station_out=='上海火车站',line_out==1)
trade_metro_in_34 <- trade_metro_in %>%
  filter(station_in=='上海火车站') %>%
  filter(line_in==3 | line_in==4)
## Pair each exit with the entries of the same card (merge joins on the shared
## card_id column) and keep pairs where the entry follows the exit by at most
## 30 minutes. Unmatched exits get an NA duration and are dropped by filter().
trade_metro_out1_in34 <- merge(trade_metro_out_1, trade_metro_in_34, all.x = TRUE, all.y = FALSE) %>%
  mutate(duration=period_to_seconds(hms(time_in)) - period_to_seconds(hms(time_out))) %>%
  filter(duration>0,duration<=60*30)
#trade_metro_out1_in34 <- na.omit(trade_metro_out1_in34)
## Per card and entry 5-minute bucket, keep only the pair with the smallest gap
trade_metro_out1_in34 <- data.table(trade_metro_out1_in34)
trade_metro_out1_in34[, duration_min := min(duration), by=list(card_id, M5_in)]
trade_metro_out1_in34 <- trade_metro_out1_in34 %>%
  filter(duration==duration_min) %>%
  select(-duration_min)
## Author's observation: most people complete the transfer within 10 minutes
#histogram(ceiling(trade_metro_out1_in34$duration/60))
## Remove the matched transfer exits: append them again (columns 1-6 are the
## exit-side columns of the merged table) ...
trade_metro_out <- rbind(trade_metro_out, trade_metro_out1_in34[,c(1:6)])
## ... then drop every row that occurs more than once, all copies included
trade_metro_out <- trade_metro_out[!(duplicated(trade_metro_out) | duplicated(trade_metro_out, fromLast = TRUE)), ]
## Same for the matched transfer entries (card_id plus columns 7-10 are the
## entry-side columns of the merged table)
trade_metro_in <- rbind(trade_metro_in, trade_metro_out1_in34[,c(1,7:10)])
## Drop every entry row that occurs more than once, all copies included
trade_metro_in <- trade_metro_in[!(duplicated(trade_metro_in) | duplicated(trade_metro_in, fromLast = TRUE)), ]
1.6 地铁站进站数据
根据消费金额为0,每5分钟统计每站地铁的进站人数
## Number of entries per station and 5-minute bucket.
## ungroup() drops the grouping that summarise() leaves on station_in, so the
## result is a plain table before it is renamed and merged.
trade_metro_in_station <- trade_metro_in %>%
  group_by(station_in, M5_in) %>%
  summarise(count = n()) %>%
  ungroup() %>%
  select(station = station_in, M5 = M5_in, count)
#trade_metro_in_station <- na.omit(trade_metro_in_station)
invisible(gc())
## Attach the station coordinates; keep every count row even when the station
## has no match in `stations`
shmetro_in <- merge(trade_metro_in_station, stations, all.x = TRUE, all.y = FALSE)
#rm(trade_metro_in_station)
## Inspect stations that did not match (disabled)
#l <- shmetro_in[is.na(shmetro_in$gps_lat),]
#unique(l$station)
#trade_metro_in_station[trade_metro_in_station$station=="淞浜路",]$station <- "淞滨路"
#trade_metro_in_station[trade_metro_in_station$station=="大木桥路 ",]$station <- "大木桥路"
#trade_metro_in_station[trade_metro_in_station$station=="上海大学站",]$station <- "上海大学"
#stations[grepl("淞滨路", stations$station),]$station
#trade_metro_in_station[grepl("淞浜路", trade_metro_in_station$station),]$station <- "淞滨路"
#shmetro_in <- na.omit(shmetro_in)
invisible(gc())
#write.csv(shmetro_in,"shmetro_in.csv",row.names = F,fileEncoding="UTF-8")
1.7 地铁站出站数据
根据消费金额大于0,每5分钟统计每站地铁的出站人数
## Number of exits per station and 5-minute bucket.
## ungroup() drops the grouping that summarise() leaves on station_out, so the
## result is a plain table before it is renamed and merged.
trade_metro_out_station <- trade_metro_out %>%
  group_by(station_out, M5_out) %>%
  summarise(count = n()) %>%
  ungroup() %>%
  select(station = station_out, M5 = M5_out, count)
#trade_metro_out_station <- na.omit(trade_metro_out_station)
invisible(gc())
## Attach the station coordinates; keep every count row even when the station
## has no match in `stations`
shmetro_out <- merge(trade_metro_out_station, stations, all.x = TRUE, all.y = FALSE)
#rm(trade_metro_out_station)
#shmetro_out <- na.omit(shmetro_out)
invisible(gc())
#write.csv(shmetro_out,"shmetro_out.csv",row.names = F,fileEncoding="UTF-8")
1.8 地铁线路起始和终点
## Pair every entry with the exits of the same card (merge joins on the shared
## card_id column) and compute the ride time in seconds and in 5-minute
## buckets. Only pairs where the exit comes after the entry are kept; entries
## without any exit get an NA duration and are dropped by filter().
trade_metro_in_out <- merge(trade_metro_in, trade_metro_out, all.x = TRUE, all.y = FALSE) %>%
  mutate(duration=period_to_seconds(hms(time_out)) - period_to_seconds(hms(time_in)), duration_M5=M5_out-M5_in) %>%
  filter(duration>0)
# Per card and entry 5-minute bucket, keep the nearest following exit as the
# exit of that trip
trade_metro_in_out <- data.table(trade_metro_in_out)
trade_metro_in_out[, duration_min := min(duration), by=list(card_id, M5_in)]
trade_metro_in_out <- trade_metro_in_out %>%
  filter(duration==duration_min) %>%
  select(-duration_min)
# Count trips from entry line A to exit line B.
# ungroup() removes the leftover grouping on line_in before that column is
# converted and the table is reshaped.
shmetro_line_in_out <- trade_metro_in_out %>%
  group_by(line_in,line_out) %>%
  summarise(count = n()) %>%
  ungroup()
shmetro_line_in_out$line_in <- as.numeric(shmetro_line_in_out$line_in)
shmetro_line_in_out$line_out <- as.numeric(shmetro_line_in_out$line_out)
# Sort by entry line, then exit line
shmetro_line_in_out <- arrange(shmetro_line_in_out,line_in,line_out)
# Turn the exit lines into columns; pairs with no trips become 0
in_out <- shmetro_line_in_out %>%
  spread(line_out,count)
in_out[is.na(in_out)] <- 0
#write.csv(shmetro_line_in_out,"shmetro_line_in_out.csv",row.names = F,fileEncoding="UTF-8")
2. 绘图
2.1 相关数据及地图
# Display colour for each metro line (lines 1-13 and 16)
lines_color <- data.frame(
  line = c(1:13, 16),
  color = c(
    "#ED3229", "#36B854", "#FFD823", "#320176", "#823094", "#CF047A",
    "#F3560F", "#008CC1", "#91C5DB", "#C7AFD3", "#8C2222", "#007a61",
    "#ec91cc", "#32D2CA"
  )
)
# Leaflet palette over the line values found in `stations`
pal <- colorFactor(as.character(lines_color$color), domain = stations$line)
# Helper: add one polyline for a metro line to the global `Shanghai` map.
#
# l_no:      line number; selects the rows of `stations` and the colour from
#            `lines_color`.
# line_s_id: optional index vector into that line's stations; when NULL the
#            polyline runs through all of the line's stations in table order.
#
# Returns the map with the polyline added (the global map is not modified;
# callers assign the result back).
draw_line_add <- function(l_no, line_s_id = NULL) {
  line_color <- lines_color[lines_color$line == l_no, ]$color
  line_data <- stations[stations$line == l_no, ]
  lat <- line_data$gps_lat
  lng <- line_data$gps_lon
  if (!is.null(line_s_id)) {
    lat <- lat[line_s_id]
    lng <- lng[line_s_id]
  }
  Shanghai %>%
    addPolylines(lat = lat, lng = lng, color = line_color, weight = 2)
}
## Base map of Shanghai with the line-colour legend
Shanghai <- leaflet() %>%
  setView(lng = 121.60, lat = 31.20, zoom = 10) %>%
  addProviderTiles("CartoDB.Positron") %>%
  addLegend(position = "bottomleft", pal = pal, values = stations$line)

## Add the polylines of every line; lines 4, 10 and 11 need extra segments
for (l in unique(stations$line)) {
  line_length <- nrow(stations[stations$line == l, ])
  if (l == 10) {
    # Line 10 splits into two branches after 龙溪路 (author's note): draw
    # everything except the last three stations, then station 24 followed
    # by the last three stations
    Shanghai <- draw_line_add(l_no = l, line_s_id = c(1:(line_length - 3)))
    Shanghai <- draw_line_add(l_no = l, line_s_id = c(24, (line_length - 2):line_length))
  } else if (l == 11) {
    # Line 11 splits into two branches after 嘉定新城 (author's note): draw
    # everything except the last seven stations, then station 28 followed
    # by the last seven stations
    Shanghai <- draw_line_add(l_no = l, line_s_id = c(1:(line_length - 7)))
    Shanghai <- draw_line_add(l_no = l, line_s_id = c(28, (line_length - 6):line_length))
  } else {
    Shanghai <- draw_line_add(l_no = l)
    if (l == 4) {
      # Line 4 is a loop (author's note): join the last station back to
      # the first one
      Shanghai <- draw_line_add(l_no = l, line_s_id = c(1, line_length))
    }
  }
}
2.2 chord图数据
## Data for the chord diagram: columns 2-15 of in_out as a numeric matrix,
## with the entry lines as row names and the exit lines as column names
metro_chord <- data.matrix(as.data.frame(in_out)[, 2:15])
haircolors <- in_out$line_in
dimnames(metro_chord) <- list(
  have = haircolors,
  prefer = colnames(metro_chord)
)
## One colour per chord group
groupColors <- c(
  "#ED3229", "#36B854", "#FFD823", "#320176", "#823094", "#CF047A",
  "#F3560F", "#008CC1", "#91C5DB", "#C7AFD3", "#8C2222", "#007a61",
  "#ec91cc", "#32D2CA"
)
#chorddiag(metro_chord, groupColors = groupColors, margin=50, showTicks=F, groupnamePadding = 5)
2.3 图形参数
# Position/background settings list
# NOTE(review): `b` is not referenced by the code visible in this file
b <- list(x = 0, y = 1, bgcolor = "#00FFFFFF")
# y axis of the TOP 5 bar charts: no title, lines, tick labels or grid
yax <- list(title = "", zeroline = FALSE, showline = FALSE,
            showticklabels = FALSE, showgrid = FALSE)
# x axis of the TOP 5 bar charts: small title font, slanted black labels
xax <- list(title = "", titlefont = list(size = 8), tickangle = -20,
            color = "black")
2.4 UI 和 SERVER
## Shiny UI: a navbar page with three tabs -- entry flow, exit flow and the
## line-to-line chord diagram. Both flow tabs show a full-window leaflet map
## with a floating control panel (time slider, line selector, TOP 5 chart,
## legend toggle).
ui <- shinyUI(navbarPage(
  "SHMetro",

  # ---- Entry flow ----
  tabPanel(
    "进站流量",
    div(
      class = "outer",
      #tags$style(type = "text/css", "html, body {width:100%;height:100%}"),
      tags$style(type = "text/css", ".outer {position: fixed; top: 41px; left: 0; right: 0; bottom: 0; overflow: hidden; padding: 0}"),
      leafletOutput("map", width = "100%", height = "100%"),
      absolutePanel(
        top = 10, right = 10,
        h4(textOutput("output_slider_time")),
        # Fixed range 05:00-24:00 on 2015-04-01 in 5-minute steps (the
        # original kept a disabled variant deriving the bounds from
        # shmetro_in$M5)
        sliderInput(
          "slider_time", "Time:",
          min = as.POSIXct(5*60*60, origin = "2015-04-01", tz = "GMT"),
          max = as.POSIXct(24*60*60, origin = "2015-04-01", tz = "GMT"),
          value = as.POSIXct(5*60*60, origin = "2015-04-01", tz = "GMT"),
          step = 60*5,
          timeFormat = "%T",
          timezone = "GMT"
        ),
        selectInput("select_line", "Line", c("All", lines_color$line)),
        h4("TOP 5"),
        plotlyOutput("in_top5", height = 200),
        checkboxInput("legend", "Show legend", TRUE)
      )
    )
  ),

  # ---- Exit flow ----
  tabPanel(
    "出站流量",
    div(
      class = "outer",
      #tags$style(type = "text/css", "html, body {width:100%;height:100%}"),
      tags$style(type = "text/css", ".outer {position: fixed; top: 41px; left: 0; right: 0; bottom: 0; overflow: hidden; padding: 0}"),
      leafletOutput("map_out", width = "100%", height = "100%"),
      absolutePanel(
        top = 10, right = 10,
        h4(textOutput("output_slider_time_out")),
        # Same fixed 05:00-24:00 range as the entry-flow slider
        sliderInput(
          "slider_time_out", "Time:",
          min = as.POSIXct(5*60*60, origin = "2015-04-01", tz = "GMT"),
          max = as.POSIXct(24*60*60, origin = "2015-04-01", tz = "GMT"),
          value = as.POSIXct(5*60*60, origin = "2015-04-01", tz = "GMT"),
          step = 60*5,
          timeFormat = "%T",
          timezone = "GMT"
        ),
        selectInput("select_line_out", "Line", c("All", lines_color$line)),
        h4("TOP 5"),
        plotlyOutput("out_top5", height = 200),
        checkboxInput("legend_out", "Show legend", TRUE)
      )
    )
  ),

  # ---- Line-to-line chord diagram ----
  tabPanel(
    "线路关联",
    div(
      class = "outer",
      #tags$style(type = "text/css", "html, body {width:100%;height:100%}"),
      tags$style(type = "text/css", ".outer {position: fixed; top: 41px; left: 0; right: 0; bottom: 0; overflow: hidden; padding: 0}"),
      chorddiagOutput("line_chord", width = "100%", height = "100%")
    )
  )
))
## Shiny server logic for the three tabs: entry flow, exit flow and the
## line-to-line chord diagram.
##
## Reads the globals shmetro_in, shmetro_out, stations, Shanghai, pal, yax,
## xax, metro_chord and groupColors.
server <- shinyServer(function(input, output, session) {
  # Convert a slider time to its 5-minute bucket index: seconds since
  # midnight / 300, rounded up (the formula used for the M5 column).
  slider_to_m5 <- function(slider_time) {
    ceiling(period_to_seconds(hms(format(slider_time, "%H:%M:%S"))) / 300)
  }

  ## ---- Entry flow ----
  # Entry counts for the selected time bucket, optionally limited to one line
  filteredData <- reactive({
    m5_selected <- slider_to_m5(input$slider_time)
    if (input$select_line == "All") {
      shmetro_in %>%
        filter(M5 == m5_selected)
    } else {
      shmetro_in %>%
        filter(M5 == m5_selected, line == as.numeric(input$select_line))
    }
  })
  # Five busiest stations in the current selection; a station's rows are
  # summed and labelled with its lowest line number
  stations_in_top5 <- reactive({
    filteredData() %>%
      group_by(station) %>%
      summarise(count = sum(count), line = min(line)) %>%
      arrange(desc(count)) %>%
      head(5) %>%
      as.data.frame()
  })
  ## Selected time shown above the slider
  output$output_slider_time <- renderText({
    paste0("Time: ", format(input$slider_time, "%H:%M:%S"))
  })
  # Static layer: the base map plus one small dot per station
  output$map <- renderLeaflet({
    Shanghai %>%
      addCircles(stations$gps_lon, stations$gps_lat, color = pal(stations$line), radius = 1, popup = paste(stations$station, stations$lines), fillOpacity = 1, stroke = FALSE) %>%
      clearMarkerClusters() %>%
      clearMarkers()
  })
  # Dynamic layer: redraw the flow circles whenever the selection changes.
  # Counts are summed per station and sorted ascending before drawing.
  observe({
    data_in_circle <- data.table(filteredData())[, count := sum(count), by = list(station, M5)] %>%
      arrange(count)
    leafletProxy("map", data = data_in_circle) %>%
      clearMarkerClusters() %>%
      clearMarkers() %>%
      addCircleMarkers(data_in_circle$gps_lon, data_in_circle$gps_lat, color = pal(data_in_circle$line), fillOpacity = 0.5, stroke = FALSE, popup = paste(data_in_circle$station, data_in_circle$line, data_in_circle$count, sep = ","), radius = (data_in_circle$count)^(1/2.5))
  })
  # TOP 5 bar chart
  output$in_top5 <- renderPlotly({
    # Nothing to plot when the selection is empty
    if (nrow(stations_in_top5()) == 0)
      return(NULL)
    plot_ly(stations_in_top5(),
            x = stations_in_top5()$station,
            y = stations_in_top5()$count,
            type = "bar",
            marker = list(color = pal(stations_in_top5()$line)),
            bgcolor = "#00FFFFFF") %>%
      layout(showlegend = FALSE,
             yaxis = yax, xaxis = xax, plot_bgcolor = '#00FFFFFF',
             paper_bgcolor = '#00FFFFFF')
  })
  # Separate observer so the legend can be toggled without redrawing the map:
  # remove any existing legend and add it back only when the box is ticked
  observe({
    proxy <- leafletProxy("map")
    proxy %>% clearControls()
    if (input$legend) {
      proxy %>% addLegend(position = "bottomleft", pal = pal, values = stations$line)
    }
  })

  ## ---- Exit flow ----
  # Exit counts for the selected time bucket, optionally limited to one line.
  # Both branches read shmetro_out; the single-line branch previously read
  # shmetro_in, so the exit tab showed entry counts once a line was chosen.
  filteredData_out <- reactive({
    m5_selected <- slider_to_m5(input$slider_time_out)
    if (input$select_line_out == "All") {
      shmetro_out %>%
        filter(M5 == m5_selected)
    } else {
      shmetro_out %>%
        filter(M5 == m5_selected, line == as.numeric(input$select_line_out))
    }
  })
  # Five busiest stations in the current selection
  stations_out_top5 <- reactive({
    filteredData_out() %>%
      group_by(station) %>%
      summarise(count = sum(count), line = min(line)) %>%
      arrange(desc(count)) %>%
      head(5) %>%
      as.data.frame()
  })
  ## Selected time shown above the slider
  output$output_slider_time_out <- renderText({
    paste0("Time: ", format(input$slider_time_out, "%H:%M:%S"))
  })
  # Static layer: the base map plus one small dot per station
  output$map_out <- renderLeaflet({
    Shanghai %>%
      addCircles(stations$gps_lon, stations$gps_lat, color = pal(stations$line), radius = 1, popup = paste(stations$station, stations$lines), fillOpacity = 1, stroke = FALSE) %>%
      clearMarkerClusters() %>%
      clearMarkers()
  })
  # Dynamic layer: redraw the flow circles whenever the selection changes
  observe({
    data_out_circle <- data.table(filteredData_out())[, count := sum(count), by = list(station, M5)] %>%
      arrange(count)
    leafletProxy("map_out", data = data_out_circle) %>%
      clearMarkerClusters() %>%
      clearMarkers() %>%
      addCircleMarkers(data_out_circle$gps_lon, data_out_circle$gps_lat, color = pal(data_out_circle$line), fillOpacity = 0.5, stroke = FALSE, popup = paste(data_out_circle$station, data_out_circle$line, data_out_circle$count, sep = ","), radius = (data_out_circle$count)^(1/2.5))
  })
  # TOP 5 bar chart
  output$out_top5 <- renderPlotly({
    # Nothing to plot when the selection is empty
    if (nrow(stations_out_top5()) == 0)
      return(NULL)
    plot_ly(stations_out_top5(),
            x = stations_out_top5()$station,
            y = stations_out_top5()$count,
            type = "bar",
            marker = list(color = pal(stations_out_top5()$line)),
            bgcolor = "#00FFFFFF") %>%
      layout(showlegend = FALSE,
             yaxis = yax, xaxis = xax, plot_bgcolor = '#00FFFFFF',
             paper_bgcolor = '#00FFFFFF')
  })
  # Separate observer so the legend can be toggled without redrawing the map
  observe({
    proxy <- leafletProxy("map_out")
    proxy %>% clearControls()
    if (input$legend_out) {
      proxy %>% addLegend(position = "bottomleft", pal = pal, values = stations$line)
    }
  })

  ## ---- Line-to-line chord diagram ----
  output$line_chord <- renderChorddiag({
    chorddiag(metro_chord, groupColors = groupColors, showTicks = FALSE, groupnamePadding = 5)
  })
})
2.5 运行shinyApp
# Build the Shiny app from the `ui` and `server` objects defined above
shinyApp(ui = ui,server = server)
进站流量
出站流量
进出地铁
**粗体** _斜体_ [链接](http://example.com) `代码` - 列表 > 引用
。你还可以使用@
来通知其他用户。