最美情侣中文字幕电影,在线麻豆精品传媒,在线网站高清黄,久久黄色视频

歡迎光臨散文網 會員登陸 & 注冊

R語言公交地鐵路線網絡圖實現數據挖掘實戰

2021-05-31 21:44 作者:拓端tecdat  | 我要投稿

原文鏈接

對于龐大的公交地鐵路線信息的數據挖掘,一般軟件遇到的問題主要有兩點:1.對于文本信息的挖掘,特別是中文詞匯的挖掘,缺乏成熟的工具或者軟件包,2.對于大數據量,一般軟件的讀取和處理會遇到問題。即使一個月的部分區域路線信息也會達到幾百m以上,因此,對于這類數據,無論從算法運行還是數據讀取來說普通的SQL語言或者matlab軟件處理起來都乏善可陳。對于這類數據,我們一般用r軟件可以輕松實現讀取,數據挖掘以及可視化的過程。

例如對于下面這樣的車站數據:

和近600M的進出站信息的數據, 如果要實現每隔一段時間的對應路線的進出站人數整理以及可視化的過程,我們可以進行以下的步驟進行分析:

首先我們進行數據的讀取和預處理

  # Smart-card gate transactions for January 2015: per-station and per-line
  # counts of exits and entries in ten-minute slots between 06:00 and 22:00.

  # Install dplyr only when it is missing instead of reinstalling on every run.
  if (!requireNamespace("dplyr", quietly = TRUE)) {
    install.packages("dplyr")
  }
  library("dplyr") # arrange() is used for sorting

  # ---- Read the raw transactions and name the columns ----
  data <- read.table("E:\\201501一卡通進出站.txt", stringsAsFactors = FALSE)
  colnames(data) <- c(
    "邏輯卡號",
    "交易日期",
    "交易時間",
    "票種",
    "交易代碼",
    "交易車站",
    "上次交易車站"
  )

  # Count exits and entries of `trades` inside one ten-minute slot.
  #
  # trades: data frame with the transaction time in column 2 (compared as a
  #         number of the form HHMMSS) and the previous station in column 4;
  #         a previous station of 0 is counted as an entry, anything else as
  #         an exit.
  # hour:   hour of the slot.
  # slot:   1..6, the ten-minute slot within the hour.
  #
  # The slot is the half-open interval (start, end]. The last slot of an hour
  # ends at the next full hour ((hour + 1) * 10000) rather than at HH6000, so
  # a transaction stamped exactly on the hour is counted in the slot ending
  # there instead of being silently dropped.
  #
  # Returns an integer vector c(out = <exits>, `in` = <entries>).
  slot_counts <- function(trades, hour, slot) {
    lower <- hour * 10000 + (slot - 1) * 1000
    upper <- if (slot == 6) (hour + 1) * 10000 else hour * 10000 + slot * 1000
    previous <- trades[which(trades[, 2] > lower & trades[, 2] <= upper), 4]
    c(out = length(which(previous != 0)), `in` = length(which(previous == 0)))
  }

  # Append a header plus one row per ten-minute slot to `path`.
  #
  # path:        output file (opened in append mode for every row).
  # trades_list: list of data frames (layout as in slot_counts()); the counts
  #              of all of them are added together per slot.
  write_line_summary <- function(path, trades_list) {
    cat(file = path, append = TRUE,
        " 點", " 分", " 出站人數", " ", "進站人數 ", "\n")
    for (hour in 6:21) {
      for (slot in 1:6) {
        n <- Reduce(`+`, lapply(trades_list, slot_counts,
                                hour = hour, slot = slot))
        cat(file = path, append = TRUE,
            hour, " ", slot - 1, "0 ", " ", n[["out"]], " ", " ", n[["in"]],
            "\n")
      }
    }
    invisible(path)
  }

  # ---- Per-day, per-station processing ----
  for (ii in 20150101:20150131) { # one pass per day
    data1 <- data[which(data[, 2] == ii), ] # rows of this day
    # keep transaction date, transaction time, station, previous station
    data2 <- data1[, c(2, 3, 6, 7)]
    data2 <- data2[order(data2$交易車站), ]

    bus <- unique(data2[, 3]) # stations seen on this day
    # seq_along() makes the loop a no-op when the day has no rows
    for (busi in seq_along(bus)) {
      data3 <- data2[which(data2[, 3] == bus[busi]), ] # rows of this station
      # sort by date first, then by time
      data4 <- arrange(data3, 交易日期, 交易時間)

      # paste() keeps its default " " separator so the output file names stay
      # exactly as before
      slot_file <- paste("E:\\", bus[busi], "車站", ii, "日一卡通進出站時間.txt")
      for (time in 6:21) {
        for (i in 1:6) {
          n <- slot_counts(data4, time, i)
          if (i != 6) {
            cat(file = slot_file, append = TRUE,
                ii, "日", time, "點", i - 1, "0分到", i, "0分的出站人數為",
                n[["out"]], " ", "進站人數為", n[["in"]], "\n")
          } else {
            # the last slot of the hour is labelled with the following hour
            cat(file = slot_file, append = TRUE,
                ii, "日", time, "點", i - 1, "0分到", time + 1,
                "點0分的出站人數為",
                n[["out"]], " ", "進站人數為", n[["in"]], "\n")
          }
        }
      }

      # write the sorted transactions of this station and day
      write.table(data4, paste("E:\\", ii, "日 ", bus[busi], "車站一卡通進出站整理.txt"))
    }
  }

  # ---- Line 1 / line 2 summaries ----
  # NOTE: this part runs once, after the day loop, so `data2` holds the last
  # processed day only (already ordered by station inside the loop).
  # A station belongs to a line when its code starts with that line's digit.
  line1 <- data2[substr(data2$交易車站, 1, 1) == "1", ] # line 1
  line2 <- data2[substr(data2$交易車站, 1, 1) == "2", ] # line 2
  line1_sorted <- arrange(line1, 交易日期, 交易時間)
  line2_sorted <- arrange(line2, 交易日期, 交易時間)

  # Line 1
  write_line_summary("E:\\1號線一卡通進出站時間.txt", list(line1_sorted))
  # exits / entries are taken from the line's own rows
  dataout <- line1_sorted[which(line1_sorted[, 4] != 0), ]
  datain <- line1_sorted[which(line1_sorted[, 4] == 0), ]
  numout <- nrow(dataout) # total exits on line 1
  numin <- nrow(datain) # total entries on line 1
  write.table(line1_sorted, "E:\\1號線一卡通進出站整理.txt")

  # Line 2
  write_line_summary("E:\\2號線一卡通進出站時間.txt", list(line2_sorted))
  dataout <- line2_sorted[which(line2_sorted[, 4] != 0), ]
  datain <- line2_sorted[which(line2_sorted[, 4] == 0), ]
  write.table(line2_sorted, "E:\\2號線一卡通進出站整理.txt")

  # Lines 1 and 2 combined: per-slot counts of both lines added together
  write_line_summary("E:\\1,2號線一卡通進出站時間.txt",
                     list(line1_sorted, line2_sorted))

通過以上過程,我們可以將整理后的數據輸出到對應的文件中:

以及交通路線的可視化過程;

對于交通路線的網絡圖來說,r中igraph包的確是實現利器:

  # Build and plot a directed graph of consecutive stops for the first bus
  # route found in five text dumps.
  library(igraph)

  # ---- Read the data ----
  ljhdat1 <- readLines("E:/ shanghai_1.txt")
  ljhdat2 <- readLines("E:/ shanghai_2.txt")
  ljhdat3 <- readLines("E:/ shanghai_3.txt")
  ljhdat4 <- readLines("E:/ shanghai_4.txt")
  ljhdat5 <- readLines("E:/ shanghai_5.txt")

  # Return every line that sits directly before an empty line; each such line
  # is treated as the description of one bus route. An empty first line has no
  # predecessor and contributes nothing; an empty input yields character(0).
  extract_routes <- function(lines) {
    blank <- which(lines == "")
    lines[blank[blank > 1] - 1]
  }

  # Route descriptions of all five files, in file order
  bus <- c(
    extract_routes(ljhdat1),
    extract_routes(ljhdat2),
    extract_routes(ljhdat3),
    extract_routes(ljhdat4),
    extract_routes(ljhdat5)
  )
  stopifnot(length(bus) > 0) # the first route is needed below

  # ---- Split the first route into its stops ----
  route <- list(0)
  # The first space-separated token is discarded
  # (presumably the route name -- confirm against the data files).
  stops <- unlist(strsplit(bus[1], split = " "))[-1]
  # Drop "#" markers. A logical mask is used because x[-which(cond)] would
  # return an empty vector when nothing matches.
  route[[1]] <- stops[stops != "#"]
  n <- length(route[[1]])

  # Edge list: every stop paired with the stop that follows it. head()/tail()
  # stay correct for routes with fewer than two stops, unlike 1:n-1 and 2:n
  # (`1:n-1` parses as (1:n) - 1).
  d <- data.frame(
    from = head(route[[1]], -1),
    to = tail(route[[1]], -1)
  )
  g <- graph_from_data_frame(d, directed = TRUE)
  plot(g)

  # ---- Split all routes into their stops ----
  route1 <- character(0)

對于最后生成的網絡圖由于路線眾多,在查看的過程中可以通過設置可視化參數來進一步優化。

?


R語言公交地鐵路線網絡圖實現數據挖掘實戰的評論 (共 條)

分享到微博請遵守國家法律
黄石市| 师宗县| 尼木县| 夏津县| 织金县| 旺苍县| 即墨市| 郧西县| 玛沁县| 南通市| 平陆县| 澜沧| 永川市| 盐城市| 磐安县| 东兴市| 松原市| 玉林市| 贵州省| 新建县| 克拉玛依市| 遂溪县| 曲周县| 滨海县| 达尔| 夏河县| 慈溪市| 廉江市| 江源县| 洪雅县| 广平县| 三门县| 土默特右旗| 监利县| 汕头市| 贞丰县| 长武县| 天峻县| 四会市| 乐平市| 赣州市|