############### R-CODE FOR PROJECT TYCHO CIRCLE DIAGRAM ######################################################### # This code provides the basic structure of the Projec Tycho circle diagram. This code has not been generalized # # to use as function to various types of data, but is meant to reveal the structure and functions used for # # adaptation to other types of data. # # # # Please cite the original publication of this graph as the origin of this code or refer to the Project Tycho # # website (www.tycho.pitt.edu). # # # # The original study citatiion is: Willem G. van Panhuis, John Grefenstette, Su Yon Jung, Nian Shong Chok, # # Anne Cross, Heather Eng, Bruce Y Lee, Vladimir Zadorozhny, Shawn Brown, Derek Cummings, Donald S. Burke. # # Contagious Diseases in the United States from 1888 to the present. NEJM 2013; 369(22): 2152-2158 # # # # This code has been created by: Wilbert van Panhuis, University of Pittsburgh Graduate School of Public # # Health, Email: wav10@pitt.edu) # ################################################################################################################# ################################################################################################################# # This code includes the following subcomponents: # # 1. it draws the concentric circles and the spokes radiating from the center to the outer ring # # 2. it determines start and end-coordinates for lines to be added in each quadrant # # 3. for each disease, it includes a loop that: # # 3.1. determines data availability and the number of subcategories reported each year # # 3.2. plots the year-to-year line segments for each year with available data with different colors depending # # on the number of subcategories # # 3.3. repeats 3.1 and 3.2 for death reports, these are colored in red # ################################################################################################################# ################################################################################################################# # This code uses the following data: # # 1. A data frame with the start year and total length (in weeks) of time series per disease # # 2. For each disease, a data frame with the number of subcategories per year: # # NA = no data (no line segments plotted) # # positive = data, i.e. line segments plotted with different colors for each no. of subcategories # ################################################################################################################# # START DATASET, FOR EACH DISEASE, THE START YEAR AND THE TOTAL NUMBER OF WEEKS (INCL. WEEKS WITHOUT DATA) dis00<-# data from some source names(dis00)<-c("disease", "startyr") dis00$weeks<-aggregate(dis$year, by=list(dis$disease), length)[,2] dis00<-dis00[order(dis00$startyr, dis00$weeks, decreasing=F),] lun<-function(x){ length(unique(x)) } # CIRCLE DIAGRAM, WITH OUTPUT WRITTEN TO VECTOR FORMAT POSTCRIPT FILE postscript("C:\\Users\\wav10\\Vanpanhuis\\Research\\Manuscripts\\In_progress\\Tycho_impactvaccines\\NEJM_revision\\Fig1.ps", width=4.5, height=4.5, horizontal=F, paper="letter", pagecentre=T) #dev.new(5.5,5.5), USE THIS TO PLOT IN THE GRAPHICS DEVICE ON SCREEN # PLOT THE CONCENTRIC CIRCLES circle<-function(cx,cy,r,n, col, border){ nq<-n/4 circ<-2*pi*r rads<-(c(1:nq) * (circ/n)) / r x1<-vector() x2<-vector() x3<-vector() x4<-vector() y1<-vector() y2<-vector() y3<-vector() y4<-vector() for(a in 1:nq){ x1[a]=cx + (cos(rads[a])*r) # Q1 y1[a]=cy + (sin(rads[a])*r) x2[a]=cx - (sin(rads[a])*r) # Q4 y2[a]=cy + (cos(rads[a])*r) x3[a]=cx - (cos(rads[a])*r) # Q3 y3[a]=cy - (sin(rads[a])*r) x4[a]=cx + (sin(rads[a])*r) # Q2 y4[a]=cy - (cos(rads[a])*r) } x<-c(x1,x2,x3,x4) y<-c(y1,y2,y3,y4) polygon(x, y, col=col, border=border) } par(mar=c(0,0,0,0)) plot(c(1:100), c(1:100), pch="", axes=F, xlab="", ylab="") for (i in 0:13){ circle(50,50,50-(i*(50/13)), 100, col=NA, border="grey70") } # PLOT THE SPOKES radials<-function(cx,cy,r,quad,pcirc,posquad, col, lwd) { circ<-2*pi*r rads<-(pcirc*circ*posquad) / r x1<-cx y1<-cy if (quad==1){ x2<-cx + (cos(rads)*r) y2<-cy + (sin(rads)*r) } if (quad==2){ x2<-cx + (sin(rads)*r) y2<-cy - (cos(rads)*r) } if (quad==3){ x2<-cx - (cos(rads)*r) y2<-cy - (sin(rads)*r) } if (quad==4){ x2<-cx - (sin(rads)*r) y2<-cy + (cos(rads)*r) } x<-c(x1,x2) y<-c(y1,y2) lines(x, y, col=col, lwd=lwd) c(x,y) } # DETERMINE THE COORDINATES FOR THE DISEASE LINES IN EACH QUADRANT guides<-list() for (i in 1:20){ guides[[i]]<-radials(cx=50, cy=50, r=50, quad=1, pcirc=1/40, posquad=c(8:1)[i], col="grey70", lwd=1) guides[[i+20]]<-radials(cx=50, cy=50, r=50, quad=2, pcirc=1/52, posquad=c(13:1)[i], col="grey70", lwd=1) guides[[i+40]]<-radials(cx=50, cy=50, r=50, quad=3, pcirc=1/76, posquad=c(19:1)[i], col="grey70", lwd=1) guides[[i+60]]<-radials(cx=50, cy=50, r=50, quad=4, pcirc=1/80, posquad=c(20:5)[i], col="grey70", lwd=1) } guides<-data.frame(do.call("rbind", guides)) names(guides)<-c("x1", "x2", "y1", "y2") guides<-guides[!is.na(guides$x2),] guides$xpyr<-abs((guides$x2 - guides$x1))/133 # 8 years from centroid to first ring (1888) guides$ypyr<-abs((guides$y2 - guides$y1))/133 # LOOP FUNCTION TO DRAW DISEASE LINE SEGMENTS FOR EACH DISEASE SEPARATELY, 56 DISEASES IN THIS CASE for(i in 1:56){ dat<-data.frame(c(1888:2011)) names(dat)<-"year" # DETERMINES THE COORDINATES FOR EACH YEAR ALONG THE SPOKES if(guides$x2[i]>=50 & guides$y2[i]>=50) { dat$x<-guides$x1[i] + c(11:134)*guides$xpyr[i] dat$y<-guides$y1[i] + c(11:134)*guides$ypyr[i]} if(guides$x2[i]<50 & guides$y2[i]>=50) { dat$x<-guides$x1[i] - c(11:134)*guides$xpyr[i] dat$y<-guides$y1[i] + c(11:134)*guides$ypyr[i]} if(guides$x2[i]>=50 & guides$y2[i]<50) { dat$x<-guides$x1[i] + c(11:134)*guides$xpyr[i] dat$y<-guides$y1[i] - c(11:134)*guides$ypyr[i]} if(guides$x2[i]<50 & guides$y2[i]<50) { dat$x<-guides$x1[i] - c(11:134)*guides$xpyr[i] dat$y<-guides$y1[i] - c(11:134)*guides$ypyr[i]} # LOAD DISEASE CASES OR DEATHS DATA dis01<-# FOR EACH YEAR AND FOR CASES AND DEATHS SEPARATELY, LIST THE DISEASE AND EACH SUBCATEGORY # DRAW LINES FOR CASES ONLY # FIRST SELECT RECORDS ABOUT CASES if (sum(dis01$event=="CASES")>0) { # THEN COUNT THE NUMBER OF SUBCATEGORIES PER YEAR dis01cas<-aggregate(dis01$subcategory_new[dis01$event=="CASES"], by=list(dis01$year[dis01$event=="CASES"]), lun) names(dis01cas)<-c("year", "subcat") # MERGE DATA AVAILABILITY WITH THE COORDINATES ON THE SPOKES dis01cas<-merge(dis01cas, dat, all.y=T) # DEFINE THE LINE SEGMENTS for(a in 1:123){ if(!is.na(dis01cas$subcat[a]) & !is.na(dis01cas$subcat[a+1])){ x1<-dis01cas$x[a] x2<-dis01cas$x[a+1] y1<-dis01cas$y[a] y2<-dis01cas$y[a+1] # PLOT THE LINE SEGMENTS if (dis01cas$subcat[a]==1) lines(x=c(x1,x2), y=c(y1,y2), lwd=6, col="black") if (dis01cas$subcat[a]==2) lines(x=c(x1,x2), y=c(y1,y2), lwd=6, col="limegreen") if (dis01cas$subcat[a]==3) lines(x=c(x1,x2), y=c(y1,y2), lwd=6, col="blue") if (dis01cas$subcat[a]>3) lines(x=c(x1,x2), y=c(y1,y2), lwd=6, col="orange") } } } # DRAW LINES FOR DEATHS IN THE ABSENCE OF CASES DATA if (sum(dis01$event=="CASES")==0 & sum(dis01$event=="DEATHS")>0) { dis01dea<-aggregate(dis01$subcategory_new[dis01$event=="DEATHS"], by=list(dis01$year[dis01$event=="DEATHS"]), lun) names(dis01dea)<-c("year", "subcat") dis01dea<-merge(dis01dea, dat, all.y=T) for(a in 1:123){ if(!is.na(dis01dea$subcat[a]) & !is.na(dis01dea$subcat[a+1])){ x1<-dis01dea$x[a] x2<-dis01dea$x[a+1] y1<-dis01dea$y[a] y2<-dis01dea$y[a+1] lines(x=c(x1,x2), y=c(y1,y2), lwd=6, col="red")} } } # DRAW LINES FOR DEATHS IN TEH PRESENCE OF CASE DATA (OFFSET LINE VS. THE SPOKE COORDINATES) if (sum(dis01$event=="CASES")>0 & sum(dis01$event=="DEATHS")>0) { dat<-data.frame(c(1888:2010)) names(dat)<-"year" # assign small decrease of y values if(guides$x2[i]>=50 & guides$y2[i]>=75) { dat$x<-guides$x1[i] + c(11:133)*guides$xpyr[i] + 0.4 dat$y<-guides$y1[i] + c(11:133)*guides$ypyr[i] -0.3} if(guides$x2[i]>=50 & guides$y2[i]>=50 & guides$y2[i]<75) { dat$x<-guides$x1[i] + c(11:133)*guides$xpyr[i] dat$y<-guides$y1[i] + c(11:133)*guides$ypyr[i] -0.5} if(guides$x2[i]<50 & guides$y2[i]>=50) { dat$x<-guides$x1[i] - c(11:133)*guides$xpyr[i] dat$y<-guides$y1[i] + c(11:133)*guides$ypyr[i]} if(guides$x2[i]>=50 & guides$y2[i]<50) { dat$x<-guides$x1[i] + c(11:133)*guides$xpyr[i] -0.5 dat$y<-guides$y1[i] - c(11:133)*guides$ypyr[i]-0.1 } if(guides$x2[i]<50 & guides$y2[i]<50) { dat$x<-guides$x1[i] - c(11:133)*guides$xpyr[i] dat$y<-guides$y1[i] - c(11:133)*guides$ypyr[i]} dis01dea<-aggregate(dis01$subcategory_new[dis01$event=="DEATHS"], by=list(dis01$year[dis01$event=="DEATHS"]), lun) names(dis01dea)<-c("year", "subcat") dis01dea<-merge(dis01dea, dat, all.y=T) for(a in 1:123){ if(!is.na(dis01dea$subcat[a]) & !is.na(dis01dea$subcat[a+1])){ x1<-dis01dea$x[a] x2<-dis01dea$x[a+1] y1<-dis01dea$y[a] y2<-dis01dea$y[a+1] lines(x=c(x1,x2), y=c(y1,y2), lwd=4, col="red")} } } # END OF THE DISEASE SPECIFIC LOOPS } # CREATE WHITE CIRCLE IN THE MIDDLE points(50,50, pch=20, col="white", cex=6) dev.off()