## Expressões Regulares ## http://en.wikibooks.org/wiki/R_Programming/Text_Processing ## http://www.johndcook.com/r_language_regex.html ## http://www.regular-expressions.info/rlanguage.html ## http://biostat.mc.vanderbilt.edu/wiki/pub/Main/SvetlanaEdenRFiles/regExprTalk.pdf ## http://krijnhoetmer.nl/stuff/regex/cheat-sheet/ ## http://www.dummies.com/how-to/content/how-to-use-regular-expressions-in-r.html ## http://stat.ethz.ch/R-manual/R-patched/library/utils/html/glob2rx.html # ver ## http://www.night-ray.com/regex.pdf ## http://www.addedbytes.com/cheat-sheets/download/regular-expressions-cheat-sheet-v2.pdf datas http://statistics.berkeley.edu/classes/s133/dates.html http://personality-project.org/r/r.plottingdates.html http://en.wikibooks.org/wiki/R_Programming/Times_and_Dates http://www.esawdust.com/blog/rlanguage/files/RFormatDates.html http://www.r-statistics.com/2012/03/do-more-with-dates-and-times-in-r-with-lubridate-1-1-0/ http://blog.revolutionanalytics.com/2009/06/converting-time-zones.html http://www.jstatsoft.org/v40/i03/paper https://science.nature.nps.gov/im/datamgmt/statistics/r/fundamentals/dates.cfm http://danganothererror.wordpress.com/2010/07/24/how-to-calculate-with-dates-and-hours-in-r/ Twitter http://brainchronicle.blogspot.com.br/2012/07/twitter-analysis-of-air-pollution-in.html http://nsaunders.wordpress.com/2012/08/16/twitter-coverage-of-the-ismb-2012-meeting-some-statistics/ http://blog.ouseful.info/2012/07/06/interest-differencing-folk-commonly-followed-by-tweeting-mps-of-different-parties/ http://blagrants.blogspot.com.br/2012/06/im-following-you-in-twitterare-you.html http://isomorphismes.tumblr.com/post/20362455367/twitter googleVis http://rbresearch.wordpress.com/2012/06/30/fun-with-the-googlevis-package-for-r/ http://thebiobucket.blogspot.com.br/2012/09/use-gbif-and-googlevis-to-make-maps.html http://ec2-184-73-106-109.compute-1.amazonaws.com/wordpress/?p=429 http://ec2-184-73-106-109.compute-1.amazonaws.com/wordpress/?p=409 
http://lamages.blogspot.com.br/search/label/googleVis word cloud http://onertipaday.blogspot.com.br/2011/07/word-cloud-in-r.html http://www.stubbornmule.net/2012/09/what-is-tony-talking-about/ http://michaelbommarito.com/2012/06/25/wordcloud-arizona-et-al-v-united-states-opinion/ http://bridgewater.wordpress.com/2012/04/18/a-word-cloud-where-the-x-and-y-axes-mean-something/ http://bridgewater.wordpress.com/2012/04/16/word-cloud-alternatives/ http://r-de-jeu.blogspot.com.br/2012/04/50-most-used-r-packages.html http://schmitt-r.blogspot.com.br/2012/03/using-wordcloud-on-normalized-terms.html http://wiekvoet.blogspot.com.br/2012/03/detour-in-taste-wordclouds.html http://blog.ouseful.info/2012/02/15/generating-twitter-wordclouds-in-r-prompted-by-an-open-learning-blogpost/ http://allthingsr.blogspot.com.br/2012/01/updated-sentiment-analysis-and-word.html http://thebiobucket.blogspot.com.br/2011/11/visually-examine-google-scholar-search.html http://binfalse.de/2011/08/wordpress-wordcloud-with-r/ http://blog.echen.me/2011/07/28/tweets-vs-likes-what-gets-shared-on-twitter-vs-facebook/ http://simplystatistics.org/post/16448577464/a-wordcloud-comparison-of-the-2011-and-2012-sotu http://blog.fellstat.com/?p=101 http://thebiobucket.blogspot.com.br/2011/12/what-happens-when-ecologists-do.html http://simplystatistics.org/post/13203811645/an-r-function-to-analyze-your-google-scholar-citations http://jeffreybreen.wordpress.com/2011/07/21/one-liners-twitter/ # ver http://statisfactions.com/2011/listening-for-trends-in-america-s-baby-names/ http://nsaunders.wordpress.com/2010/08/23/abstract-word-clouds-using-r/ http://www.r-statistics.com/2010/03/google-spreadsheets-google-forms-r-easily-collecting-and-importing-data-for-analysis/ texto http://trinkerrstuff.wordpress.com/2012/06/29/igraph-and-structured-text-exploration/ livros novos 
http://www.portfolioprobe.com/2012/06/22/two-new-important-books-on-r/?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+PortfolioProbeRLanguage+%28Portfolio+Probe+%C2%BB+R+language%29 http://www.crcpress.com/product/isbn/9781439856826 tutoriais http://pairach.com/2012/06/17/r_tutorials_non-uni/ http://kbroman.wordpress.com/2011/08/26/quick-labels-within-figures-done-right/ http://ryouready.wordpress.com/2010/03/23/playing-with-the-playwith-package/ programação (controle de estruturas, loops) usar como exemplo a simulação de uma Poisson, ou simular uma fila, um random walk http://manuals.bioinformatics.ucr.edu/home/programming-in-r http://zoonek2.free.fr/UNIX/48_R/02.html http://stats.lse.ac.uk/penzer/ST419materials/CSchpt3.pdf http://www.statmethods.net/management/controlstructures.html http://www.student.tue.nl/V/j.g.v.d.pol/Teaching/R/control.asp http://en.wikibooks.org/wiki/R_Programming/Control_Structures http://genomics.jhu.edu/modules/Rmodules/genRmod7/rControl.pdf http://genomics.jhu.edu/modules/Rmodules/genRmod7/rControlExerciseSolutions.pdf http://rwiki.sciviews.org/doku.php?id=guides:stats-with-r:02programming_in_r:01controlstructures http://yihui.name/en/2010/10/on-the-gory-loops-in-r/ http://biostat.mc.vanderbilt.edu/wiki/pub/Main/SvetlanaEdenRFiles/handouts.pdf http://jnlnet.wordpress.com/2010/04/22/making-r-run-considerably-faster/ http://novicemetrics.blogspot.com.br/2011/07/avoiding-loops-in-r-example-with.html http://www.psychwire.co.uk/2011/05/loops-conditionals-and-variables-a-basic-simulation-in-r/ GUI http://4dpiecharts.com/2010/10/06/creating-guis-in-r-with-gwidgets/ http://4dpiecharts.com/2012/02/20/gui-building-in-r-gwidgets-vs-deducer/ http://r-video-tutorial.blogspot.com.br/2011/09/variogram-fit-with-rpanel.html http://www.sciviews.org/_rgui/tcltk/index.html http://bioinf.wehi.edu.au/~wettenhall/RTclTkExamples/ http://rgm2.lab.nig.ac.jp/RGM2/func.php?rd_id=gWidgets:gWidgets-handlers 
# http://yihui.name/en/2010/04/demonstrating-the-power-of-f-test-with-gwidgets/
# http://cran.r-project.org/web/packages/gWidgets/vignettes/gWidgets.pdf
#
# iplots
# http://www.r-statistics.com/2012/01/interactive-graphics-with-the-iplots-package-from-r-in-action/

#------------------------------------------------------------------------------------------
# Text mining of a PDF file
# http://thebiobucket.blogspot.com.br/2012/09/reading-and-text-mining-pdf-file-in-r.html
#
# This section downloads a PDF, converts it to plain text with an external
# command-line tool and reads the result into the character vector `txt`
# (one element per line of text).
#
# Windows: download pdftotext from
#   ftp://ftp.foolabs.com/pub/xpdf/xpdfbin-win-3.03.zip
# and extract it to your program files folder.
#
# Linux:
#   (installation) sudo apt-get install pstotext
#   (usage)        pstotext -output destino.txt fonte.pdf
#
# Another option:
#   sudo apt-get install poppler-utils
#   pdftotext -enc UTF-8 -eol unix abacaxi.pdf out.txt
#   pdftotext -enc Latin1 -eol unix abacaxi.pdf out.txt

# Packages used for the word cloud further down; loaded first so that a
# missing package is reported before anything is downloaded.
library(tm)
library(wordcloud)
## install.packages("Rstem", "/usr/lib/R/site-library", dep=TRUE, repos="http://www.omegahat.org/R")
library(Rstem)

# here is a pdf for mining
url <- "http://www.noisyroom.net/blog/RomneySpeech072912.pdf"
dest <- tempfile(fileext = ".pdf")
download.file(url, dest, mode = "wb")

# Pick the converter for the current platform. Only one of the two commands
# can work on a given machine, so they must not both run unconditionally.
if (.Platform$OS.type == "windows") {
  # set path to pdftotext.exe
  exe <- "C:\\Program Files\\xpdfbin-win-3.03\\bin32\\pdftotext.exe"
  # pdftotext is called with the input path only; the output is expected
  # next to the PDF with the extension swapped. The dot is escaped and the
  # pattern anchored so that only the trailing ".pdf" is replaced.
  filetxt <- sub("\\.pdf$", ".txt", dest)
  convert_cmd <- paste(shQuote(exe), shQuote(dest))
} else {
  filetxt <- "destino.txt"
  convert_cmd <- paste("pstotext -output", shQuote(filetxt), shQuote(dest))
}

# Wait for the converter to finish: running it in the background meant the
# text file did not exist yet on the first attempt to open it.
status <- system(convert_cmd, wait = TRUE)
if (status != 0L || !file.exists(filetxt)) {
  stop("PDF to text conversion failed: ", convert_cmd, call. = FALSE)
}

# Open the converted file in the associated application (Windows only,
# shell.exec() does not exist on other platforms).
if (.Platform$OS.type == "windows") {
  shell.exec(filetxt)
}

# do something with it, i.e. a simple word cloud
# warn = FALSE silences the harmless "incomplete final line" warning.
txt <- readLines(filetxt, warn = FALSE)
# Build a word cloud from `txt` (character vector holding the converted PDF
# text) and then delete the temporary files created for `dest`.
# Requires the tm, wordcloud and Rstem packages to be attached.

# Language used for BOTH the stop-word list and the stemmer, so the two stay
# consistent. Set to "portuguese" when mining Portuguese text.
stem_language <- "english"

# Normalise the text: transliterate to ASCII, drop elements iconv() could not
# convert (returned as NA), lower-case, and remove form feeds and stop words.
txt <- iconv(txt, to = "ASCII//TRANSLIT")
txt <- txt[!is.na(txt)]
txt <- tolower(txt)
txt <- removeWords(txt, c("\\f", stopwords(stem_language)))

# Term frequencies over the whole document
corpus <- Corpus(VectorSource(txt))
corpus <- tm_map(corpus, removePunctuation)
tdm <- TermDocumentMatrix(corpus)
m <- as.matrix(tdm)
d <- data.frame(freq = sort(rowSums(m), decreasing = TRUE))

# Put the words in a column, otherwise they would be lost when aggregating
d$word <- row.names(d)

# Stem words (a single stemmer; a second call would just overwrite the first)
d$stem <- wordStem(d$word, language = stem_language)

# remove web addresses (very long strings)
d <- d[nchar(d$word) < 20, ]
if (nrow(d) == 0L) {
  stop("no terms left to plot after cleaning the text", call. = FALSE)
}

# Aggregate frequency by word stem and keep the first (most frequent) word of
# each stem as its label. Both aggregate() calls group by the same `stem`
# column of the same data, so their rows line up for cbind().
agg_freq <- aggregate(freq ~ stem, data = d, FUN = sum)
agg_word <- aggregate(word ~ stem, data = d, FUN = function(x) x[1])
d <- cbind(freq = agg_freq[, 2], agg_word)

# sort by frequency
d <- d[order(d$freq, decreasing = TRUE), ]

# print wordcloud
wordcloud(d$word, d$freq)

# Remove only the temporary files belonging to this download (the PDF and its
# text conversion), not every file in the session's temporary directory.
tmp_files <- c(dest, sub("\\.pdf$", ".txt", dest))
file.remove(tmp_files[file.exists(tmp_files)])