The R code below counts the unique words in the text (read from a file you select), after stripping punctuation and digits.
# Count the distinct words in a user-selected plain-text file.
# After this script runs, `words` holds the cleaned tokens, `x_numbers` holds
# the digit runs found in them, and the last expression yields the number of
# distinct words.

# Read raw lines and split on whitespace. readLines() is used rather than
# read.table() so that apostrophes, "#", and tokens such as "NA" or "T" are
# kept as literal text instead of being treated as quotes, comments, missing
# values, or typed columns.
text_lines <- readLines(file.choose(), warn = FALSE)
words <- as.character(unlist(strsplit(text_lines, "[[:space:]]+")))

# Strip punctuation, then drop tokens that became empty.
words <- gsub("[[:punct:]]", "", words)
words <- words[nzchar(words)]

# Capture digit runs BEFORE they are removed from the words; extracting them
# afterwards would always produce an empty result.
x_numbers <- unlist(regmatches(words, gregexpr("[[:digit:]]+", words)))

# Remove digits from the words and again drop tokens that became empty.
words <- gsub("[[:digit:]]+", "", words)
words <- words[nzchar(words)]

# Number of distinct words (comparison is case-sensitive).
length(unique(words))