lnalborczyk · January 2, 2023 13:02
diff --git a/consecutive_duplicated_words.R b/consecutive_duplicated_words.R
 ###########################################################
 # Find duplicated consecutive words in a character string #
 # or in an R Markdown document                            #
 # ------------------------------------------------------- #
 # Written by Ladislas Nalborczyk                          #
 # Contact: ladislas.nalborczyk@gmail.com                  #
 # Last updated on January 2, 2023                         #
 ###########################################################

 # loading some packages
 library(tidyverse)

 # example string containing duplicated consecutive and non-consecutive words
 some_string <- "we recruited 46 we participants, the the participants were instructed to"

 # regex solution suggested in
 # https://stackoverflow.com/questions/2823016/regular-expression-for-duplicate-words
 # (a whole word followed by one or more whitespace-separated repeats of
 # itself; the match is case-sensitive)
 regex_string <- "\\b(\\w+)(\\s+\\1\\b)+"

 # word-by-word solution suggested in
 # https://twitter.com/olivier_pucher/status/1609885216688197632?s=20&t=OvTSyvyh4aNTO8RGRZbAMg
 # each word is compared with the word just before it; dplyr::lag() is called
 # explicitly so the result does not depend on dplyr masking stats::lag()
 # (the first element is NA because it has no predecessor, and punctuation
 # stays attached to the words because the split is on single spaces)
 v <- stringr::str_split(string = some_string, pattern = " ") |> unlist()
 v == dplyr::lag(v)

 # detecting duplicated consecutive words
 stringr::str_detect(string = some_string, pattern = regex_string)

 # extracting duplicated consecutive words (first match only)
 stringr::str_extract(string = some_string, pattern = regex_string)

 # extracting text from an R Markdown document (one element per line, so
 # words duplicated across a line break are not detected)
 filetext <- readLines(con = "some_rmd_file.Rmd")

 # testing each line for duplicated consecutive words (computed once and
 # reused below)
 has_duplicate <- stringr::str_detect(string = filetext, pattern = regex_string)
 has_duplicate

 # is there any?
 any(has_duplicate)

 # which words? (NA for lines without a match)
 stringr::str_extract(string = filetext, pattern = regex_string)

 # where?
 filetext[has_duplicate]
	###########################################################
	# Find duplicated consecutive words in a character string #
	# or in an R Markdown document #
	# ------------------------------------------------------- #
	# Written by Ladislas Nalborczyk #
	# Contact: ladislas.nalborczyk@gmail.com #
	# Last updated on January 2, 2023 #
	###########################################################

	# loading some packages
	library(tidyverse)

	# example string containing duplicated consecutive and non-consecutive words
	some_string <- "we recruited 46 we participants, the the participants were instructed to"

	# regex solution suggested in
	# https://stackoverflow.com/questions/2823016/regular-expression-for-duplicate-words
	# (a whole word followed by one or more whitespace-separated repeats of
	# itself; the match is case-sensitive)
	regex_string <- "\\b(\\w+)(\\s+\\1\\b)+"

	# word-by-word solution suggested in
	# https://twitter.com/olivier_pucher/status/1609885216688197632?s=20&t=OvTSyvyh4aNTO8RGRZbAMg
	# each word is compared with the word just before it; dplyr::lag() is called
	# explicitly so the result does not depend on dplyr masking stats::lag()
	# (the first element is NA because it has no predecessor, and punctuation
	# stays attached to the words because the split is on single spaces)
	v <- stringr::str_split(string = some_string, pattern = " ") |> unlist()
	v == dplyr::lag(v)

	# detecting duplicated consecutive words
	stringr::str_detect(string = some_string, pattern = regex_string)

	# extracting duplicated consecutive words (first match only)
	stringr::str_extract(string = some_string, pattern = regex_string)

	# extracting text from an R Markdown document (one element per line, so
	# words duplicated across a line break are not detected)
	filetext <- readLines(con = "some_rmd_file.Rmd")

	# testing each line for duplicated consecutive words (computed once and
	# reused below)
	has_duplicate <- stringr::str_detect(string = filetext, pattern = regex_string)
	has_duplicate

	# is there any?
	any(has_duplicate)

	# which words? (NA for lines without a match)
	stringr::str_extract(string = filetext, pattern = regex_string)

	# where?
	filetext[has_duplicate]
No results found