% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/collocate_comments_fuzzy.R
\name{collocate_comments_fuzzy}
\alias{collocate_comments_fuzzy}
\title{Collocate Comments Fuzzy}
\usage{
collocate_comments_fuzzy(
  transcript_token,
  note_token,
  collocate_length = 5,
  n_bands = 50,
  threshold = 0.7,
  n_gram_width = 4
)
}
\arguments{
\item{transcript_token}{tokenized transcript that serves as the baseline for the notes,
resulting from \code{\link[=token_transcript]{token_transcript()}}}

\item{note_token}{tokenized document of notes, resulting from \code{\link[=token_comments]{token_comments()}}}

\item{collocate_length}{the length of the collocation. Default is 5}

\item{n_bands}{number of bands used in the MinHash algorithm, passed to \code{zoomerjoin::jaccard_right_join()}. Default is 50}

\item{threshold}{Jaccard similarity threshold for two collocations to be considered a match, passed to \code{zoomerjoin::jaccard_right_join()}. Default is 0.7}

\item{n_gram_width}{width of the n-grams used to calculate the Jaccard similarity, passed to \code{zoomerjoin::jaccard_right_join()}. Default is 4}
}
\value{
data frame of the transcript and corresponding note frequency
}
\description{
This function provides the frequency of collocations in comments that
correspond to the provided transcript, using fuzzy matching.
}
\details{
Collocations are sequences of words present in the source document.
For example, the phrase "the blue bird flies" contains one collocation of
length 4 ("the blue bird flies"), two collocations of length 3 ("the blue bird"
and "blue bird flies"), and three collocations of length 2 ("the blue",
"blue bird", and "bird flies").
This function counts how often the collocations of the transcript occur in the
'notes', i.e., the derivative documents.
Because the matching is fuzzy, indirect (inexact) matches are also included, each
with a weight of \eqn{(n \times d) / m}{(n * d) / m}, where \eqn{n} is the frequency
of the fuzzy collocation, \eqn{d} is the Jaccard similarity between the transcript
collocation and the note collocation, and \eqn{m} is the number of closest matches
for the note collocation.
}
\examples{
# Rename relevant column to page_notes in the derivative document
comment_example_rename <- dplyr::rename(comment_example[1:10,], page_notes=Notes)
# Tokenize the derivative document
toks_comment <- token_comments(comment_example_rename)
# Rename relevant column in the source document to text
transcript_example_rename <- dplyr::rename(transcript_example, text=Text)
# Tokenize source document
toks_transcript <- token_transcript(transcript_example_rename)
# Compute collocation frequencies using fuzzy (or indirect) matching
fuzzy_object <- collocate_comments_fuzzy(toks_transcript, toks_comment)
}
