% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/clustering_similarity.R
\name{cluster_docs}
\alias{cluster_docs}
\title{Cluster documents using K-means}
\usage{
cluster_docs(
  text_data,
  text_column = "abstract",
  n_clusters = 5,
  min_term_freq = 2,
  max_doc_freq = 0.9,
  random_seed = 42
)
}
\arguments{
\item{text_data}{A data frame containing text data.}

\item{text_column}{Name of the column containing text to analyze.}

\item{n_clusters}{Number of clusters to create (the K in K-means).}

\item{min_term_freq}{Minimum frequency for a term to be included.}

\item{max_doc_freq}{Maximum document frequency, expressed as a proportion of documents, for a term to be included.}

\item{random_seed}{Seed for random number generation (for reproducibility).}
}
\value{
A data frame with the original data and cluster assignments.
}
\description{
Clusters the documents in \code{text_data} using K-means on their TF-IDF vectors.
}
