% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/Main.codes.R
\name{Data.cluster}
\alias{Data.cluster}
\title{Cluster OTU Time-Series Data Based on Regression Model Predictions and Generate Dendrogram Plots}
\usage{
Data.cluster(
  predicted_data,
  clust_method = "complete",
  font_size = 0.2,
  dend_title_size = 15
)
}
\arguments{
\item{predicted_data}{The output of \code{\link[MicrobTiSDA]{Pred.data}}: a list of predicted data, one element per group.}

\item{clust_method}{A string specifying the agglomeration method to be used. This argument should be one of "ward.D", "ward.D2", "single",
"complete", "average", "mcquitty", "median", "centroid" (default: \code{"complete"}). For details, see \code{\link[stats]{hclust}}.}

\item{font_size}{A numeric value specifying the font size for text labels in the dendrogram plots (default: \code{0.2}).}

\item{dend_title_size}{A numeric value specifying the font size of the dendrogram plot title (default: \code{15}).}
}
\value{
An object of class \code{MicrobTiSDA.cluster} with three elements:
\describe{
\item{predicted_data}{The original input list of predicted data.}
\item{cluster_results}{A list of hierarchical clustering objects (one per group).}
\item{cluster_figures}{A list of ggplot2 objects containing the dendrogram plots for each group.}
}
}
\description{
This function performs hierarchical clustering on predicted OTU time-series data for different groups
and generates corresponding dendrogram plots. For each group in the input list, the function computes a
correlation-based distance matrix, performs hierarchical clustering using the specified clustering method
(e.g. \code{average}), and then converts the result into a dendrogram.
}
\details{
For each group in the input \code{predicted_data}, the function first extracts the predicted OTU data (excluding the
last column, which is assumed to contain time information) and computes a correlation matrix, which is converted
into a distance matrix via
\deqn{d_{\text{corr}}(x,y) = 1-\frac{{\sum_{i=1}^{n}(x_i-\bar{x})(y_i-\bar{y})}}{{\sqrt{{\sum_{i=1}^{n}(x_i-\bar{x})^2}} \sqrt{{\sum_{i=1}^{n}(y_i-\bar{y})^2}}}}}
where \eqn{x} and \eqn{y} represent the two OTU time series being compared, \eqn{n} denotes the total number of time points, and
\eqn{\bar{x}} and \eqn{\bar{y}} denote the means of the respective time series. Hierarchical clustering is
performed on the above distance matrix using the method specified in \code{clust_method}.
}
\examples{
\donttest{
# Example metadata with grouping variables
metadata <- data.frame(
  TimePoint = c(1, 2, 3, 4),
  Sample = c('S1', 'S2', 'S3', 'S4'),
  GroupA = c('A', 'A', 'B', 'B'),
  GroupB = c('X', 'Y', 'X', 'Y')
)

# Example pre-processed data (e.g., transformed abundance data)
Pre_processed_Data <- data.frame(
  Feature1 = rnorm(4),
  Feature2 = rnorm(4)
)

# Create design matrix using grouping variables
design_data <- Design(metadata, Group_var = c('GroupA', 'GroupB'), Pre_processed_Data,
                      Sample_Time = 'TimePoint', Sample_ID = 'Sample')

reg <- Reg.SPLR(design_data,
                  Pre_processed_Data,
                  z_score = 2,
                  unique_values = 5,
                  Knots = NULL,
                  max_Knots = 5)
predictions <- Pred.data(reg,
                        metadata,
                        Group = "GroupA",
                        time_step = 1,
                        Sample_Time = "TimePoint")
result <- Data.cluster(predicted_data = predictions,
                       clust_method = "average",
                       font_size = 0.2,
                       dend_title_size = 15)

}
}
\author{
Shijia Li
}
