% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/data_provenance.R
\name{track_data}
\alias{track_data}
\title{Track Data Provenance}
\usage{
track_data(
  data_path,
  source = c("downloaded", "generated", "manual", "reference", "other"),
  source_url = NULL,
  description = NULL,
  metadata = NULL,
  fast_hash = TRUE,
  size_threshold_gb = 1,
  registry_file
)
}
\arguments{
\item{data_path}{Character. Path to data file or directory.}

\item{source}{Character. Source of the data, such as "downloaded", "generated", "manual", "reference", or "other".}

\item{source_url}{Character. URL if data was downloaded. Optional.}

\item{description}{Character. Description of the data. Optional.}

\item{metadata}{List. Additional metadata. Optional.}

\item{fast_hash}{Logical. Use faster xxHash for large files (those above \code{size_threshold_gb}, 1 GB by default). Default TRUE.}

\item{size_threshold_gb}{Numeric. File size threshold, in GB, above which the fast hash is used. Default 1.}

\item{registry_file}{Character. Path to provenance registry (required).}
}
\value{
A list containing data provenance information.
}
\description{
Records comprehensive provenance information for data files including checksums,
sources, timestamps, and metadata. Supports fast hashing for large files.
}
\examples{
\dontrun{
# Track a downloaded dataset
track_data("data/mydata.csv",
  source = "downloaded",
  source_url = "https://example.com/data.csv",
  description = "Customer data from API",
  registry_file = tempfile(fileext = ".json")
)

# Track generated data
track_data("results/simulation.rds",
  source = "generated",
  description = "Monte Carlo simulation results",
  registry_file = tempfile(fileext = ".json")
)

# Track a large file with fast hashing
track_data("data/large_file.bam",
  source = "generated",
  fast_hash = TRUE,
  registry_file = tempfile(fileext = ".json")
)
}
}
