/**
 * @file pastix_ccores.h
 *
 * PaStiX kernel header.
 *
 * @copyright 2011-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
 *                      Univ. Bordeaux. All rights reserved.
 *
 * @version 6.4.0
 * @author Mathieu Faverge
 * @author Pierre Ramet
 * @author Xavier Lacoste
 * @author Esragul Korkmaz
 * @author Gregoire Pichon
 * @author Tony Delarue
 * @author Alycia Lisito
 * @author Nolan Bredel
 * @date 2024-07-05
 * @generated from /build/pastix/src/pastix-6.4.0/kernels/pastix_zcores.h, normal z -> c, Thu Oct 23 06:51:46 2025
 *
 */
#ifndef _pastix_ccores_h_
#define _pastix_ccores_h_

#ifndef DOXYGEN_SHOULD_SKIP_THIS
/*
 * Lock / unlock helpers: each macro expands to pastix_atomic_lock() /
 * pastix_atomic_unlock() called on the address of the `lock` member reached
 * through the pointer cblk_.  cblk_ must therefore be a pointer expression
 * whose target has a `lock` member; it is parenthesized and appears once in
 * the expansion.
 */
#define pastix_cblk_lock( cblk_ )    pastix_atomic_lock( &((cblk_)->lock) )
#define pastix_cblk_unlock( cblk_ )  pastix_atomic_unlock( &((cblk_)->lock) )
#endif /* DOXYGEN_SHOULD_SKIP_THIS */

/**
 * @addtogroup kernel_blas_lapack
 * @{
 *    This module contains all the BLAS and LAPACK-like kernels that are working
 *    on lapack layout matrices.
 *
 *    @name PastixComplex32 BLAS kernels
 *    @{
 */
/*
 * core_cplrnt: A is the only pointer argument and is writable, and the
 * function returns void, so A is the only argument results can come back
 * through.
 * NOTE(review): the name and the `seed` parameter suggest a pseudo-random
 * fill of an m-by-n block stored in A with leading dimension lda, where gM,
 * m0 and n0 locate that block inside a larger matrix - confirm against the
 * implementation.
 */
void core_cplrnt( int                    m,
                  int                    n,
                  pastix_complex32_t    *A,
                  int                    lda,
                  int                    gM,
                  int                    m0,
                  int                    n0,
                  unsigned long long int seed );
/*
 * core_cgetmo: A is declared const (input); B is the only writable argument.
 * No status is returned.
 * NOTE(review): the name suggests an out-of-place transposition of the
 * m-by-n matrix A (leading dimension lda) into B (leading dimension ldb) -
 * confirm against the implementation.
 */
void core_cgetmo( int                       m,
                  int                       n,
                  const pastix_complex32_t *A,
                  int                       lda,
                  pastix_complex32_t       *B,
                  int                       ldb );
/*
 * core_cgeadd: A is declared const (input); B is the only writable operand.
 * Returns an int whose meaning is not documented in this header.
 * NOTE(review): BLAS-like naming suggests B = alpha * op(A) + beta * B on an
 * M-by-N matrix, with op() selected by `trans` - confirm against the
 * implementation.
 */
int core_cgeadd( pastix_trans_t            trans,
                 pastix_int_t              M,
                 pastix_int_t              N,
                 pastix_complex32_t        alpha,
                 const pastix_complex32_t *A,
                 pastix_int_t              LDA,
                 pastix_complex32_t        beta,
                 pastix_complex32_t       *B,
                 pastix_int_t              LDB );
/*
 * core_cgemdm: A, B and D are declared const (inputs); C and WORK are
 * writable.  WORK comes with a length LWORK and D with an increment incD.
 * Returns an int whose meaning is not documented in this header.
 * NOTE(review): the signature looks like a GEMM with an extra diagonal
 * operand, i.e. C = alpha * op(A) * D * op(B) + beta * C with D stored with
 * stride incD - confirm against the implementation, including the minimum
 * LWORK.
 */
int core_cgemdm( pastix_trans_t            transA,
                 pastix_trans_t            transB,
                 int                       M,
                 int                       N,
                 int                       K,
                 pastix_complex32_t        alpha,
                 const pastix_complex32_t *A,
                 int                       LDA,
                 const pastix_complex32_t *B,
                 int                       LDB,
                 pastix_complex32_t        beta,
                 pastix_complex32_t       *C,
                 int                       LDC,
                 const pastix_complex32_t *D,
                 int                       incD,
                 pastix_complex32_t       *WORK,
                 int                       LWORK );
/*
 * core_cpqrcp: every pointer argument (A, jpvt, tau, work, rwork) is
 * writable; work has length lwork.  Returns an int whose meaning is not
 * documented in this header.
 * NOTE(review): the naming (qrcp, jpvt, tau) suggests a QR factorization
 * with column pivoting of the m-by-n matrix A, stopped according to tol and
 * maxrank and blocked by nb, with the return value presumably being the
 * rank found - confirm against the implementation, including the meaning of
 * full_update.
 */
int core_cpqrcp( float              tol,
                 pastix_int_t        maxrank,
                 int                 full_update,
                 pastix_int_t        nb,
                 pastix_int_t        m,
                 pastix_int_t        n,
                 pastix_complex32_t *A,
                 pastix_int_t        lda,
                 pastix_int_t       *jpvt,
                 pastix_complex32_t *tau,
                 pastix_complex32_t *work,
                 pastix_int_t        lwork,
                 float             *rwork );
/*
 * core_crqrcp: same parameter types and order as core_cpqrcp; only the third
 * argument is named differently (`refine`).  Every pointer argument is
 * writable.  Returns an int whose meaning is not documented in this header.
 * NOTE(review): presumably a randomized variant of QR with column pivoting -
 * confirm against the implementation, including the meaning of `refine`.
 */
int core_crqrcp( float              tol,
                 pastix_int_t        maxrank,
                 int                 refine,
                 pastix_int_t        nb,
                 pastix_int_t        m,
                 pastix_int_t        n,
                 pastix_complex32_t *A,
                 pastix_int_t        lda,
                 pastix_int_t       *jpvt,
                 pastix_complex32_t *tau,
                 pastix_complex32_t *work,
                 pastix_int_t        lwork,
                 float             *rwork );
/*
 * core_crqrrt: unlike the *qrcp kernels declared above, this one takes no
 * jpvt array; it takes a second matrix B (leading dimension ldb) with its
 * own tau_b, and a scalar normA passed by value.  Every pointer argument is
 * writable.  Returns an int whose meaning is not documented in this header.
 * NOTE(review): presumably a rank-revealing QR that uses B as an auxiliary
 * matrix instead of pivoting, with normA being a precomputed norm of A -
 * confirm against the implementation.
 */
int core_crqrrt( float              tol,
                 pastix_int_t        maxrank,
                 pastix_int_t        nb,
                 pastix_int_t        m,
                 pastix_int_t        n,
                 pastix_complex32_t *A,
                 pastix_int_t        lda,
                 pastix_complex32_t *tau,
                 pastix_complex32_t *B,
                 pastix_int_t        ldb,
                 pastix_complex32_t *tau_b,
                 pastix_complex32_t *work,
                 pastix_int_t        lwork,
                 float              normA );
/*
 * core_ctqrcp: same parameter types and order as core_cpqrcp and
 * core_crqrcp; the third argument is named `unused`, which suggests it is
 * kept only so that the three kernels share one signature.  Every pointer
 * argument is writable.  Returns an int whose meaning is not documented in
 * this header.
 * NOTE(review): presumably a truncated QR with column pivoting - confirm
 * against the implementation.
 */
int core_ctqrcp( float              tol,
                 pastix_int_t        maxrank,
                 int                 unused,
                 pastix_int_t        nb,
                 pastix_int_t        m,
                 pastix_int_t        n,
                 pastix_complex32_t *A,
                 pastix_int_t        lda,
                 pastix_int_t       *jpvt,
                 pastix_complex32_t *tau,
                 pastix_complex32_t *work,
                 pastix_int_t        lwork,
                 float             *rwork );
/*
 * core_ctradd: same parameter list as core_cgeadd with an additional leading
 * `uplo` selector.  A is declared const (input); B is the only writable
 * operand.  Returns an int whose meaning is not documented in this header.
 * NOTE(review): presumably B = alpha * op(A) + beta * B restricted to the
 * triangle chosen by `uplo` - confirm against the implementation.
 */
int core_ctradd( pastix_uplo_t             uplo,
                 pastix_trans_t            trans,
                 pastix_int_t              M,
                 pastix_int_t              N,
                 pastix_complex32_t        alpha,
                 const pastix_complex32_t *A,
                 pastix_int_t              LDA,
                 pastix_complex32_t        beta,
                 pastix_complex32_t       *B,
                 pastix_int_t              LDB);
/*
 * core_cscalo: A and D are declared const (inputs); B is the only writable
 * operand.  Returns an int whose meaning is not documented in this header.
 * NOTE(review): the name suggests an out-of-place scaling, B = op(A) * D
 * with D a diagonal stored with stride ldd - confirm against the
 * implementation.
 */
int core_cscalo( pastix_trans_t            trans,
                 pastix_int_t              M,
                 pastix_int_t              N,
                 const pastix_complex32_t *A,
                 pastix_int_t              lda,
                 const pastix_complex32_t *D,
                 pastix_int_t              ldd,
                 pastix_complex32_t       *B,
                 pastix_int_t              ldb );

/**
 *    @}
 *    @name PastixComplex32 Orthogonalization kernels for low-rank updates
 *    @{
 */
/*
 * core_clrorthu_fullqr: orthogonalization kernel for low-rank updates (per
 * the enclosing Doxygen group).  Both U (leading dimension ldu) and V
 * (leading dimension ldv) are writable.  Returns a pastix_fixdbl_t whose
 * meaning is not documented in this header.
 * NOTE(review): presumably re-orthogonalizes all `rank` columns of U with a
 * full QR and applies the matching transformation to V; the return value is
 * presumably a flop count - confirm against the implementation.
 */
pastix_fixdbl_t core_clrorthu_fullqr( pastix_int_t        M,
                                      pastix_int_t        N,
                                      pastix_int_t        rank,
                                      pastix_complex32_t *U,
                                      pastix_int_t        ldu,
                                      pastix_complex32_t *V,
                                      pastix_int_t        ldv );
/*
 * core_clrorthu_partialqr: orthogonalization kernel for low-rank updates
 * (per the enclosing Doxygen group).  U, V and *r2ptr are writable; r2ptr
 * being a pointer means the callee can change the value it points to.
 * Returns a pastix_fixdbl_t whose meaning is not documented in this header.
 * NOTE(review): presumably only the r2 columns following the first r1 are
 * orthogonalized, *r2ptr is updated to the number of columns kept, and
 * offx/offy are offsets of the contribution - confirm against the
 * implementation.
 */
pastix_fixdbl_t core_clrorthu_partialqr( pastix_int_t        M,
                                         pastix_int_t        N,
                                         pastix_int_t        r1,
                                         pastix_int_t       *r2ptr,
                                         pastix_int_t        offx,
                                         pastix_int_t        offy,
                                         pastix_complex32_t *U,
                                         pastix_int_t        ldu,
                                         pastix_complex32_t *V,
                                         pastix_int_t        ldv );
/*
 * core_clrorthu_cgs: orthogonalization kernel for low-rank updates (per the
 * enclosing Doxygen group).  Takes two pairs of dimensions (M1, N1) and
 * (M2, N2) where core_clrorthu_partialqr takes a single (M, N); the
 * remaining parameters are the same.  U, V and *r2ptr are writable.
 * Returns a pastix_fixdbl_t whose meaning is not documented in this header.
 * NOTE(review): "cgs" presumably stands for classical Gram-Schmidt -
 * confirm against the implementation.
 */
pastix_fixdbl_t core_clrorthu_cgs( pastix_int_t        M1,
                                   pastix_int_t        N1,
                                   pastix_int_t        M2,
                                   pastix_int_t        N2,
                                   pastix_int_t        r1,
                                   pastix_int_t       *r2ptr,
                                   pastix_int_t        offx,
                                   pastix_int_t        offy,
                                   pastix_complex32_t *U,
                                   pastix_int_t        ldu,
                                   pastix_complex32_t *V,
                                   pastix_int_t        ldv );

/**
 *    @}
 *    @name PastixComplex32 LAPACK kernels
 *    @{
 */
/*
 * The five kernels below share one signature: an order n, a writable matrix
 * A with leading dimension lda, a writable pastix_int_t pointer `nbpivots`
 * and a real-valued `criterion`.  None of them returns a status.
 * NOTE(review): by LAPACK naming these are presumably in-place
 * factorizations of an n-by-n block - potrf: Cholesky, pxtrf: LL^t,
 * getrf: LU, hetrf: LDL^h, sytrf: LDL^t - that use `criterion` as a pivot
 * threshold and report through *nbpivots - confirm against the
 * implementations.
 */
void core_cpotrfsp( pastix_int_t        n,
                    pastix_complex32_t *A,
                    pastix_int_t        lda,
                    pastix_int_t       *nbpivots,
                    float              criterion );
void core_cpxtrfsp( pastix_int_t        n,
                    pastix_complex32_t *A,
                    pastix_int_t        lda,
                    pastix_int_t       *nbpivots,
                    float              criterion );
void core_cgetrfsp( pastix_int_t        n,
                    pastix_complex32_t *A,
                    pastix_int_t        lda,
                    pastix_int_t       *nbpivots,
                    float              criterion );
/* Declared only when PRECISION_z or PRECISION_c is defined. */
#if defined(PRECISION_z) || defined(PRECISION_c)
void core_chetrfsp( pastix_int_t        n,
                    pastix_complex32_t *A,
                    pastix_int_t        lda,
                    pastix_int_t       *nbpivots,
                    float              criterion );
#endif
void core_csytrfsp( pastix_int_t        n,
                    pastix_complex32_t *A,
                    pastix_int_t        lda,
                    pastix_int_t       *nbpivots,
                    float              criterion );

/**
 *    @}
 * @}
 *
 * @addtogroup kernel_fact
 * @{
 *    This module contains all the kernels working at the solver matrix structure
 *    level for the numerical factorization step.
 *
 *    @name PastixComplex32 cblk-BLAS CPU kernels
 *    @{
 */

/*
 * cpucblk_cgeaddsp1d: cblk1, L1 and U1 are declared const (sources);
 * cblk2, L2 and U2 are writable (destinations).  Returns an int whose
 * meaning is not documented in this header.
 * NOTE(review): presumably adds the L and U coefficients of cblk1 into the
 * matching positions of cblk2 - confirm against the implementation,
 * including whether U1/U2 may be NULL.
 */
int cpucblk_cgeaddsp1d( const SolverCblk         *cblk1,
                        SolverCblk               *cblk2,
                        const pastix_complex32_t *L1,
                        pastix_complex32_t       *L2,
                        const pastix_complex32_t *U1,
                        pastix_complex32_t       *U2 );

/*
 * cpucblk_cgemmsp: cblk, blok, A, B and lowrank are declared const
 * (inputs); fcblk, C and work are writable, with work of length lwork.
 * A, B and C are untyped (void *).  Returns a pastix_fixdbl_t whose meaning
 * is not documented in this header.
 * NOTE(review): presumably the sparse GEMM update of fcblk by the
 * contribution of `blok` within `cblk`; the operands are presumably void *
 * because their storage depends on the compression settings in `lowrank`,
 * and the return value is presumably a flop count - confirm against the
 * implementation.
 */
pastix_fixdbl_t cpucblk_cgemmsp( pastix_coefside_t   sideA,
                                 pastix_trans_t      trans,
                                 const SolverCblk   *cblk,
                                 const SolverBlok   *blok,
                                 SolverCblk         *fcblk,
                                 const void         *A,
                                 const void         *B,
                                 void               *C,
                                 pastix_complex32_t *work,
                                 pastix_int_t        lwork,
                                 const pastix_lr_t  *lowrank );
/*
 * cpucblk_ctrsmsp: takes the four BLAS TRSM selectors (side, uplo, trans,
 * diag).  cblk, A and lowrank are declared const (inputs); C is the only
 * writable argument.  No status is returned.
 * NOTE(review): presumably a triangular solve applied to the blocks of
 * `cblk`, with A the triangular operand and C solved in place - confirm
 * against the implementation.
 */
void cpucblk_ctrsmsp( pastix_side_t      side,
                      pastix_uplo_t      uplo,
                      pastix_trans_t     trans,
                      pastix_diag_t      diag,
                      const SolverCblk  *cblk,
                      const void        *A,
                      void              *C,
                      const pastix_lr_t *lowrank );
/*
 * cpucblk_cscalo: cblk is declared const; both dataL and dataLD are
 * declared writable and untyped (void *).  No status is returned.
 * NOTE(review): presumably the column-block counterpart of core_cscalo,
 * reading dataL and storing the scaled copy in dataLD - confirm against the
 * implementation (dataL is not const here even if it is only read).
 */
void cpucblk_cscalo ( pastix_trans_t     trans,
                      const SolverCblk  *cblk,
                      void              *dataL,
                      void              *dataLD );

/*
 * cpublok_cgemmsp: cblk, A, B and lowrank are declared const (inputs);
 * fcblk and C are writable.  Three pastix_int_t values (blok_mk, blok_nk,
 * blok_mn) are passed instead of a SolverBlok pointer, and no workspace is
 * passed.  Returns a pastix_fixdbl_t whose meaning is not documented in
 * this header.
 * NOTE(review): presumably the block-level counterpart of cpucblk_cgemmsp,
 * with blok_mk / blok_nk / blok_mn being block indices for A, B and C -
 * confirm against the implementation.
 */
pastix_fixdbl_t cpublok_cgemmsp( pastix_trans_t     trans,
                                 const SolverCblk  *cblk,
                                 SolverCblk        *fcblk,
                                 pastix_int_t       blok_mk,
                                 pastix_int_t       blok_nk,
                                 pastix_int_t       blok_mn,
                                 const void        *A,
                                 const void        *B,
                                 void              *C,
                                 const pastix_lr_t *lowrank );
/*
 * cpublok_ctrsmsp: same parameters as cpucblk_ctrsmsp plus a pastix_int_t
 * blok_m, and it returns a pastix_fixdbl_t where cpucblk_ctrsmsp returns
 * void.  cblk, A and lowrank are declared const; C is the only writable
 * argument.
 * NOTE(review): presumably the block-level triangular solve, with blok_m
 * the index of the block being solved and the return value a flop count -
 * confirm against the implementation.
 */
pastix_fixdbl_t cpublok_ctrsmsp( pastix_side_t      side,
                                 pastix_uplo_t      uplo,
                                 pastix_trans_t     trans,
                                 pastix_diag_t      diag,
                                 const SolverCblk  *cblk,
                                 pastix_int_t       blok_m,
                                 const void        *A,
                                 void              *C,
                                 const pastix_lr_t *lowrank );
/*
 * cpublok_cscalo: cblk, A and dataD are declared const (inputs); dataB is
 * the only writable argument.  No status is returned.
 * NOTE(review): presumably the block-level counterpart of cpucblk_cscalo,
 * scaling block blok_m of A by the diagonal in dataD into dataB - confirm
 * against the implementation.
 */
void cpublok_cscalo ( pastix_trans_t    trans,
                      const SolverCblk *cblk,
                      pastix_int_t      blok_m,
                      const void       *A,
                      const void       *dataD,
                      void             *dataB );

/**
 *    @}
 *    @name PastixComplex32 cblk LU kernels
 *    @{
 */
/*
 * LU kernels on one cblk (per the enclosing Doxygen group).  All three take
 * writable solvmtx and cblk pointers and return an int whose meaning is not
 * documented in this header.
 * NOTE(review): presumably _getrf factorizes the diagonal block, _panel
 * does that and then solves the off-diagonal blocks, and the unsuffixed
 * entry point also applies the updates to the facing cblks; the int is
 * presumably a pivot count - confirm against the implementations.
 */

/* Takes the two coefficient arrays as untyped writable pointers L and U. */
int cpucblk_cgetrfsp1d_getrf( SolverMatrix *solvmtx,
                              SolverCblk   *cblk,
                              void         *L,
                              void         *U );
/* Same parameters as the _getrf variant. */
int cpucblk_cgetrfsp1d_panel( SolverMatrix *solvmtx,
                              SolverCblk   *cblk,
                              void         *L,
                              void         *U );
/* Takes a typed workspace `work` of length lwork instead of L and U. */
int cpucblk_cgetrfsp1d      ( SolverMatrix       *solvmtx,
                              SolverCblk         *cblk,
                              pastix_complex32_t *work,
                              pastix_int_t        lwork );

/**
 *    @}
 *    @name PastixComplex32 cblk Cholesky kernels
 *    @{
 */
/*
 * Cholesky kernels on one cblk (per the enclosing Doxygen group).  All
 * three take writable solvmtx and cblk pointers and return an int whose
 * meaning is not documented in this header.
 * NOTE(review): presumably the same diagonal / panel / full-update split as
 * the LU trio - confirm against the implementations.
 */

/* Takes a single untyped writable coefficient pointer, named dataL here. */
int cpucblk_cpotrfsp1d_potrf( SolverMatrix *solvmtx,
                              SolverCblk   *cblk,
                              void         *dataL );
/* Same parameter types; the coefficient pointer is named L here. */
int cpucblk_cpotrfsp1d_panel( SolverMatrix *solvmtx,
                              SolverCblk   *cblk,
                              void         *L );
/* Takes a typed workspace `work` of length lwork instead of the array. */
int cpucblk_cpotrfsp1d( SolverMatrix       *solvmtx,
                        SolverCblk         *cblk,
                        pastix_complex32_t *work,
                        pastix_int_t        lwork );

/**
 *    @}
 */

#if defined(PRECISION_z) || defined(PRECISION_c)
/**
 *    @name PastixComplex32 cblk LDL^h kernels
 *    @{
 */
/*
 * LDL^h kernels on one cblk (per the enclosing Doxygen group); these
 * declarations sit inside the surrounding
 * #if defined(PRECISION_z) || defined(PRECISION_c) region.  All three
 * return an int whose meaning is not documented in this header.
 */

/* Takes a single untyped writable coefficient pointer L. */
int cpucblk_chetrfsp1d_hetrf( SolverMatrix *solvmtx,
                              SolverCblk   *cblk,
                              void         *L );
/*
 * Takes a second untyped writable pointer DLh in addition to L.
 * NOTE(review): DLh presumably receives the product D * L^h needed by the
 * later updates - confirm against the implementation.
 */
int cpucblk_chetrfsp1d_panel( SolverMatrix *solvmtx,
                              SolverCblk   *cblk,
                              void         *L,
                              void         *DLh );
/*
 * Takes two typed workspaces, work1 and work2, and one length lwork.
 * NOTE(review): which of the two buffers lwork describes is not visible
 * here - confirm against the implementation.
 */
int cpucblk_chetrfsp1d( SolverMatrix       *solvmtx,
                        SolverCblk         *cblk,
                        pastix_complex32_t *work1,
                        pastix_complex32_t *work2,
                        pastix_int_t        lwork );

/**
 *    @}
 *    @name PastixComplex32 cblk LL^t kernels
 *    @{
 */
/*
 * LL^t kernels on one cblk (per the enclosing Doxygen group); these
 * declarations sit inside the surrounding
 * #if defined(PRECISION_z) || defined(PRECISION_c) region.  The parameter
 * lists mirror the Cholesky trio (cpucblk_cpotrfsp1d*).  All three return
 * an int whose meaning is not documented in this header.
 */

/* Takes a single untyped writable coefficient pointer dataL. */
int cpucblk_cpxtrfsp1d_pxtrf( SolverMatrix *solvmtx,
                              SolverCblk   *cblk,
                              void         *dataL );
/* Same parameters as the _pxtrf variant. */
int cpucblk_cpxtrfsp1d_panel( SolverMatrix *solvmtx,
                              SolverCblk   *cblk,
                              void         *dataL );
/* Takes a typed workspace `work` of length lwork instead of the array. */
int cpucblk_cpxtrfsp1d( SolverMatrix       *solvmtx,
                        SolverCblk         *cblk,
                        pastix_complex32_t *work,
                        pastix_int_t        lwork );

/**
 *    @}
 */
#endif

/**
 *    @name PastixComplex32 cblk LDL^t kernels
 *    @{
 */
/*
 * LDL^t kernels on one cblk (per the enclosing Doxygen group).  Unlike the
 * LDL^h trio these are declared unconditionally.  All three return an int
 * whose meaning is not documented in this header.
 */

/* Takes a single untyped writable coefficient pointer dataL. */
int cpucblk_csytrfsp1d_sytrf( SolverMatrix *solvmtx,
                              SolverCblk   *cblk,
                              void         *dataL );
/*
 * Takes a second untyped writable pointer DLt in addition to L.
 * NOTE(review): DLt presumably receives the product D * L^t needed by the
 * later updates - confirm against the implementation.
 */
int cpucblk_csytrfsp1d_panel( SolverMatrix *solvmtx,
                              SolverCblk   *cblk,
                              void         *L,
                              void         *DLt );
/*
 * Takes two typed buffers, Dlt and work, and one length lwork.  Note that
 * the parameter names differ from cpucblk_chetrfsp1d (work1, work2)
 * although the types are the same.
 */
int cpucblk_csytrfsp1d( SolverMatrix       *solvmtx,
                        SolverCblk         *cblk,
                        pastix_complex32_t *Dlt,
                        pastix_complex32_t *work,
                        pastix_int_t        lwork );

/**
 *    @}
 *    @name PastixComplex32 initialization and additional routines
 *    @{
 */
/*
 * Allocation / release helpers for the coefficients of one cblk.  None of
 * them returns a status, so allocation failure cannot be reported through
 * the return value.
 * NOTE(review): the _lr / _fr suffixes presumably stand for low-rank and
 * full-rank storage, and `side` presumably selects the L and/or U
 * coefficients - confirm against the implementations.
 */

/*
 * cblk is declared const; lrblok and ws are writable.
 * NOTE(review): presumably sets up the low-rank block descriptors in lrblok
 * on top of the caller-provided workspace ws rather than allocating -
 * confirm against the implementation.
 */
void cpucblk_calloc_lrws( const SolverCblk   *cblk,
                          pastix_lrblock_t   *lrblok,
                          pastix_complex32_t *ws );
/* Takes an extra int rkmax (NOTE(review): presumably a maximum rank). */
void cpucblk_calloc_lr( pastix_coefside_t  side,
                        SolverCblk        *cblk,
                        int                rkmax );
void cpucblk_calloc_fr( pastix_coefside_t  side,
                        SolverCblk        *cblk );
/* Same parameters as cpucblk_calloc_fr. */
void cpucblk_calloc( pastix_coefside_t  side,
                     SolverCblk        *cblk );
/* Same parameters as cpucblk_calloc. */
void cpucblk_cfree( pastix_coefside_t  side,
                    SolverCblk        *cblk );
/*
 * cpucblk_cfillin: solvmtx and bcsc are declared const, and the column
 * block is designated by the index itercblk rather than by pointer.  No
 * status is returned.
 * NOTE(review): presumably copies the entries of column block itercblk from
 * the bcsc into the solver matrix coefficients; since solvmtx is const, the
 * coefficient arrays must be reached through pointers stored inside it -
 * confirm against the implementation.
 */
void cpucblk_cfillin( pastix_coefside_t    side,
                      const SolverMatrix  *solvmtx,
                      const pastix_bcsc_t *bcsc,
                      pastix_int_t         itercblk );
/*
 * cpucblk_cinit: same parameters as cpucblk_cfillin plus a const string
 * `directory`.
 * NOTE(review): presumably allocates and then fills the column block;
 * `directory` is presumably an output directory for optional dumps, and
 * whether it may be NULL is not visible here - confirm against the
 * implementation.
 */
void cpucblk_cinit( pastix_coefside_t    side,
                    const SolverMatrix  *solvmtx,
                    const pastix_bcsc_t *bcsc,
                    pastix_int_t         itercblk,
                    const char          *directory );
/*
 * cpucblk_cgetschur: cblk is declared const (input); S is the only writable
 * argument and comes with a leading dimension lds.  No status is returned.
 * NOTE(review): presumably copies the contribution of `cblk` to the Schur
 * complement into the dense array S, with upper_part a flag requesting the
 * upper (U) part as well - confirm against the implementation.
 */
void cpucblk_cgetschur( const SolverCblk   *cblk,
                        int                 upper_part,
                        pastix_complex32_t *S,
                        pastix_int_t        lds );
/*
 * cpucblk_cdump: cblk is declared const; output goes to the caller-supplied
 * stream.  This header contains no #include, so <stdio.h> (for FILE) must
 * already have been included wherever this header is used.
 * NOTE(review): the output format is not visible here.
 */
void cpucblk_cdump( pastix_coefside_t  side,
                    const SolverCblk  *cblk,
                    FILE              *stream );
/*
 * cpucblk_cdiff: cblkA is declared const while cblkB is writable.  Returns
 * an int whose meaning is not documented in this header.
 * NOTE(review): presumably compares the coefficients of the two column
 * blocks and returns non-zero when they differ; cblkB being non-const
 * suggests it may be modified (e.g. overwritten with the difference) -
 * confirm against the implementation.
 */
int cpucblk_cdiff( pastix_coefside_t  side,
                   const SolverCblk  *cblkA,
                   SolverCblk        *cblkB );
/*
 * cpucblk_cadd: cblkA, A and lowrank are declared const (inputs); cblkB, B
 * and work are writable, with work of length lwork.  A and B are untyped
 * (void *).  Returns a pastix_fixdbl_t whose meaning is not documented in
 * this header.
 * NOTE(review): presumably B += alpha * A at column-block level, handling
 * both full-rank and low-rank storage - confirm against the implementation.
 */
pastix_fixdbl_t cpucblk_cadd( pastix_complex32_t  alpha,
                              const SolverCblk   *cblkA,
                              SolverCblk         *cblkB,
                              const void         *A,
                              void               *B,
                              pastix_complex32_t *work,
                              pastix_int_t        lwork,
                              const pastix_lr_t  *lowrank );
/*
 * cpublok_cadd: same parameters as cpucblk_cadd plus two pastix_int_t
 * values, blokA_m and blokB_m.
 * NOTE(review): presumably the block-level counterpart, with blokA_m and
 * blokB_m selecting the blocks inside cblkA and cblkB - confirm against the
 * implementation.
 */
pastix_fixdbl_t cpublok_cadd( pastix_complex32_t  alpha,
                              const SolverCblk   *cblkA,
                              SolverCblk         *cblkB,
                              pastix_int_t        blokA_m,
                              pastix_int_t        blokB_m,
                              const void         *A,
                              void               *B,
                              pastix_complex32_t *work,
                              pastix_int_t        lwork,
                              const pastix_lr_t  *lowrank );

/**
 *    @}
 *    @name PastixComplex32 MPI routines
 *    @{
 */
/*
 * Dependency-tracking routines of the MPI group.  These four are declared
 * whether or not PASTIX_WITH_MPI is defined.
 */

/*
 * solvmtx and cblk are writable.  Returns an int whose meaning is not
 * documented in this header.
 * NOTE(review): presumably waits for / checks the contributions still
 * expected by `cblk`, with mt_flag telling whether the caller runs
 * multi-threaded - confirm against the implementation.
 */
int cpucblk_cincoming_deps( int                mt_flag,
                            pastix_coefside_t  side,
                            SolverMatrix      *solvmtx,
                            SolverCblk        *cblk );
/*
 * cblk is declared const (the contributor); fcbk is writable.
 * NOTE(review): presumably marks one contribution from `cblk` to the facing
 * column block `fcbk` as done - confirm against the implementation.
 */
void cpucblk_crelease_deps( pastix_coefside_t  side,
                            SolverMatrix      *solvmtx,
                            const SolverCblk  *cblk,
                            SolverCblk        *fcbk );
/* NOTE(review): `sched` presumably identifies the scheduler in use. */
void cpucblk_crequest_cleanup( pastix_coefside_t  side,
                               pastix_int_t       sched,
                               SolverMatrix      *solvmtx );
/* Takes only the writable solver matrix. */
void cpucblk_cupdate_reqtab( SolverMatrix *solvmtx );
/*
 * The two routines below are declared only when PASTIX_WITH_MPI is defined;
 * code that calls them must be guarded the same way.
 */
#if defined( PASTIX_WITH_MPI )
/* NOTE(review): threadid presumably identifies the calling thread. */
void cpucblk_cmpi_progress( pastix_coefside_t  side,
                            SolverMatrix      *solvmtx,
                            int                threadid );
/*
 * NOTE(review): "isend" suggests a non-blocking send of the part of the
 * right-hand side rhsb attached to `cblk` during the backward solve -
 * confirm against the implementation.
 */
void cpucblk_cisend_rhs_bwd( SolverMatrix *solvmtx,
                             pastix_rhs_t  rhsb,
                             SolverCblk   *cblk );
#endif
/*
 * Right-hand-side "fwd" counterparts of the dependency routines; they are
 * declared whether or not PASTIX_WITH_MPI is defined.  Each takes a const
 * args_solve_t pointer `enums`, a writable solvmtx and a pastix_rhs_t rhsb.
 * NOTE(review): "fwd" presumably refers to the forward substitution of the
 * solve step - confirm against the implementations.
 */
void cpucblk_cmpi_rhs_fwd_progress( const args_solve_t *enums,
                                    SolverMatrix       *solvmtx,
                                    pastix_rhs_t        rhsb,
                                    int                 threadid );
/* cblk is declared const (the contributor); fcbk is writable. */
void cpucblk_crelease_rhs_fwd_deps( const args_solve_t *enums,
                                    SolverMatrix       *solvmtx,
                                    pastix_rhs_t        rhsb,
                                    const SolverCblk   *cblk,
                                    SolverCblk         *fcbk );
/*
 * Returns an int whose meaning is not documented in this header.  Note that
 * rhsb comes last here, whereas it is the third parameter of the other
 * routines in this family.
 */
int cpucblk_cincoming_rhs_fwd_deps( int                 rank,
                                    const args_solve_t *enums,
                                    SolverMatrix       *solvmtx,
                                    SolverCblk         *cblk,
                                    pastix_rhs_t        rhsb );
/* NOTE(review): `sched` presumably identifies the scheduler in use. */
void cpucblk_crequest_rhs_fwd_cleanup( const args_solve_t *enums,
                                       pastix_int_t        sched,
                                       SolverMatrix       *solvmtx,
                                       pastix_rhs_t        rhsb );

/*
 * Right-hand-side "bwd" routines; their parameter lists are identical to
 * the corresponding "fwd" routines declared just above, and they too are
 * declared whether or not PASTIX_WITH_MPI is defined.
 * NOTE(review): "bwd" presumably refers to the backward substitution of the
 * solve step - confirm against the implementations.
 */
void cpucblk_cmpi_rhs_bwd_progress( const args_solve_t *enums,
                                    SolverMatrix       *solvmtx,
                                    pastix_rhs_t        rhsb,
                                    int                 threadid );
/* cblk is declared const (the contributor); fcbk is writable. */
void cpucblk_crelease_rhs_bwd_deps( const args_solve_t *enums,
                                    SolverMatrix       *solvmtx,
                                    pastix_rhs_t        rhsb,
                                    const SolverCblk   *cblk,
                                    SolverCblk         *fcbk );
/* Returns an int whose meaning is not documented in this header. */
int cpucblk_cincoming_rhs_bwd_deps( int                 rank,
                                    const args_solve_t *enums,
                                    SolverMatrix       *solvmtx,
                                    SolverCblk         *cblk,
                                    pastix_rhs_t        rhsb );
/* NOTE(review): `sched` presumably identifies the scheduler in use. */
void cpucblk_crequest_rhs_bwd_cleanup( const args_solve_t *enums,
                                       pastix_int_t        sched,
                                       SolverMatrix       *solvmtx,
                                       pastix_rhs_t        rhsb );
/*
 * Send / receive routines for the right-hand side attached to one cblk.
 * solvmtx is declared const in all four, cblk is writable, and none of them
 * returns a status.  They are declared whether or not PASTIX_WITH_MPI is
 * defined.
 * NOTE(review): whether these calls block until completion is not visible
 * here - confirm against the implementations.
 */
void cpucblk_csend_rhs_forward( const SolverMatrix *solvmtx,
                                SolverCblk         *cblk,
                                pastix_rhs_t        b );
/* The only one of the four that takes a workspace (`work`, no length). */
void cpucblk_crecv_rhs_forward( const SolverMatrix *solvmtx,
                                SolverCblk         *cblk,
                                pastix_complex32_t *work,
                                pastix_rhs_t        b );
void cpucblk_csend_rhs_backward( const SolverMatrix *solvmtx,
                                 SolverCblk         *cblk,
                                 pastix_rhs_t        b );
void cpucblk_crecv_rhs_backward( const SolverMatrix *solvmtx,
                                 SolverCblk         *cblk,
                                 pastix_rhs_t        b );

/**
 *    @}
 *    @name PastixComplex32 compression/uncompression routines
 *    @{
 */
/*
 * cpublok_ccompress: lowrank is declared const; blok is writable.  Returns
 * a pastix_fixdbl_t whose meaning is not documented in this header.
 * NOTE(review): presumably compresses the M-by-N block described by `blok`
 * in place according to the settings in `lowrank`, returning a flop count -
 * confirm against the implementation.
 */
pastix_fixdbl_t cpublok_ccompress( const pastix_lr_t *lowrank,
                                   pastix_int_t        M,
                                   pastix_int_t        N,
                                   pastix_lrblock_t   *blok );
/*
 * cpucblk_ccompress: solvmtx is declared const; cblk is writable.  Returns
 * a pastix_int_t whose meaning is not documented in this header.
 * NOTE(review): presumably compresses the blocks of `cblk` and returns the
 * memory gained; the role of max_ilulvl is not visible here - confirm
 * against the implementation.
 */
pastix_int_t cpucblk_ccompress( const SolverMatrix *solvmtx,
                                pastix_coefside_t   side,
                                int                 max_ilulvl,
                                SolverCblk         *cblk );
/* Takes the side selector and a writable cblk; no status is returned. */
void cpucblk_cuncompress( pastix_coefside_t  side,
                          SolverCblk        *cblk );
/*
 * cpucblk_cmemory: solvmtx is declared const; cblk, orig and gain are
 * writable, so results can come back through *orig and *gain.
 * NOTE(review): presumably reports the original memory footprint and the
 * memory gained by compression; whether the values are overwritten or
 * accumulated, and whether they are arrays, is not visible here - confirm
 * against the implementation.
 */
void cpucblk_cmemory( pastix_coefside_t   side,
                      const SolverMatrix *solvmtx,
                      SolverCblk         *cblk,
                      pastix_int_t       *orig,
                      pastix_int_t       *gain );

/**
 *    @}
 * @}
 *
 * @addtogroup kernel_solve
 * @{
 *    This module contains all the kernels working on the solver matrix structure
 *    for the solve step.
 *
 */

/*
 * solve_blok_ctrsm: takes the four BLAS TRSM selectors.  cblk and dataA are
 * declared const (inputs); b is the only writable argument, with nrhs
 * columns and leading dimension ldb.  No status is returned.
 * NOTE(review): presumably solves in place, for the nrhs right-hand sides
 * in b, the triangular system given by the diagonal block of `cblk` stored
 * in dataA - confirm against the implementation.
 */
void solve_blok_ctrsm( pastix_side_t       side,
                       pastix_uplo_t       uplo,
                       pastix_trans_t      trans,
                       pastix_diag_t       diag,
                       const SolverCblk   *cblk,
                       int                 nrhs,
                       const void         *dataA,
                       pastix_complex32_t *b,
                       int                 ldb );
/*
 * solve_blok_cgemm: cblk, blok, dataA and B are declared const (inputs);
 * fcbk and C are writable.  Note that nrhs, ldb and ldc are pastix_int_t
 * here, whereas solve_blok_ctrsm uses int for nrhs and ldb.
 * NOTE(review): presumably updates C with the product of block `blok`
 * (stored in dataA) and B - confirm against the implementation, including
 * why fcbk is not const.
 */
void solve_blok_cgemm( pastix_side_t             side,
                       pastix_trans_t            trans,
                       pastix_int_t              nrhs,
                       const SolverCblk         *cblk,
                       const SolverBlok         *blok,
                       SolverCblk               *fcbk,
                       const void               *dataA,
                       const pastix_complex32_t *B,
                       pastix_int_t              ldb,
                       pastix_complex32_t       *C,
                       pastix_int_t              ldc );

/*
 * Per-cblk solve entry points taking the solve parameters `enums` (const),
 * the solver matrix `datacode` (writable) and the right-hand side b.  No
 * status is returned.  Note the asymmetry: cblk is declared const in the
 * forward variant but not in the backward one.
 * NOTE(review): presumably the forward and backward substitution steps for
 * one column block - confirm against the implementations.
 */
void solve_cblk_ctrsmsp_forward( const args_solve_t *enums,
                                 SolverMatrix       *datacode,
                                 const SolverCblk   *cblk,
                                 pastix_rhs_t        b );
void solve_cblk_ctrsmsp_backward( const args_solve_t *enums,
                                  SolverMatrix       *datacode,
                                  SolverCblk         *cblk,
                                  pastix_rhs_t        b );

/*
 * solve_cblk_cdiag: cblk and dataA are declared const (inputs); b (nrhs
 * columns, leading dimension ldb) and work are writable.  work is passed
 * without a length.  No status is returned.
 * NOTE(review): presumably applies the inverse of the diagonal stored in
 * dataA to b, as needed between the two triangular solves of an LDL^t or
 * LDL^h solve; the required size of work and whether it may be NULL are not
 * visible here - confirm against the implementation.
 */
void solve_cblk_cdiag( const SolverCblk   *cblk,
                       const void         *dataA,
                       int                 nrhs,
                       pastix_complex32_t *b,
                       int                 ldb,
                       pastix_complex32_t *work );
/**
 * @}
 *
 * @addtogroup kernel_fact_null
 * @{
 *    This module contains the three-term update functions for the LDL^t and
 *    LDL^h factorizations.
 *
 */
/*
 * Update functions for the LDL^h (hetrf) and LDL^t (sytrf) factorizations
 * (per the enclosing Doxygen group).  Both share one signature: cblk, blok
 * and L are declared const (inputs); fcblk, C and work are writable; work
 * is passed without a length; no status is returned.
 * NOTE(review): presumably C -= L * D * L^h (resp. L^t) for the
 * contribution of `blok` to `fcblk`; the required size of work is not
 * visible here - confirm against the implementations.
 */
/* The LDL^h variant is declared only for PRECISION_z or PRECISION_c. */
#if defined(PRECISION_z) || defined(PRECISION_c)
void core_chetrfsp1d_gemm( const SolverCblk         *cblk,
                           const SolverBlok         *blok,
                           SolverCblk               *fcblk,
                           const pastix_complex32_t *L,
                           pastix_complex32_t       *C,
                           pastix_complex32_t       *work );
#endif
void core_csytrfsp1d_gemm( const SolverCblk         *cblk,
                           const SolverBlok         *blok,
                           SolverCblk               *fcblk,
                           const pastix_complex32_t *L,
                           pastix_complex32_t       *C,
                           pastix_complex32_t       *work );

/*
 * "1dplus" entry points.  Each factorization comes as a pair:
 *   - cpucblk_cXXtrfsp1dplus( solvmtx, cblk ) returns an int whose meaning
 *     is not documented in this header and takes no workspace;
 *   - cpucblk_cXXtrfsp1dplus_update( solvmtx, blok, work[, lwork] ) returns
 *     void and works from a SolverBlok instead of a SolverCblk.
 * The potrf and getrf update routines take a workspace length lwork; the
 * sytrf one does not.
 * NOTE(review): presumably the first routine factorizes the panel and the
 * second applies the updates generated by one block - confirm against the
 * implementations.
 */
int
cpucblk_cpotrfsp1dplus( SolverMatrix *solvmtx,
                        SolverCblk   *cblk );
void
cpucblk_cpotrfsp1dplus_update( SolverMatrix       *solvmtx,
                               SolverBlok         *blok,
                               pastix_complex32_t *work,
                               pastix_int_t        lwork );
int
cpucblk_csytrfsp1dplus( SolverMatrix *solvmtx,
                        SolverCblk   *cblk );
/* No lwork parameter here, unlike the potrf and getrf update routines. */
void
cpucblk_csytrfsp1dplus_update( SolverMatrix       *solvmtx,
                               SolverBlok         *blok,
                               pastix_complex32_t *work );
int
cpucblk_cgetrfsp1dplus( SolverMatrix *solvmtx,
                        SolverCblk   *cblk );
void
cpucblk_cgetrfsp1dplus_update( SolverMatrix       *solvmtx,
                               SolverBlok         *blok,
                               pastix_complex32_t *work,
                               pastix_int_t        lwork );
/*
 * "1dplus" entry points for the pxtrf and hetrf factorizations, declared
 * only when PRECISION_z or PRECISION_c is defined.  The parameter lists
 * follow the unconditional 1dplus routines declared above: the pxtrf update
 * takes (work, lwork) like the potrf one, and the hetrf update takes only
 * work like the sytrf one.
 */
#if defined(PRECISION_z) || defined(PRECISION_c)
int
cpucblk_cpxtrfsp1dplus( SolverMatrix *solvmtx,
                        SolverCblk   *cblk );
void
cpucblk_cpxtrfsp1dplus_update( SolverMatrix       *solvmtx,
                               SolverBlok         *blok,
                               pastix_complex32_t *work,
                               pastix_int_t        lwork );
int
cpucblk_chetrfsp1dplus( SolverMatrix *solvmtx,
                        SolverCblk   *cblk );
/* No lwork parameter here, unlike the pxtrf update routine. */
void
cpucblk_chetrfsp1dplus_update( SolverMatrix       *solvmtx,
                               SolverBlok         *blok,
                               pastix_complex32_t *work );
#endif

/**
 * @}
 */

#endif /* _pastix_ccores_h_ */
