!{\src2tex{textfont=tt}}
!!****f* abinit/lobpcgccwf
!! NAME
!! lobpcgccwf
!!
!! FUNCTION
!! this routine updates the whole wave functions at a given k-point,
!! using the lobpcg method
!! for a given spin-polarization, from a fixed hamiltonian
!! but might also simply compute eigenvectors and eigenvalues at this k point.
!! it will also update the matrix elements of the hamiltonian.
!!
!! COPYRIGHT
!! Copyright (C) 1998-2007 ABINIT group (GZ,AR,MT)
!! this file is distributed under the terms of the
!! gnu general public license, see ~abinit/COPYING
!! or http://www.gnu.org/copyleft/gpl.txt .
!! for the initials of contributors, see ~abinit/doc/developers/contributors.txt .
!!
!! INPUTS
!!  dimffnl=second dimension of ffnl (1+number of derivatives)
!!  dtfil <type(datafiles_type)>=variables related to files
!!  dtset <type(dataset_type)>=all input variales for this dataset
!!  ffnl(npw_k,dimffnl,lmnmax,ntypat)=nonlocal form factors on basis sphere.
!!  gs_hamk <type(gs_hamiltonian_type)>=all data for the hamiltonian at k
!!  icg=shift to be applied on the location of data in the array cg
!!  igsc=shift to be applied on the location of data in the array gsc
!!  kg_k(3,npw_k)=reduced planewave coordinates.
!!  kinpw(npw)=(modified) kinetic energy for each plane wave (hartree)
!!  kpg_k(npw,nkpg)= (k+G) components (only if useylm=1)
!!  lmnmax=if useylm=1, max number of (l,m,n) comp. over all type of psps
!!        =if useylm=0, max number of (l,n)   comp. over all type of psps
!!  matblk=dimension of the array ph3d
!!  mcg=second dimension of the cg array
!!  mgfft=maximum size of 1d ffts
!!  mgsc=second dimension of the gsc array
!!  mpi_enreg=informations about mpi parallelization
!!  mpsang= 1+maximum angular momentum for nonlocal pseudopotentials
!!  mpssoang= 1+maximum (spin*angular momentum) for nonlocal pseudopotentials
!!  natom=number of atoms in cell.
!!  nband_k=number of bands at this k point for that spin polarization
!!  nbdblock : number of blocks
!!  nkpg=second dimension of kpg_k (0 if useylm=0)
!!  npw_k=number of plane waves at this k point
!!  nspinor=number of spinorial components of the wavefunctions
!!  ntypat=number of types of atoms in unit cell.
!!  nvloc=final dimension of vlocal (usually 1, but 4 for non-collinear)
!!  n4,n5,n6 used for dimensionning of vlocal
!!  ph3d(2,npw,matblk)=3-dim structure factors, for each atom and plane wave.
!!  prtvol=control print volume and debugging output
!!  psps <type(pseudopotential_type)>=variables related to pseudopotentials
!!  use_subovl= 1 if "subovl" array is computed (see below)
!!  vlocal(n4,n5,n6,nvloc)= local potential in real space, on the augmented fft grid
!!
!! OUTPUT
!!  resid_k(nband_k)=residuals for each states
!!  subham(nband_k*(nband_k+1))=the matrix elements of h
!!  If gs_hamk%usepaw==0:
!!    totvnl(nband_k*(1-gs_hamk%usepaw),nband_k*(1-gs_hamk%usepaw))=the matrix elements of vnl
!!    gsc(2,mgsc)=<g|s|c> matrix elements (s=overlap)
!!  If use_subovl==0:
!!    subovl(nband_k*(nband_k+1)*use_subovl)=the matrix elements of s
!!
!! SIDE EFFECTS
!!  cg(2,mcg)=updated wavefunctions
!!
!! PARENTS
!!      vtowfk
!!
!! CHILDREN
!!      timab,wrtout,xcomm_init,xsum_mpi
!!
!! SOURCE

#if defined HAVE_CONFIG_H
#include "config.h"
#endif

subroutine lobpcgccwf(cg,dimffnl,dtfil,dtset,ffnl,gs_hamk,gsc,icg,igsc,&
&           kg_k,kinpw,lmnmax,matblk,mcg,mgfft,mgsc,mpi_enreg,mpsang,mpssoang,natom,&
&           nband_k,nbdblock,npw_k,nspinor,ntypat,nvloc,n4,n5,n6,ph3d,prtvol,&
&           psps,resid_k,subham,subovl,totvnl,use_subovl,vlocal)

 use defs_basis
 use defs_datatypes

!This section has been created automatically by the script Abilint (TD). Do not modify these by hand.
#ifdef HAVE_FORTRAN_INTERFACES
 use interfaces_01manage_mpi
 use interfaces_13nonlocal
 use interfaces_14wfs
 use interfaces_18seqpar, except_this_one => lobpcgccwf
 use interfaces_lib01hidempi
#else
 use defs_xfuncmpi
#endif
!End of the abilint section

 implicit none

#if defined MPI_FFT
          include 'mpif.h'
#endif
!Arguments ------------------------------------
 integer :: spacecomm=0,spacecomm_old=0
 type(gs_hamiltonian_type) :: gs_hamk
 integer :: dimffnl,icg,igsc,lmnmax,matblk,mcg,mgsc,mgfft,mpsang,mpssoang,n4,n5,n6
 integer :: natom,nband_k,nbdblock,npw_k,nspinor,ntypat,nvloc,prtvol,use_subovl
 type(datafiles_type) :: dtfil
 type(dataset_type) :: dtset
 type(pseudopotential_type) :: psps
 type(mpi_type) :: mpi_enreg
 integer :: kg_k(3,npw_k)
 real(dp) :: cg(2,mcg),ffnl(npw_k,dimffnl,lmnmax,ntypat),gsc(2,mgsc)
 real(dp) :: kinpw(npw_k),ph3d(2,npw_k,matblk),resid_k(nband_k)
 real(dp) :: vlocal(n4,n5,n6,nvloc)
 real(dp) :: subham(nband_k*(nband_k+1))
 real(dp) :: subovl(nband_k*(nband_k+1)*use_subovl)
 complex(dp) :: totvnl(nband_k*(1-gs_hamk%usepaw),nband_k*(1-gs_hamk%usepaw))

!Local variables-------------------------------
 integer, parameter :: tim_getghc=5
 integer :: activepsize,activersize,bblocksize,bigorder,blocksize,cgindex,choice,cpopt
 integer :: cond_try,gscindex,iblocksize
 integer :: i1,i2,i3,i4,iband,iblock,idir,ier,ierr,ii,info,ioption
 integer :: ipw,ipw1,istwf_k,isubo,isubh,iterationnumber,ivectsize,iwavef,jwavef,lwork
 integer :: maxblocksize,maxiterations,nkpg,nnlout,old_num_group_fft,old_paral_compil_fft,old_paral_level
 integer :: nkpg_loc
 integer :: paw_opt,restart,signs,sij_opt,tim_nonlop,tocomplete,vectsize
 logical :: gen_eigenpb,block_to_complete
 real(dp) :: cgreipw,cgimipw,cscre,cscim,chcre,chcim,cvcre,cvcim,dum,sq2
 complex(dp) :: cminusone
 integer :: old_ngfft(18)
 real(dp), allocatable :: gwavef(:,:),cwavef(:,:),gvnlc(:,:),lambda_loc(:),kpg_dum(:,:)
 real(dp), allocatable :: swavef(:,:)
 real(dp), allocatable :: residualnorms(:),eigen(:),rwork(:)
 real(dp) :: nonlop_dum(1),enlout(1),tsec(2)
 real(dp),allocatable :: sij_loc(:,:),gsc_loc(:,:),kpg_loc(:,:)
 complex(dp), allocatable :: blockvectorx(:,:),blockvectorax(:,:),blockvectorbx(:,:),&
       & blockvectorr(:,:),blockvectorar(:,:),blockvectorbr(:,:),&
       & blockvectorp(:,:),blockvectorap(:,:),blockvectorbp(:,:),blockvectordumm(:,:),&
       & blockvectory(:,:),blockvectorby(:,:),&
       & gramxax(:,:),gramxar(:,:),gramxap(:,:),gramrar(:,:),gramrap(:,:),&
       & grampap(:,:),&
       & gramxbx(:,:),gramxbr(:,:),gramxbp(:,:),gramrbr(:,:),gramrbp(:,:),&
       & grampbp(:,:),&
       & identity(:,:),coordx(:,:),lambda(:,:),&
       & grama(:,:),gramb(:,:),gramyx(:,:),&
       & transf(:,:,:),w(:),work(:)
 real(dp), allocatable :: dummy2(:,:),dummy1(:)
 complex(dp),allocatable :: csubham(:,:)
 type(cprj_type) :: cprj_dum(1)


#ifdef VMS
!DEC$ ATTRIBUTES ALIAS:'ZGEMM' :: zgemm
!DEC$ ATTRIBUTES ALIAS:'ZHEEV' :: zheev
!DEC$ ATTRIBUTES ALIAS:'ZHEGV' :: zhegv
!DEC$ ATTRIBUTES ALIAS:'ZTRSM' :: ztrsm
!DEC$ ATTRIBUTES ALIAS:'ZTRSM' :: zcopy
#endif

!no_abirules
!correspondence with abinit. here for real wf but in complex mode
!this is the index of a given band
  cgindex(iblocksize)=npw_k*nspinor*(iblocksize-1)+icg+1
  gscindex(iblocksize)=npw_k*nspinor*(iblocksize-1)+igsc+1

  gen_eigenpb=(gs_hamk%usepaw==1)

! *************************************************************************

 call timab(530,1,tsec)
 cminusone=-cone
 sq2=sqrt(two)
 vectsize=npw_k*nspinor

 maxblocksize=mpi_enreg%nproc_fft
 if(mpi_enreg%mode_para=='b') maxblocksize=mpi_enreg%nproc_band

 istwf_k=gs_hamk%istwf_k
 maxiterations=dtset%nline

 ioption=mpi_enreg%fft_option_lob
 if (mpi_enreg%nproc_fft ==1 .and. mpi_enreg%fft_option_lob==2 .and. mpi_enreg%nproc_band==1) ioption=1

!###########################################################################
!################ BIG LOOP OVER BLOCKS  ####################################
!###########################################################################
 do iblock=1,nbdblock

  blocksize=mpi_enreg%nproc_fft
  if(mpi_enreg%mode_para=='b') blocksize=mpi_enreg%nproc_band

! block_to_complete : true if the tabs with dimension blocksize need zeroes at the end
  block_to_complete=.false.;tocomplete=0
  if ((iblock == nbdblock) .and. (nbdblock * blocksize > nband_k)) then
! the last block is smaller than the others
   block_to_complete=.true.
   tocomplete=nbdblock * blocksize - nband_k-1
   blocksize=mpi_enreg%nproc_fft - (nbdblock * blocksize - nband_k)
  end if

  bblocksize=(iblock-1)*blocksize

! allocations
  allocate(blockvectorx(vectsize,blocksize),blockvectorax(vectsize,blocksize))
  allocate(blockvectorbx(vectsize,blocksize))
  allocate(blockvectorr(vectsize,blocksize),blockvectorar(vectsize,blocksize))
  allocate(blockvectorbr(vectsize,blocksize))
  allocate(blockvectorp(vectsize,blocksize),blockvectorap(vectsize,blocksize))
  allocate(blockvectorbp(vectsize,blocksize))
  allocate(blockvectordumm(vectsize,blocksize))

  allocate(gramxax(blocksize,blocksize),&
       & gramxar(blocksize,blocksize),gramxap(blocksize,blocksize),&
       & gramrar(blocksize,blocksize),gramrap(blocksize,blocksize),&
       & grampap(blocksize,blocksize),&
       & gramxbx(blocksize,blocksize),gramxbr(blocksize,blocksize),&
       & gramxbp(blocksize,blocksize),gramrbr(blocksize,blocksize),&
       & gramrbp(blocksize,blocksize),&
       & grampbp(blocksize,blocksize))
  allocate(lambda(blocksize,blocksize))
  allocate(transf(blocksize,blocksize,3))
  allocate(residualnorms(blocksize))

  allocate(blockvectory(vectsize,bblocksize),blockvectorby(vectsize,bblocksize))
  allocate(gramyx(bblocksize,blocksize))

! transfer array of wf coeff in block to blockvectorx (complex to complex)
  do iblocksize=1,blocksize
   iband=iblocksize+bblocksize
   call zcopy(vectsize,cg(:,cgindex(iband):cgindex(iband+1)-1),1,blockvectorx(:,iblocksize),1)
  end do

!!!!!!!!!!!!!!!!!!!!!!!!! Begin if iblock /=1 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! transfer array of wf coeff less than iblock to blockvectory (not done)
  if(iblock /=1) then
!  transfer cg to blockvectory, for the previous band index
   do iblocksize=1,bblocksize
    iband=iblocksize
    call zcopy(vectsize,cg(:,cgindex(iband):cgindex(iband+1)-1),1,blockvectory(:,iblocksize),1)
   end do

   if(gen_eigenpb) then
    do iblocksize=1,bblocksize
     iband=iblocksize
     call zcopy(vectsize,gsc(:,gscindex(iband):gscindex(iband+1)-1),1,blockvectorby(:,iblocksize),1)
    end do
   else
    call zcopy(vectsize*bblocksize,blockvectory,1,blockvectorby,1)
   end if

!  orthogonalize x to the constraint y(supposed orthonormal)
!  blockvectorx=blockvectorx-&
!              &matmul(blockvectory,matmul(transpose(blockvectorby),blockvectorx))

   call timab(532,1,tsec)
   call zgemm('c','n',bblocksize,blocksize,vectsize,cone,blockvectorby,&
&            vectsize,blockvectorx,vectsize,czero,gramyx,bblocksize)
   call timab(532,2,tsec)

   old_paral_level= mpi_enreg%paral_level
   mpi_enreg%paral_level=3
   call xcomm_init(mpi_enreg,spaceComm)
   if(mpi_enreg%mode_para=='b') spaceComm=mpi_enreg%commcart
   call timab(533,1,tsec)
   call xsum_mpi(gramyx,spaceComm,ierr)
   call timab(533,2,tsec)
   mpi_enreg%paral_level= old_paral_level

   call timab(532,1,tsec)
   call zgemm('n','n',vectsize,blocksize,bblocksize,cminusone,blockvectory,&
&             vectsize,gramyx,bblocksize,cone,blockvectorx,vectsize)
   call timab(532,2,tsec)
  end if
!!!!!!!!!!!!!!!!!!!!!!!!! End if iblock /=1 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

  allocate(cwavef(2,npw_k*nspinor*maxblocksize),gwavef(2,npw_k*nspinor*maxblocksize),gvnlc(2,npw_k*nspinor*maxblocksize))
  allocate(swavef(2,npw_k*nspinor*maxblocksize))
  if (block_to_complete) then
   cwavef(:,:)=zero
  end if
  call zcopy(npw_k*nspinor*blocksize,blockvectorx,1,cwavef,1)
  sij_opt=0;if (gen_eigenpb) sij_opt=1
  if (ioption==1) then
   call getghc(cwavef,dimffnl,ffnl,dtfil%filstat,gwavef,swavef,gs_hamk,gvnlc,kg_k,&
&   kinpw,lmnmax,matblk,mgfft,mpi_enreg,mpsang,mpssoang,natom,blocksize,npw_k,nspinor,ntypat,&
&   nvloc,n4,n5,n6,ph3d,prtvol,sij_opt,tim_getghc,vlocal)
  else
   call timab(534,1,tsec)
   call prep_getghc(cwavef,dimffnl,dtfil,ffnl,gs_hamk,gvnlc,gwavef,swavef,iblock,1,istwf_k,kg_k,&
&   kinpw,lmnmax,matblk,maxblocksize,mgfft,mpi_enreg,mpsang,mpssoang,natom,nbdblock,&
&   nband_k,npw_k,nspinor,ntypat,nvloc,n4,n5,n6,ph3d,prtvol,sij_opt,vlocal)
   call timab(534,2,tsec)
  end if
  if (gen_eigenpb) then
   call zcopy(npw_k*nspinor*blocksize,swavef,1,blockvectorbx,1)
  else
   call zcopy(vectsize*blocksize,blockvectorx,1,blockvectorbx,1)
  endif
  call zcopy(npw_k*nspinor*blocksize,gwavef,1,blockvectorax,1)
  deallocate(cwavef,gwavef,gvnlc)
  deallocate(swavef)
  call zorthonormalize(blockvectorx,blockvectorbx,blocksize,mpi_enreg,gramxbx,vectsize)
  call ztrsm('r','u','n','n',vectsize,blocksize,cone,gramxbx,blocksize,&
&            blockvectorbx,vectsize)
  call ztrsm('r','u','n','n',vectsize,blocksize,cone,gramxbx,blocksize,&
&            blockvectorax,vectsize)
! do rayleigh ritz on a in space x
! gramxax=matmul(transpose(blockvectorx),blockvectorax)

  call timab(532,1,tsec)
  call zgemm('c','n',blocksize,blocksize,vectsize,cone,blockvectorx,&
&            vectsize,blockvectorax,vectsize,czero,gramxax,blocksize)
  call timab(532,2,tsec)

  old_paral_level= mpi_enreg%paral_level
  mpi_enreg%paral_level=3
  call xcomm_init(mpi_enreg,spaceComm)
  if(mpi_enreg%mode_para=='b') spaceComm=mpi_enreg%commcart
  call timab(533,1,tsec)
  call xsum_mpi(gramxax,spaceComm,ierr)
  call timab(533,2,tsec)
  mpi_enreg%paral_level= old_paral_level
  allocate(eigen(blocksize))
! if (block_to_complete) eigen(blocksize)=zero
! call la_syev(gramxax,eigen,jobz='v')
  lwork=3*blocksize-2
  allocate(work(lwork),rwork(lwork))
  call zheev('v','u',blocksize,gramxax,blocksize,eigen,work,lwork,rwork,info)
  deallocate(work,rwork)
! blockvectorx=matmul(blockvectorx,gramxax)

  call timab(532,1,tsec)
  call zgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorx,&
&            vectsize,gramxax,blocksize,czero,blockvectordumm,vectsize)
  call timab(532,2,tsec)

  call zcopy(vectsize*blocksize,blockvectordumm,1,blockvectorx,1)
! blockvectorax=matmul(blockvectorax,gramxax)

  call timab(532,1,tsec)
  call zgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorax,&
&            vectsize,gramxax,blocksize,czero,blockvectordumm,vectsize)
  call timab(532,2,tsec)

  call zcopy(vectsize*blocksize,blockvectordumm,1,blockvectorax,1)
! blockvectorbx=matmul(blockvectorbx,gramxax)

  call timab(532,1,tsec)
  call zgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorbx,&
&            vectsize,gramxax,blocksize,czero,blockvectordumm,vectsize)
  call timab(532,2,tsec)

  call zcopy(vectsize*blocksize,blockvectordumm,1,blockvectorbx,1)
  do iblocksize=1,blocksize
   lambda(iblocksize,iblocksize)=eigen(iblocksize)
  end do
  deallocate(eigen)

!###########################################################################
!################ PERFORM LOOP ON NLINE ####################################
!###########################################################################
! now the main alogrithm
  iter: do iterationnumber=1,maxiterations
!  construct residual
!  blockvectorr=blockvectorax-matmul(blockvectorx,lambda)
   call zprecon3(blockvectorbx,lambda,blocksize,&
&                 istwf_k,kinpw,mpi_enreg,npw_k,nspinor,&
&                 blockvectorax,blockvectorr)
   residualnorms=sum(abs(blockvectorr)**2,dim=1)
   old_paral_level= mpi_enreg%paral_level
   mpi_enreg%paral_level=3
   call xcomm_init(mpi_enreg,spaceComm)
   if(mpi_enreg%mode_para=='b') spaceComm=mpi_enreg%commcart
   call timab(533,1,tsec)
   call xsum_mpi(residualnorms,spaceComm,ierr)
   call timab(533,2,tsec)
   mpi_enreg%paral_level= old_paral_level
   resid_k(bblocksize+1:bblocksize+blocksize)=residualnorms(1:blocksize)
   residualnorms=sqrt(residualnorms)
!  if(abs(sum(residualnorms)) < 1.d-10) exit
!  not yet masking

   if(iblock /=1) then !residuals orthogonal to blockvectorby
!      blockvectorr=blockvectorr-&
!           &matmul(blockvectory,matmul(transpose(blockvectorby),blockvectorr))
    call timab(532,1,tsec)
    call zgemm('c','n',bblocksize,blocksize,vectsize,cone,blockvectorby,&
&              vectsize,blockvectorr,vectsize,czero,gramyx,bblocksize)
    call timab(532,2,tsec)
    old_paral_level= mpi_enreg%paral_level
    mpi_enreg%paral_level=3
    call xcomm_init(mpi_enreg,spaceComm)
    if(mpi_enreg%mode_para=='b') spaceComm=mpi_enreg%commcart
    call timab(533,1,tsec)
    call xsum_mpi(gramyx,spaceComm,ierr)
    call timab(533,2,tsec)
    mpi_enreg%paral_level= old_paral_level
    call timab(532,1,tsec)
    call zgemm('n','n',vectsize,blocksize,bblocksize,cminusone,blockvectory,&
&               vectsize,gramyx,bblocksize,cone,blockvectorr,vectsize)
    call timab(532,2,tsec)
   end if

!  residuals orthogonal to blockvectorx
!  blockvectorr=blockvectorr-&
!          &matmul(blockvectorx,matmul(transpose(blockvectorbx),blockvectorr))
   call timab(532,1,tsec)
   call zgemm('c','n',blocksize,blocksize,vectsize,cone,blockvectorbx,&
&               vectsize,blockvectorr,vectsize,czero,gramxax,blocksize)
   call timab(532,2,tsec)
   old_paral_level= mpi_enreg%paral_level
   mpi_enreg%paral_level=3
   call xcomm_init(mpi_enreg,spaceComm)
   if(mpi_enreg%mode_para=='b') spaceComm=mpi_enreg%commcart
   call timab(533,1,tsec)
   call xsum_mpi(gramxax,spaceComm,ierr)
   call timab(533,2,tsec)
   mpi_enreg%paral_level= old_paral_level
   call timab(532,1,tsec)
   call zgemm('n','n',vectsize,blocksize,blocksize,cminusone,blockvectorx,&
&             vectsize,gramxax,blocksize,cone,blockvectorr,vectsize)
   call timab(532,2,tsec)

   allocate(cwavef(2,npw_k*nspinor*maxblocksize),gwavef(2,npw_k*nspinor*maxblocksize),gvnlc(2,npw_k*nspinor*maxblocksize))
   allocate(swavef(2,npw_k*nspinor*maxblocksize))
   if (block_to_complete) then
    cwavef(:,:)=zero
   end if
   call zcopy(npw_k*nspinor*blocksize,blockvectorr,1,cwavef,1)
   sij_opt=0;if (gen_eigenpb) sij_opt=1
   if (ioption==1) then
    call getghc(cwavef,dimffnl,ffnl,dtfil%filstat,gwavef,swavef,gs_hamk,gvnlc,kg_k,&
&    kinpw,lmnmax,matblk,mgfft,mpi_enreg,mpsang,mpssoang,natom,blocksize,npw_k,nspinor,ntypat,&
&    nvloc,n4,n5,n6,ph3d,prtvol,sij_opt,tim_getghc,vlocal)
   else
    call timab(534,1,tsec)
    call prep_getghc(cwavef,dimffnl,dtfil,ffnl,gs_hamk,gvnlc,gwavef,swavef,iblock,2,istwf_k,kg_k,&
&    kinpw,lmnmax,matblk,maxblocksize,mgfft,mpi_enreg,mpsang,mpssoang,natom,nbdblock,&
&    nband_k,npw_k,nspinor,ntypat,nvloc,n4,n5,n6,ph3d,prtvol,sij_opt,vlocal)
    call timab(534,2,tsec)
   end if
   if (gen_eigenpb) then
    call zcopy(npw_k*nspinor*blocksize,swavef,1,blockvectorbr,1)
   else
    call zcopy(vectsize*blocksize,blockvectorr,1,blockvectorbr,1)
   endif
   call zcopy(npw_k*nspinor*blocksize,gwavef,1,blockvectorar,1)
   deallocate(cwavef,gwavef,gvnlc)
   deallocate(swavef)
   call zorthonormalize(blockvectorr,blockvectorbr,blocksize,mpi_enreg,gramrbr,vectsize)
   call ztrsm('r','u','n','n',vectsize,blocksize,cone,gramrbr,blocksize,&
&             blockvectorbr,vectsize)
   call ztrsm('r','u','n','n',vectsize,blocksize,cone,gramrbr,blocksize,&
&             blockvectorar,vectsize)

   if(iterationnumber>1) then
!   call zorthonormalize(blockvectorp,blockvectorbp,blockvectorap)
    call zorthonormalize(blockvectorp,blockvectorbp,blocksize,mpi_enreg,grampbp,vectsize)
    call ztrsm('r','u','n','n',vectsize,blocksize,cone,grampbp,blocksize,&
       &              blockvectorbp,vectsize)
    call ztrsm('r','u','n','n',vectsize,blocksize,cone,grampbp,blocksize,&
       &              blockvectorap,vectsize)
!   if(iterationnumber >1) stop('xx4')
!   blockvectorap=matmul(blockvectorap,grampbp)
   end if

   activersize=blocksize
   if (iterationnumber==1) then
    activepsize=0
    restart=1
   else
    activepsize=blocksize
    restart=0
   end if

!  gramxar=matmul(transpose(blockvectorax),blockvectorr)
!  gramrar=matmul(transpose(blockvectorar),blockvectorr)
!  gramxax=matmul(transpose(blockvectorax),blockvectorx)
   call timab(532,1,tsec)
   call zgemm('c','n',blocksize,blocksize,vectsize,cone,blockvectorax,&
&             vectsize,blockvectorr,vectsize,czero,gramxar,blocksize)
   call zgemm('c','n',blocksize,blocksize,vectsize,cone,blockvectorar,&
&             vectsize,blockvectorr,vectsize,czero,gramrar,blocksize)
   call zgemm('c','n',blocksize,blocksize,vectsize,cone,blockvectorax,&
&             vectsize,blockvectorx,vectsize,czero,gramxax,blocksize)
   call timab(532,2,tsec)
   old_paral_level= mpi_enreg%paral_level
   mpi_enreg%paral_level=3
   call xcomm_init(mpi_enreg,spaceComm)
   if(mpi_enreg%mode_para=='b') spaceComm=mpi_enreg%commcart
   transf(:,:,1)=gramxar(:,:)
   transf(:,:,2)=gramrar(:,:)
   transf(:,:,3)=gramxax(:,:)
   call timab(533,1,tsec)
   call xsum_mpi(transf,spaceComm,ierr)
   call timab(533,2,tsec)
   gramxar(:,:)=transf(:,:,1)
   gramrar(:,:)=transf(:,:,2)
   gramxax(:,:)=transf(:,:,3)
   mpi_enreg%paral_level= old_paral_level

!  gramxbx=matmul(transpose(blockvectorbx),blockvectorx)
!  gramrbr=matmul(transpose(blockvectorbr),blockvectorr)
!  gramxbr=matmul(transpose(blockvectorbx),blockvectorr)
   call timab(532,1,tsec)
   call zgemm('c','n',blocksize,blocksize,vectsize,cone,blockvectorbx,&
&             vectsize,blockvectorx,vectsize,czero,gramxbx,blocksize)
   call zgemm('c','n',blocksize,blocksize,vectsize,cone,blockvectorbr,&
&             vectsize,blockvectorr,vectsize,czero,gramrbr,blocksize)
   call zgemm('c','n',blocksize,blocksize,vectsize,cone,blockvectorbx,&
&             vectsize,blockvectorr,vectsize,czero,gramxbr,blocksize)
   call timab(532,2,tsec)
   old_paral_level= mpi_enreg%paral_level
   mpi_enreg%paral_level=3
   call xcomm_init(mpi_enreg,spaceComm)
   if(mpi_enreg%mode_para=='b') spaceComm=mpi_enreg%commcart
   transf(:,:,1)=gramxbx(:,:)
   transf(:,:,2)=gramrbr(:,:)
   transf(:,:,3)=gramxbr(:,:)
   call timab(533,1,tsec)
   call xsum_mpi(transf,spaceComm,ierr)
   call timab(533,2,tsec)
   gramxbx(:,:)=transf(:,:,1)
   gramrbr(:,:)=transf(:,:,2)
   gramxbr(:,:)=transf(:,:,3)
   mpi_enreg%paral_level= old_paral_level

!###########################################################################
!################ PERFORM LOOP ON COND #####################################
!###########################################################################
!if(iterationnumber >1) stop('xx1')
   i1=0;i2=blocksize;i3=2*blocksize;i4=3*blocksize
   cond: do cond_try=1,1 !2 when restart, but not implemented
    if (restart==0) then
!    gramxap=matmul(transpose(blockvectorax),blockvectorp)
!    gramrap=matmul(transpose(blockvectorar),blockvectorp)
!    grampap=matmul(transpose(blockvectorap),blockvectorp)
     call timab(532,1,tsec)
     call zgemm('c','n',blocksize,blocksize,vectsize,cone,blockvectorax,&
&               vectsize,blockvectorp,vectsize,czero,gramxap,blocksize)
     call zgemm('c','n',blocksize,blocksize,vectsize,cone,blockvectorar,&
&               vectsize,blockvectorp,vectsize,czero,gramrap,blocksize)
     call zgemm('c','n',blocksize,blocksize,vectsize,cone,blockvectorap,&
&               vectsize,blockvectorp,vectsize,czero,grampap,blocksize)
     call timab(532,2,tsec)
     old_paral_level= mpi_enreg%paral_level
     mpi_enreg%paral_level=3
     call xcomm_init(mpi_enreg,spaceComm)
     if(mpi_enreg%mode_para=='b') spaceComm=mpi_enreg%commcart
     transf(:,:,1)=gramxap(:,:)
     transf(:,:,2)=gramrap(:,:)
     transf(:,:,3)=grampap(:,:)
     call timab(533,1,tsec)
     call xsum_mpi(transf,spaceComm,ierr)
     call timab(533,2,tsec)
     gramxap(:,:)=transf(:,:,1)
     gramrap(:,:)=transf(:,:,2)
     grampap(:,:)=transf(:,:,3)
     mpi_enreg%paral_level= old_paral_level
     bigorder=i4
     allocate(grama(i4,i4),gramb(i4,i4),eigen(i4),coordx(i4,blocksize))
     grama(:,:)=0._dp;gramb(:,:)=0._dp
     grama(i1+1:i2,i1+1:i2)=gramxax
     grama(i1+1:i2,i2+1:i3)=gramxar
     grama(i1+1:i2,i3+1:i4)=gramxap
!    grama(i2+1:i3,i1+1:i2)=transpos(gramxar)
     grama(i2+1:i3,i2+1:i3)=gramrar
     grama(i2+1:i3,i3+1:i4)=gramrap
!    grama(i3+1:i4,i1+1:i2)=transpos(gramxap)
!    grama(i3+1:i4,i2+1:i3)=transpos(gramrap)
     grama(i3+1:i4,i3+1:i4)=grampap

!    gramxbp=matmul(transpose(blockvectorbx),blockvectorp)
!    gramrbp=matmul(transpose(blockvectorbr),blockvectorp)
!    grampbp=matmul(transpose(blockvectorbp),blockvectorp)
     call timab(532,1,tsec)
     call zgemm('c','n',blocksize,blocksize,vectsize,cone,blockvectorbx,&
&               vectsize,blockvectorp,vectsize,czero,gramxbp,blocksize)
     call zgemm('c','n',blocksize,blocksize,vectsize,cone,blockvectorbr,&
&               vectsize,blockvectorp,vectsize,czero,gramrbp,blocksize)
     call zgemm('c','n',blocksize,blocksize,vectsize,cone,blockvectorbp,&
&               vectsize,blockvectorp,vectsize,czero,grampbp,blocksize)
     call timab(532,2,tsec)
     old_paral_level= mpi_enreg%paral_level
     mpi_enreg%paral_level=3
     call xcomm_init(mpi_enreg,spaceComm)
     if(mpi_enreg%mode_para=='b') spaceComm=mpi_enreg%commcart
     transf(:,:,1)=gramxbp(:,:)
     transf(:,:,2)=gramrbp(:,:)
     transf(:,:,3)=grampbp(:,:)
     call timab(533,1,tsec)
     call xsum_mpi(transf,spaceComm,ierr)
     call timab(533,2,tsec)
     gramxbp(:,:)=transf(:,:,1)
     gramrbp(:,:)=transf(:,:,2)
     grampbp(:,:)=transf(:,:,3)
     mpi_enreg%paral_level= old_paral_level
     gramb(i1+1:i2,i1+1:i2)=gramxbx
     gramb(i1+1:i2,i2+1:i3)=gramxbr
     gramb(i1+1:i2,i3+1:i4)=gramxbp
!    gramb(i2+1:i3,i1+1:i2)=transpos(gramxbr)
     gramb(i2+1:i3,i2+1:i3)=gramrbr
     gramb(i2+1:i3,i3+1:i4)=gramrbp
!    gramb(i3+1:i4,i1+1:i2)=transpos(gramxbp)
!    gramb(i3+1:i4,i2+1:i3)=transpos(gramrbp)
     gramb(i3+1:i4,i3+1:i4)=grampbp

    else
     bigorder=i3
     allocate(grama(i3,i3),gramb(i3,i3),eigen(i3),coordx(i3,blocksize))
     grama(:,:)=0._dp;gramb(:,:)=0._dp
     grama(i1+1:i2,i1+1:i2)=gramxax
     grama(i1+1:i2,i2+1:i3)=gramxar
!    grama(i2+1:i3,i1+1:i2)=transpos(gramxar)
     grama(i2+1:i3,i2+1:i3)=gramrar
     gramb(i1+1:i2,i1+1:i2)=gramxbx
     gramb(i1+1:i2,i2+1:i3)=gramxbr
!    gramb(i2+1:i3,i1+1:i2)=transpos(gramxbr)
     gramb(i2+1:i3,i2+1:i3)=gramrbr

    end if
   end do cond
!###########################################################################
!################ END LOOP ON COND #########################################
!###########################################################################

!  call la_sygv(grama,gramb,eigen,itype=1,jobz='v')
!  if(iterationnumber >1) stop('xx')
   lwork=3*bigorder-2
   allocate(work(lwork),rwork(lwork))

   call zhegv(1,'v','u',bigorder,grama,bigorder,gramb,bigorder,eigen,&
&               work,lwork,rwork,info)
   deallocate(work,rwork)
   do iblocksize=1,blocksize
    lambda(iblocksize,iblocksize)=eigen(iblocksize)
   end do
!DEBUG
!  write(6,*)'eigen',eigen(1:blocksize)
!ENDDEBUG
   coordx=grama(:,1:blocksize)
   deallocate(grama,gramb,eigen)
   if (restart==0 .and. iterationnumber >1) then

!   blockvectorp=matmul(blockvectorr,coordx(i2+1:i3,:))+&
!&               matmul(blockvectorp,coordx(i3+1:i4,:))
    call timab(532,1,tsec)
    call zgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorr,&
&              vectsize,coordx(i2+1:i3,:),blocksize,czero,blockvectordumm,vectsize)
    call zgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorp,&
&              vectsize,coordx(i3+1:i4,:),blocksize,cone,blockvectordumm,vectsize)
    call timab(532,2,tsec)
    call zcopy(vectsize*blocksize,blockvectordumm,1,blockvectorp,1)

!   blockvectorap=matmul(blockvectorar,coordx(i2+1:i3,:))+&
!&                matmul(blockvectorap,coordx(i3+1:i4,:))
    call timab(532,1,tsec)
    call zgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorar,&
&                vectsize,coordx(i2+1:i3,:),blocksize,czero,blockvectordumm,vectsize)
    call zgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorap,&
&                vectsize,coordx(i3+1:i4,:),blocksize,cone,blockvectordumm,vectsize)
    call timab(532,2,tsec)
    call zcopy(vectsize*blocksize,blockvectordumm,1,blockvectorap,1)

!   blockvectorbp=matmul(blockvectorbr,coordx(i2+1:i3,:))+&
!&                matmul(blockvectorbp,coordx(i3+1:i4,:))
    call timab(532,1,tsec)
    call zgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorbr,&
&                vectsize,coordx(i2+1:i3,:),blocksize,czero,blockvectordumm,vectsize)
    call zgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorbp,&
&                vectsize,coordx(i3+1:i4,:),blocksize,cone,blockvectordumm,vectsize)
    call timab(532,2,tsec)
    call zcopy(vectsize*blocksize,blockvectordumm,1,blockvectorbp,1)

   else

!   blockvectorp =matmul(blockvectorr,coordx(i2+1:i3,:))
    call timab(532,1,tsec)
    call zgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorr,&
&              vectsize,coordx(i2+1:i3,:),blocksize,czero,blockvectorp,vectsize)
!   blockvectorap=matmul(blockvectorar,coordx(i2+1:i3,:))
    call zgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorar,&
&              vectsize,coordx(i2+1:i3,:),blocksize,czero,blockvectorap,vectsize)
!   blockvectorbp=matmul(blockvectorbr,coordx(i2+1:i3,:))
    call zgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorbr,&
&              vectsize,coordx(i2+1:i3,:),blocksize,czero,blockvectorbp,vectsize)
    call timab(532,2,tsec)
   end if

!  blockvectorx = matmul(blockvectorx,coordx(i1+1:i2,:))+blockvectorp
   call timab(532,1,tsec)
   call zgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorx,&
&             vectsize,coordx(i1+1:i2,:),blocksize,czero,blockvectordumm,vectsize)
   call timab(532,2,tsec)
   blockvectorx = blockvectordumm+blockvectorp

!  blockvectorax= matmul(blockvectorax,coordx(i1+1:i2,:))+blockvectorap
   call timab(532,1,tsec)
   call zgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorax,&
&             vectsize,coordx(i1+1:i2,:),blocksize,czero,blockvectordumm,vectsize)
   call timab(532,2,tsec)
   blockvectorax = blockvectordumm+blockvectorap

!  blockvectorbx= matmul(blockvectorbx,coordx(i1+1:i2,:))+blockvectorbp
   call timab(532,1,tsec)
   call zgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorbx,&
&             vectsize,coordx(i1+1:i2,:),blocksize,czero,blockvectordumm,vectsize)
   call timab(532,2,tsec)
   blockvectorbx = blockvectordumm+blockvectorbp

   deallocate(coordx)
  end do iter
!###########################################################################
!################## END LOOP ON NLINE ######################################
!###########################################################################
  residualnorms=sum(abs(blockvectorr)**2,dim=1)
  old_paral_level= mpi_enreg%paral_level
  mpi_enreg%paral_level=3
  call xcomm_init(mpi_enreg,spaceComm)
  if(mpi_enreg%mode_para=='b') spaceComm=mpi_enreg%commcart
  call timab(533,1,tsec)
  call xsum_mpi(residualnorms,spaceComm,ierr)
  call timab(533,2,tsec)
  mpi_enreg%paral_level= old_paral_level
  residualnorms=sqrt(residualnorms)
  do iblocksize=1,blocksize
   iband=iblocksize+(iblock-1)*maxblocksize
   call zcopy(vectsize,blockvectorx(:,iblocksize),1,cg(:,cgindex(iband):cgindex(iband+1)-1),1)
   if(gen_eigenpb) then
    call zcopy(vectsize,blockvectorbx(:,iblocksize),1,gsc(:,gscindex(iband):gscindex(iband+1)-1),1)
   end if
  end do

! call operatorh(blockvectorx,blockvectorax,subham,subvnl)!fill also subham, subvnl

  allocate(cwavef(2,npw_k*nspinor*maxblocksize),gwavef(2,npw_k*nspinor*maxblocksize),gvnlc(2,npw_k*nspinor*maxblocksize))
  isubh=1+2*(iblock-1)*maxblocksize*((iblock-1)*maxblocksize+1)/2
  if (block_to_complete) then
   cwavef(:,:)=zero
  end if
  call zcopy(npw_k*nspinor*blocksize,blockvectorax,1,gwavef,1)
  call zcopy(npw_k*nspinor*blocksize,blockvectorx,1,cwavef,1)
  choice=1 ; signs=2 ; idir=0 ; tim_nonlop=311 ; cpopt=-1 ; paw_opt=0 ; nnlout=1; nkpg=0; sij_opt=0
  allocate(sij_loc(gs_hamk%dimekb1,ntypat*((paw_opt+1)/3)),&
           gsc_loc(2,npw_k*nspinor*blocksize*(paw_opt/3)),&
           kpg_loc(npw_k,nkpg*gs_hamk%useylm))
  if (ioption==1) then
   call nonlop(gs_hamk%atindx1,choice,cpopt,cprj_dum,gs_hamk%dimekb1,&
&              gs_hamk%dimekb2,dimffnl,dimffnl,gs_hamk%ekb,&
&              enlout,ffnl,ffnl,gs_hamk%gmet,gs_hamk%gprimd,idir,gs_hamk%indlmn,&
&              istwf_k,kg_k,kg_k,kpg_loc,kpg_loc,gs_hamk%kpoint,gs_hamk%kpoint,dum,lmnmax,matblk,&
&              mgfft,mpi_enreg,mpsang,mpssoang,natom,gs_hamk%nattyp,gs_hamk%ngfft,nkpg,nkpg,&
&              gs_hamk%nloalg,nnlout,npw_k,npw_k,nspinor,ntypat,paw_opt,gs_hamk%phkxred,&
&              gs_hamk%phkxred,gs_hamk%ph1d,ph3d,ph3d,gs_hamk%pspso,signs,sij_loc,&
&              gsc_loc,tim_nonlop,gs_hamk%ucvol,gs_hamk%useylm,cwavef,gvnlc)
  else
   call timab(571,1,tsec)
   tim_nonlop=360
   allocate(lambda_loc(blocksize))
   call prep_nonlop(gs_hamk%atindx1,choice,cpopt,cprj_dum,gs_hamk%dimekb1,gs_hamk%dimekb2,&
&                   dimffnl,gs_hamk%ekb,enlout,&
&                   ffnl,gs_hamk%gmet,gs_hamk%gprimd,iblock,1,idir,gs_hamk%indlmn,istwf_k,kg_k,&
&                   kpg_loc,gs_hamk%kpoint,lambda_loc,lmnmax,matblk,blocksize,mgfft,mpi_enreg,mpsang,mpssoang,&
&                   natom,gs_hamk%nattyp,nband_k,nbdblock,gs_hamk%ngfft,nkpg,gs_hamk%nloalg,nnlout,npw_k,&
&                   nspinor,ntypat,paw_opt,gs_hamk%phkxred,gs_hamk%ph1d,ph3d,prtvol,gs_hamk%pspso,signs,sij_loc,gsc_loc,&
&                   tim_nonlop,gs_hamk%ucvol,gs_hamk%useylm,cwavef,gvnlc)
   deallocate(lambda_loc)
   call timab(571,2,tsec)
  end if
  deallocate(sij_loc,gsc_loc,kpg_loc)

  iwavef=(iblock-1)*maxblocksize+blocksize
  allocate(csubham(blocksize,iwavef))
  csubham(:,:)=czero
  call zgemm('c','n',blocksize,iwavef,vectsize,cone,gwavef,vectsize,&
&     cg(:,icg+1:icg+vectsize*iwavef),vectsize,czero,csubham,blocksize)
  do iblocksize=1,blocksize
   do ii=1,(iblock-1)*maxblocksize+iblocksize
    subham(isubh)  =dreal(csubham(iblocksize,ii))
    subham(isubh+1)=-dimag(csubham(iblocksize,ii))
    isubh=isubh+2
   enddo
  enddo
  deallocate(csubham)
  if (gs_hamk%usepaw==0) then
   call zgemm('c','n',blocksize,iwavef,vectsize,cone,gvnlc,vectsize,&
&      cg(:,icg+1:icg+vectsize*iwavef),vectsize,czero,&
&      totvnl(1+(iblock-1)*blocksize:iblock*blocksize,1:iwavef),blocksize)
  endif
!The Vnl part of the Hamiltonian is no more stored in the packed form such as it was the case for subvnl(:).
!Now, the full matrix is stored in totvnl(:,:). This trick permits:
!1) to avoid the reconstruction of the total matrix in vtowfk.F90 (double loop over bands)
!2) to use two optimized matrix-matrix blas routine for general (in lobpcgccwf.F90) or hermitian (in vtowfk.F90)
!   operators, zgemm.f and zhemm.f respectively, rather than a triple loop in both cases.

! comm for subham and subvnl is made in vtowfk

  deallocate(cwavef,gwavef,gvnlc)
! call operators(blockvectorx,blockvectorbx,subovl)!fill also  subovl
  if((gen_eigenpb).and.(use_subovl==1)) then
   allocate(cwavef(2,npw_k*nspinor))
   allocate(gwavef(2,npw_k*nspinor))
   isubo=1+2*(iblock-1)*maxblocksize*((iblock-1)*maxblocksize+1)/2
   do iblocksize=1,blocksize
    cwavef(1,1:npw_k*nspinor)=real (blockvectorx(1:npw_k*nspinor,iblocksize))
    cwavef(2,1:npw_k*nspinor)=aimag(blockvectorx(1:npw_k*nspinor,iblocksize))
!   Call to nonlop: compute <g|S|c>

    choice=1 ; signs=2 ; idir=0 ; tim_nonlop=311 ; cpopt=-1 ; paw_opt=3 ; nnlout=1; nkpg=0

!MOST UGLY PATCH, TO BE DELETED
   spacecomm_old=mpi_enreg%comm_fft
   mpi_enreg%comm_fft=mpi_enreg%commcart
    call nonlop(gs_hamk%atindx1,choice,cpopt,cprj_dum,gs_hamk%dimekb1,0,dimffnl,dimffnl,dummy2,&
&               enlout,ffnl,ffnl,gs_hamk%gmet,gs_hamk%gprimd,idir,gs_hamk%indlmn,&
&               istwf_k,kg_k,kg_k,kpg_dum,kpg_dum,gs_hamk%kpoint,gs_hamk%kpoint,dum,lmnmax,matblk,&
&               mgfft,mpi_enreg,mpsang,mpssoang,natom,gs_hamk%nattyp,gs_hamk%ngfft,nkpg,nkpg,&
&               gs_hamk%nloalg,nnlout,npw_k,npw_k,nspinor,ntypat,paw_opt,gs_hamk%phkxred,&
&               gs_hamk%phkxred,gs_hamk%ph1d,ph3d,ph3d,gs_hamk%pspso,signs,gs_hamk%sij,&
&               gwavef,tim_nonlop,gs_hamk%ucvol,gs_hamk%useylm,cwavef,cwavef)
   mpi_enreg%comm_fft=spacecomm_old

    call zcopy(npw_k*nspinor,gwavef(:,:),1,blockvectorbx(:,iblocksize),1)
    do ii=1,(iblock-1)*maxblocksize+iblocksize
     iwavef=(ii-1)*npw_k*nspinor+icg
     cscre=zero;cscim=zero
     do ipw=1,npw_k*nspinor
      cgreipw=cg(1,ipw+iwavef);cgimipw=cg(2,ipw+iwavef)
      cscre=cscre+cgreipw*gwavef(1,ipw)+cgimipw*gwavef(2,ipw)
      cscim=cscim+cgreipw*gwavef(2,ipw)-cgimipw*gwavef(1,ipw)
     end do
!    Store real and imag parts in hermitian storage mode:
     subovl(isubo)=cscre ; subovl(isubo+1)=cscim
     isubo=isubo+2
    end do
   end do
   deallocate(cwavef,gwavef)
  end if

  deallocate(blockvectory,blockvectorby,gramyx)

  deallocate(blockvectorx,blockvectorax,blockvectorbx)
  deallocate(blockvectorr,blockvectorar,blockvectorbr)
  deallocate(blockvectorp,blockvectorap,blockvectorbp)
  deallocate(transf,blockvectordumm)
  deallocate(gramxax,gramxar,gramxap,gramrar,gramrap,grampap,gramxbx,gramxbr,&
&  gramxbp,gramrbr,gramrbp,grampbp)
  deallocate(lambda)
  deallocate(residualnorms)

!End big loop over bands inside blocks
 end do

 call timab(530,2,tsec)

end subroutine lobpcgccwf
!!***

!!****f* abinit/zorthonormalize
!! NAME
!! zorthonormalize
!!
!! FUNCTION
!!
!! COPYRIGHT
!! Copyright (C) 1998-2007 ABINIT group (GZ,AR,MT)
!! this file is distributed under the terms of the
!! gnu general public license, see ~abinit/COPYING
!! or http://www.gnu.org/copyleft/gpl.txt .
!! for the initials of contributors, see ~abinit/doc/developers/contributors.txt .
!!
!! INPUTS
!!
!! OUTPUT
!!
!! SIDE EFFECTS
!!
!! PARENTS
!!      lobpcgccIIwf,lobpcgccwf
!!
!! CHILDREN
!!      timab,wrtout,xcomm_init,xsum_mpi
!!
!! SOURCE

#if defined HAVE_CONFIG_H
#include "config.h"
#endif

subroutine zorthonormalize(blockvectorx,blockvectorbx,blocksize,mpi_enreg,sqgram,vectsize)

 use defs_basis
 use defs_datatypes

!This section has been created automatically by the script Abilint (TD). Do not modify these by hand.
#ifdef HAVE_FORTRAN_INTERFACES
 use interfaces_01manage_mpi
 use interfaces_lib01hidempi
#else
 use defs_xfuncmpi
#endif
!End of the abilint section

 implicit none

!Arguments ------------------------------------
  integer, intent(in) :: blocksize,vectsize
  complex(dp):: blockvectorx(vectsize,blocksize)
  complex(dp):: blockvectorbx(vectsize,blocksize)
  complex(dp):: sqgram(blocksize,blocksize)
  type(mpi_type) :: mpi_enreg

!Local variables-------------------------------
  integer :: iblocksize,ierr,jblocksize,info,old_paral_level,spaceComm
  real(dp) :: tsec(2)

#ifdef VMS
!DEC$ ATTRIBUTES ALIAS:'ZGEMM' :: zgemm
!DEC$ ATTRIBUTES ALIAS:'ZPOTRF' :: zpotrf
!DEC$ ATTRIBUTES ALIAS:'ZTRSM' :: ztrsm
#endif

! *********************************************************************

 call timab(535,1,tsec)

 call zgemm('c','n',blocksize,blocksize,vectsize,cone,blockvectorx,&
&           vectsize,blockvectorbx,vectsize,czero,sqgram,blocksize)
 old_paral_level= mpi_enreg%paral_level
 mpi_enreg%paral_level=3
 call xcomm_init(mpi_enreg,spaceComm)
 if(mpi_enreg%mode_para=='b') spaceComm=mpi_enreg%commcart
 call xsum_mpi(sqgram,spaceComm,ierr)
 mpi_enreg%paral_level= old_paral_level
 call zpotrf('u',blocksize,sqgram,blocksize,info)
 call ztrsm('r','u','n','n',vectsize,blocksize,cone,sqgram,blocksize,&
&           blockvectorx,vectsize)

 call timab(535,2,tsec)

end subroutine zorthonormalize
!!***

!!****f* abinit/zprecon3
!!
!! NAME
!! zprecon3
!!
!! FUNCTION
!! precondition $<g|(h-e_{n,k})|c_{n,k}>$
!!
!! COPYRIGHT
!! Copyright (C) 1998-2007 ABINIT group (dca, xg, gmr)
!! this file is distributed under the terms of the
!! gnu general public license, see ~abinit/COPYING
!! or http://www.gnu.org/copyleft/gpl.txt .
!! for the initials of contributors, see ~abinit/doc/developers/contributors.txt .
!!
!! INPUTS
!!  $cg(2,npw)=<g|c_{n,k}>$.
!!  $eval=current band eigenvalue=<c_{n,k}|h|c_{n,k}>$.
!!  istwf_k=option parameter that describes the storage of wfs
!!  kinpw(npw)=(modified) kinetic energy for each plane wave (hartree)
!!  nspinor=number of spinorial components of the wavefunctions
!!  $vect(2,npw)=<g|h|c_{n,k}>$.
!!  npw=number of planewaves at this k point.
!!
!! OUTPUT
!!  $vect(2,npw)=<g|(h-eval)|c_{n,k}>*(polynomial ratio)$
!!
!! PARENTS
!!      lobpcgccIIwf,lobpcgccwf
!!
!! CHILDREN
!!      timab,wrtout,xcomm_init,xsum_mpi
!!
!! SOURCE

#if defined HAVE_CONFIG_H
#include "config.h"
#endif

subroutine zprecon3(cg,eval,blocksize,istwf_k,kinpw,mpi_enreg,npw,nspinor,ghc,vect)


 use defs_basis
 use defs_datatypes

!This section has been created automatically by the script Abilint (TD). Do not modify these by hand.
#ifdef HAVE_FORTRAN_INTERFACES
 use interfaces_01manage_mpi
 use interfaces_lib01hidempi
#else
 use defs_xfuncmpi
#endif
!End of the abilint section

 implicit none

!Arguments ------------------------------------
 integer, intent(in) :: blocksize,istwf_k,npw,nspinor
 complex(dp) :: eval(blocksize,blocksize)
 complex(dp) :: cg(npw*nspinor,blocksize),ghc(npw*nspinor,blocksize),&
 &vect(npw*nspinor,blocksize)
 real(dp) :: kinpw(npw)
 type(mpi_type) :: mpi_enreg

!Local variables-------------------------------
 integer :: iblocksize,ierr,ig,igs,ipw1,ispinor,old_paral_level,spaceComm
 real(dp) :: fac,poly,xx
 real(dp),allocatable :: ek0(:),ek0_inv(:)
 real(dp) :: tsec(2)
 character(len=500) :: message


! *************************************************************************

 call timab(536,1,tsec)
!compute mean kinetic energy of all bands
 allocate(ek0(blocksize),ek0_inv(blocksize))
 do iblocksize=1,blocksize
  if(istwf_k==1)then
   ek0(iblocksize)=0.0_dp
   do ispinor=1,nspinor
    igs=(ispinor-1)*npw
!$omp parallel do private(ig) reduction(+:ek0) &
!$omp&shared(cg,igs,kinpw,npw)
    do ig=1+igs,npw+igs
     if(kinpw(ig-igs)<huge(0.0_dp)*1.d-11)then
      ek0(iblocksize)=ek0(iblocksize)+kinpw(ig-igs)*&
&      (real(cg(ig,iblocksize))**2+aimag(cg(ig,iblocksize))**2)
     end if
    end do
!$omp end parallel do
   end do
  else if (istwf_k>=2)then
   if (istwf_k==2 .and. mpi_enreg%me_g0 == 1)then
    ek0(iblocksize)=0.0_dp ; ipw1=2
    if(kinpw(1)<huge(0.0_dp)*1.d-11)ek0(iblocksize)=0.5_dp*kinpw(1)*cg(1,iblocksize)**2
   else
    ek0(iblocksize)=0.0_dp ; ipw1=1
   end if
!$omp parallel do private(ig) reduction(+:ek0) &
!$omp&shared(cg,ipw1,kinpw,npw)
   do ig=ipw1,npw
    if(kinpw(ig)<huge(0.0_dp)*1.d-11)then
     ek0(iblocksize)=ek0(iblocksize)+&
&     kinpw(ig)*(real(cg(ig,iblocksize))**2+real(cg(ig+npw-1,iblocksize))**2)
    end if
   end do
!$omp end parallel do
  end if
 end do

 old_paral_level= mpi_enreg%paral_level
 mpi_enreg%paral_level=3
 call xcomm_init(mpi_enreg,spaceComm)
 if(mpi_enreg%mode_para=='b') spaceComm=mpi_enreg%commcart
 call xsum_mpi(ek0,spaceComm,ierr)
 mpi_enreg%paral_level= old_paral_level

 do iblocksize=1,blocksize
  if(ek0(iblocksize)<1.0d-10)then
   write(message, '(a,a,a,a,a,a)' )ch10,&
&   ' precon : warning -',ch10,&
&   '  the mean kinetic energy of a wavefunction vanishes.',ch10,&
&   '  it is reset to 0.1ha.'
   call wrtout(6,message,'pers')
   ek0(iblocksize)=0.1_dp
  end if
 end do
 ek0_inv(:)=1.0_dp/ek0(:)
!
!carry out preconditioning
 do iblocksize=1,blocksize
  do ispinor=1,nspinor
   igs=(ispinor-1)*npw
!$omp parallel do private(fac,ig,poly,xx) &
!$omp&shared(cg,ek0_inv,eval,kinpw,igs,npw,vect)
   do ig=1+igs,npw+igs
    if(kinpw(ig-igs)<huge(0.0_dp)*1.d-11)then
     xx=kinpw(ig-igs)*ek0_inv(iblocksize)
!    teter polynomial ratio
     poly=27._dp+xx*(18._dp+xx*(12._dp+xx*8._dp))
     fac=poly/(poly+16._dp*xx**4)
     vect(ig,iblocksize)=(ghc(ig,iblocksize)-&
&     eval(iblocksize,iblocksize)*cg(ig,iblocksize) )*fac
    else
     vect(ig,iblocksize)=dcmplx(0.0_dp,0.0_dp)
    end if
   end do
!$omp end parallel do
  end do
 end do
 deallocate(ek0,ek0_inv)

 call timab(536,2,tsec)

end subroutine zprecon3
!!***
