!{\src2tex{textfont=tt}}
!!****f* ABINIT/screening
!! NAME
!! screening
!!
!! FUNCTION
!!
!! Calculate screening and dielectric functions
!!
!! COPYRIGHT
!! Copyright (C) 2001-2007 ABINIT group (GMR, VO, LR, RWG, MT, MG)
!! This file is distributed under the terms of the
!! GNU General Public License, see ~abinit/COPYING
!! or http://www.gnu.org/copyleft/gpl.txt .
!! For the initials of contributors, see ~abinit/doc/developers/contributors.txt .
!!
!! INPUTS
!!  acell(3)=length scales of primitive translations (bohr)
!!  dtfil <type(datafiles_type)>=variables related to files
!!  rprim(3,3)=dimensionless real space primitive translations
!!
!! OUTPUT
!!  output is written in a file
!!
!! SIDE EFFECTS
!!  dtset <type(dataset_type)>=all input variables for this dataset
!!
!! NOTES
!!
!! PARENTS
!!      driver,drivergw
!!
!! CHILDREN
!!      calc_wf_qp,ccgradvnl,cchi0,cchi0q0,cigfft,ckxcldag,cvc,density,distrb2
!!      fermi,fftwfn,findnq,findq,findshells,hdr_clean,hermitianize,identk
!!      lattice,leave_new,matcginv,matrginv,metric,mkrdim,pclock,printcm,printv
!!      rdgw,rdkss,rdlda,rdldaabinit,rdqps,setmesh,setshells,surot,testlda
!!      timab,wrscr,wrtout
!!
!! SOURCE

#if defined HAVE_CONFIG_H
#include "config.h"
#endif

subroutine screening(acell,dtfil,dtset,mpi_enreg,rprim)

 use defs_basis
 use defs_datatypes

!This section has been created automatically by the script Abilint (TD). Do not modify these by hand.
#ifdef HAVE_FORTRAN_INTERFACES
 use interfaces_01manage_mpi
 use interfaces_11util
 use interfaces_12geometry
 use interfaces_14iowfdenpot
 use interfaces_15gw
 use interfaces_lib01hidempi
#else
 use defs_interfaces
 use defs_xfuncmpi
#endif
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 type(MPI_type),intent(inout) :: mpi_enreg
 type(datafiles_type),intent(in) :: dtfil
 type(dataset_type),intent(inout) :: dtset
!arrays
 real(dp),intent(in) :: acell(3),rprim(3,3)

!Local variables ------------------------------
 character(len=4),parameter :: ctype='RPA '
!scalars
 integer,parameter :: NOMEGAGAUSS=30,NOMEGAREAL=201,unitf=0,unitwfg=25
 integer,parameter :: unitwfr=26
 integer :: dpbytes,fermiocc,i,i1,i2,i3,ia,ib,ig,ig1,ig2,ig3,ig4,ig4x,ig4y,ig4z
 integer :: ik,inclvkb,io,iomega,iq,iqp,ir,is,isppol,istat,mpsang,natom,nbnds
 integer :: nbnds_per_proc,nbr,nel=0,ngfft1,ngfft1a,ngfft2,ngfft2a,ngfft3
 integer :: ngfft3a,ngr,ninv,nkbz,nkbzx,nkibz,nkibzr,nomega,nop,nopr,npwvec,nq
 integer :: nqptdm,nr,nshr,nsppol,ntypat,tim_fourdp
!no_abirules
! etadelta distance poles G from poles W
 real(dp),parameter :: OMEGAERMAX=100.0/Ha_eV,etadelta=0.1/Ha_eV
 real(dp) :: bzvol,domegareal,e0,efermi,epsilon0,epsilon0nlf,omegaplasma,tpula
 real(dp) :: ucvol,ula
 complex :: ctmp
 logical,parameter :: lda=.false.,rpa=.true.,testelectron=.false.
 logical,parameter :: testparticle=.true.
 logical :: analytic_continuation=.false.,contour_deformation=.false.,static=.false.
 logical :: found,ltemp,plasmon_pole_model=.true.,qeq0
 logical :: update_energies=.false.
 character(len=fnlen) :: filewfg,filewfr
 type(epsilonm1_parameters) :: ep
 type(hdr_type) :: hdr
 integer,allocatable :: grottb(:,:,:),grottbm1(:,:,:),gvec(:,:),igfft(:,:,:,:)
 integer,allocatable :: irottb(:,:),ktab(:),ktabi(:),ktabo(:),ktabr(:,:)
 integer, allocatable :: nbv(:)
 integer,allocatable :: ktab_t(:),ktabi_t(:),ktabo_t(:),ktabr_t(:,:)
 integer,allocatable :: typat(:)
 real(dp) :: a1(3),a2(3),a3(3),b1(3),b2(3),b3(3),bcc2rl(3,3),brl2cc(3,3)
 real(dp) :: gmet(3,3),gprimd(3,3),qcart(3),qcc(3),rmet(3,3),rprimd(3,3)
 real(dp) :: tsec(2),qtemp(3)
 real(dp),allocatable :: energy(:,:,:),gwenergy(:,:,:),kbz(:,:),kibz(:,:),occ(:,:,:)
 real(dp),allocatable :: op(:,:,:),q(:,:),qplusg(:),rho2(:,:),vkb(:,:,:,:),qptdm(:,:),rho_p(:,:)
 real(dp),allocatable :: vkbd(:,:,:,:),vkbsign(:,:),wtk_t(:),xcart(:,:),xred(:,:),wtk(:)
 real(dp),allocatable :: z(:),zw(:)
!Kxclda in G,G'' space dimension nr
 complex,allocatable :: chitmp(:,:),gradvnl(:,:,:,:),kxclda(:),omega(:)
 complex,allocatable :: wfg(:,:,:,:),wfr(:,:,:,:)
 complex,allocatable,target :: chi0(:,:,:)
 complex,pointer :: chi(:,:,:),eps(:,:,:),epsm1(:,:,:)
 character(len=80) :: title(2)
 character(len=500) :: message
 integer,allocatable::nband_t(:)
 integer::nband,ierr,spaceComm,me,master,min_band_proc,max_band_proc
 integer :: nscf
 real(dp),allocatable :: en_qp(:,:,:)
 complex,allocatable :: m_lda_to_qp(:,:,:,:)
!MG added for symmetrisation
 type(little_group) :: lt_q
 real(dp) :: qdummy(3)
!this is for the new calculation of vkb
 complex,allocatable :: fnl(:,:,:,:)
 complex,allocatable :: fnld(:,:,:,:,:)
!END MG
 logical::parallelism_is_on_kpoints,parallelism_is_on_bands,nonlocal,keep_in_memory,distributed,min_found,max_found

 logical,parameter::implemented=.false.
!This section has been created automatically by the script Abilint (TD). Do not modify these by hand.
#ifndef HAVE_FORTRAN_INTERFACES
 integer :: modx
#endif
!End of the abilint section

!************************************************************************
!BEGIN EXECUTABLE SECTION

 call timab(301,1,tsec)
 call timab(302,1,tsec)

 write(message,'(7a)')&
& ' SCREENING: Calculation of the susceptibility and dielectric matrices',ch10,ch10,&
& ' Based on a program developped by R.W. Godby, V. Olevano, G. Onida, and L. Reining.',ch10,&
& ' Incorporated in ABINIT by V. Olevano, G.-M. Rignanese, and M. Torrent.',ch10

 call wrtout(ab_out,message,'COLL')

!*************************************************************************************************
! This part added by RShaltaf 14/12/06 to handle memory and parallelization
! Notes
! nonlocal: flag that handles whether all proc has an acess to KSS file or not
! keep_in_memory: flag that handles whether the wavefunctions in real and reciprocal space can be kept in 
! memory or written to auxilary files
! distributed: if false: every proc has a the complete set of wavefunctions for all bands
! distributed: if true: every proc has only the wavefunctions that correspond to the bands that it needs
! distributed: can be only true if we have bands parallelization + keep_in_memory set true
!start clock
 call pclock(0)

 call xcomm_init(mpi_enreg,spaceComm)
!Init me
 call xme_init(mpi_enreg,me)
!Init master
 call xmaster_init(mpi_enreg,master)

 call timab(331,1,tsec)

 if(dtset%localrdwf==0)then
  nonlocal=.false.! only master has an access to files
 else
  nonlocal=.true. ! all proc have an access to files
 end if

 if(dtset%mkmem==0)then
  keep_in_memory=.false.
 else
  keep_in_memory=.true.
 end if

 if(dtset%gwpara==1)then
  parallelism_is_on_kpoints=.true.
  parallelism_is_on_bands=.false.
 else if(dtset%gwpara==2)then
  parallelism_is_on_bands=.true.
  parallelism_is_on_kpoints=.false.
 else if(dtset%gwpara==3)then
  if(implemented)then
   parallelism_is_on_kpoints=.true.
   parallelism_is_on_bands=.true.
  else
   write(message, '(a,a,a,a,a,a,a)' ) ch10,&
&   ' sigma: WARNING -',ch10,&
&   '  at the moment gwpara  3 is not yet implemented ',ch10,&
&   ' gwpara 2 will be imposed',ch10
   call wrtout(6,message,'COLL')
   parallelism_is_on_bands=.true.
  end if
 else if(dtset%gwpara>3)then
  write(message, '(a,a,a,a,a)' ) ch10,&
&  ' sigma: ERROR -',ch10,&
&  '  gwpara can only be 1, 2, 3  ',ch10
  call wrtout(6,message,'COLL')
  call leave_new('COLL')
 end if
 
  distributed=.true.

  if(parallelism_is_on_kpoints)distributed=.false.
  if(parallelism_is_on_bands.and.(.not.keep_in_memory))distributed=.false.

 nbnds=dtset%nband(1)
 mpi_enreg%parareel=0
 mpi_enreg%paralbd=0
 mpi_enreg%paral_level=2
 if(parallelism_is_on_bands)then
  allocate(nband_t(nbnds))
  nband_t(:)=nbnds
  allocate(mpi_enreg%proc_distrb(nbnds,nbnds,1))
  call distrb2(nbnds,nband_t(:),nbnds,1,mpi_enreg)
  deallocate(nband_t)
  if(distributed)then
!   nbnds_per_proc=nbnds/mpi_enreg%nproc+mod(nbnds,mpi_enreg%nproc)
      nbnds_per_proc=0
      min_found=.false.
      max_found=.false.
      do ib=1,nbnds
       if(mpi_enreg%proc_distrb(ib,1,1)==me)nbnds_per_proc=nbnds_per_proc+1
      end do
      do ib=1,nbnds
      if(mpi_enreg%proc_distrb(ib,1,1)==me)min_found=.true.
      if(min_found)exit
      end do
      min_band_proc=ib
      do ib=nbnds,1,-1
      if(mpi_enreg%proc_distrb(ib,1,1)==me)max_found=.true.
       if(max_found)exit
      end do
      max_band_proc=ib
     if(nbnds_per_proc==0)then
     write(message,'(a,a,a,a,a,a,a,a)')ch10,&
&  ' para: BUG -',ch10,&
&  ' one or more PROC has zero number of bands ',ch10,&
&  ' this is not allowd in the moment',ch10,&
&  ' please use less number of PROCs'
  call wrtout(6,message,'COLL')
  call leave_new('COLL')
 end if
  else
   nbnds_per_proc=nbnds
   min_band_proc=1
   max_band_proc=nbnds
  end if
 else if(parallelism_is_on_kpoints)then
! memory is not parallelized for this option
  nbnds_per_proc=nbnds
  min_band_proc=1
  max_band_proc=nbnds
 end if
  
! default for seq cases
 if(mpi_enreg%nproc==1)then
  distributed=.false.
  keep_in_memory=.false.
  parallelism_is_on_kpoints=.false.
  parallelism_is_on_bands=.false.
  nbnds_per_proc=nbnds
  min_band_proc=1  
  max_band_proc=nbnds
 end if

!******************************************************************************************* 
 ep%gwcalctyp=dtset%gwcalctyp
 if(mod(ep%gwcalctyp,10)/=0.and.mod(ep%gwcalctyp,10)/=8) plasmon_pole_model=.false.
 if(mod(ep%gwcalctyp,10)==1) analytic_continuation=.true.
 if(mod(ep%gwcalctyp,10)==2.or.mod(ep%gwcalctyp,10)==9) contour_deformation=.true.
 if(mod(ep%gwcalctyp,10)==5.or.mod(ep%gwcalctyp,10)==6.or.mod(ep%gwcalctyp,10)==7) static=.true.
!write(message,'(a,i4)') 'gw calculation type', ep%gwcalctyp
!call wrtout(ab_out,message,'COLL')

!XG 020209 : Valerio, the value of dp is NOT in general the length
!of a real. It is a pure convention, despite the fact
!that some constructors have chosen to use the length of a real ...
!write(*,*) 'length of a real on this machine ',dp
!The following coding is safer.
!VO 030411: dpbytes is used in the opening of a direct access file
!to establish the length of the record (recl=...). It represents the
!length of real to be used in a recl statement. For example on Dec
!it should be 1 (even if a real is 4 bytes)
!because recl is based on "word" units.
!On aix it should be 4 because it is based on byte units.
!Originally to establish this machine dependent length, a function
!opening a direct access file and trying to write and reread a real (pi or e)
!was called and was returning this machine dependent length.
!However 8 is safe even if you get file sizes which can be 8 times the required.
 dpbytes=8
#if defined T3E
 dpbytes=16
#endif

 write(message,'(a,i4,a,i4,a)')' length of a real on this machine ',dpbytes,' ; dp=',dp,ch10
 call wrtout(6,message,'COLL')

!Set up the parameters of the calculation (incl. q points)
 nbnds=dtset%nband(1)

!Compute dimensional primitive translations rprimd
 call mkrdim(acell,rprim,rprimd)

!Obtain dimensional translations in reciprocal space gprimd, metrics and unit cell volume, from rprimd.
!Also output rprimd, gprimd and ucvol
 call metric(gmet,gprimd,ab_out,rmet,rprimd,ucvol)

!Define consistently npw, nsh, and ecut
 call setshells(dtset%ecutwfn,dtset%npwwfn,dtset%nshwfn,dtset%nsym,gmet,&
&               gprimd,dtset%symrel,'wfn',ucvol)
 call setshells(dtset%ecuteps,dtset%npweps,dtset%nsheps,dtset%nsym,gmet,&
&               gprimd,dtset%symrel,'eps',ucvol)

!Calculate npwvec as the biggest between dtset%npweps and dtset%npwwfn
 if(dtset%npwwfn>dtset%npweps) then
  npwvec=dtset%npwwfn
 else
  npwvec=dtset%npweps
 end if

 write(message,'(a)')' screening : will call testlda '
 call wrtout(6,message,'COLL')

!Read parameters of the KSS or QPLDA o STA file and verifify them
!MG testlda now outputs the value of nsppol read in the hdr of the _KSS file  
 call testlda(dtfil,nop,nkibz,nbr,ngr,nshr,i1,i2,ntypat,natom,mpsang,nsppol,mpi_enreg,nonlocal)
 if(i2/=0.and.i2/=1.and.i2/=2.and.i2/=502.and.i2/=602) then
  write(message,'(3a)')&
&  ' screening: ERROR -',ch10,&
&  ' unknown format found in the screening file'
  call wrtout(6,message,'COLL') 
  call leave_new('COLL')
 end if

 ninv=2
 inquire(file='nop.in',exist=ltemp)
 if(ltemp) then
  open(99,file='nop.in')
  read(99,*) nop, ninv
  close(99)
 end if

 if(npwvec>ngr) then
  write(message,'(3a)')&
& ' screening: WARNING -',ch10,                  &
& ' number of g-vectors found less then required'
  call wrtout(6,message,'COLL')

  npwvec=ngr
  if(dtset%npwwfn>ngr) dtset%npwwfn=ngr
  if(dtset%npweps>ngr) dtset%npweps=ngr
  write(message,'(a,i6,a,a,i6,a,a,i6,a)')&
&  '         calculation will proceed with npwvec = ',npwvec,ch10,      &
&  '                                       npweps = ',dtset%npweps,ch10,&
&  '                                       npwwfn = ',dtset%npwwfn,ch10
  call wrtout(6,message,'COLL')
 end if

 if(nbnds>nbr) then
  write(message,'(5a,i4,a)')&
&  ' screening: WARNING -',ch10,&
&  ' number of bands found less then required;',ch10,&
&  '         calculation will proceed with nbnds = ',nbr,ch10
  call wrtout(6,message,'COLL')
  nbnds=nbr
 end if

 allocate(qplusg(dtset%npweps),stat=istat)
 if(istat/=0) stop 'out of memory in qplusg'

 !allocate LDA electronic structure variables
 allocate(op(3,3,nop),stat=istat)
 allocate(kibz(3,nkibz),stat=istat)
 allocate(gvec(3,npwvec),stat=istat)
 if(istat/=0) stop 'out of memory in op/kibz/gvec'
 allocate(energy(nkibz,min_band_proc:max_band_proc,nsppol),stat=istat)
 if(istat/=0) stop 'out of memory in energy'
 allocate(occ(nkibz,min_band_proc:max_band_proc,nsppol),stat=istat)
 if(istat/=0) stop 'out of memory in occ'
 allocate(gwenergy(nkibz,min_band_proc:max_band_proc,nsppol),stat=istat)
 if(istat/=0) stop 'out of memory in gwenergy'

 allocate(wfg(dtset%npwwfn,min_band_proc:max_band_proc,nkibz,nsppol),stat=istat)
 if(istat/=0) then
  call memerr('screening','wfg',dtset%npwwfn*nbnds_per_proc*nkibz*nsppol,'spc')
 end if
 wfg(:,:,:,:)=(0.,0.)

 !read in KS band structure and information about material
 !MG NOTE: the case nsppol==2 does not work if we are using the STA or QPLDA form  
 if(i2==0.or.i2==1) then

  write(message,'(2a)')' screening: will call rdlda ',ch10
  call wrtout(6,message,'COLL')
  if (nsppol==2) then 
   write(message,'(4a)')ch10,&
&   ' screening ERROR- ',ch10,&
&   ' nsppol=2 and QPLDA format are not compatible' 
   call wrtout(6,message,'COLL')
   call leave_new('COLL')
  end if 

  i1 = 1 !file unformatted -> i1=1
  call rdlda(10,nop,nbnds,nkibz,npwvec,dtset%npwwfn,nopr,nbr,nkibzr,ngr,&
&  title,i1,i2,i3,a1,a2,a3,op,gvec,kibz,energy,occ,wfg)

 else if(i2==2) then

  write(message,'(2a)')' screening : will call rdldaabinit ',ch10
  call wrtout(6,message,'COLL')
  if (nsppol==2) then 
   write(message,'(4a)')ch10,&
&  ' screening: ERROR- ',ch10,&
&  ' nsppol=2 and old wf file format are not compatible' 
   call wrtout(6,message,'COLL')
   call leave_new('COLL')
  end if 

  call rdldaabinit(10,nop,nbnds,nkibz,npwvec,dtset%npwwfn,&
&  title,a1,a2,a3,op,gvec,kibz,energy,occ,wfg)

 else if(i2==502.or.i2==602) then

  write(message,'(2a)')' screening : will call rdkss ',ch10
  call wrtout(6,message,'COLL')

  allocate(typat(natom),xred(3,natom),xcart(3,natom))
  allocate(vkbsign(mpsang,ntypat))
  allocate(vkb(dtset%npwwfn,ntypat,mpsang,nkibz),stat=istat)
  if(istat/=0) then 
   call memerr('screening','vkb',dtset%npwwfn*ntypat*mpsang*nkibz,'spc')
  end if 
  allocate(vkbd(dtset%npwwfn,ntypat,mpsang,nkibz),stat=istat)
  if(istat/=0) then 
   call memerr('screening','vkbd',dtset%npwwfn*ntypat*mpsang*nkibz,'spc')
  end if 
 
  !MG added nsspol as input variable and modified the shape of the output arrays
  call rdkss(dtfil,hdr,nop,nbnds,nkibz,npwvec,nsppol,dtset%npwwfn,&
&            title,a1,a2,a3,op,gvec,kibz,energy,occ,wfg,&
&            ntypat,natom,mpsang,typat,xred,vkbsign,vkb,vkbd,&
&            nel,mpi_enreg,nonlocal,min_band_proc,max_band_proc)
  do ia = 1, natom
   xcart(:,ia) = xred(1,ia)*a1(:) + xred(2,ia)*a2(:) + xred(3,ia)*a3(:)
  end do
 end if ! i2
 call pclock(10)

!Calculate b1, b2, b3 and ucvol, bzvol
!MG unlike the main abinit code, here the reciprocal vectors 
!   are defined such as a_i b_j = 2pi delta_ij
 call lattice(a1,a2,a3,b1,b2,b3,ucvol,bzvol)

!MG FIXME these quantities are not used anymore
!note that ULA is not guaranteed to be the same as that used by
!RJN; it is chosen to be sqrt(2)*|a1|
 ula=sqrt(2.0)*sqrt(a1(1)**2+a1(2)**2+a1(3)**2)
!ula should be the lattice parameter, and tpula=2pi/a
 tpula=2.0*pi/ula
 brl2cc(:,1)=b1(:) ; brl2cc(:,2)=b2(:) ; brl2cc(:,3)=b3(:)
 bcc2rl(:,:)=brl2cc(:,:)
 call matrginv(bcc2rl,3,3)
!END FIXME 

 call pclock(20)

 call findshells(b1,b2,b3,npwvec,dtset%npweps,gvec)
 !set real space mesh
 !MG here we should call getng.F90 to set up a FFT mesh which is compatible
 !with the symmetry (only rotations) of the system. We could also use an
 !augmented FFT grid
 call setmesh(gmet,gvec,ngfft1,ngfft2,ngfft3,ngfft1a,ngfft2a,ngfft3a,npwvec,dtset%npwwfn,nr,1)

 !NOTE : The following call to setmesh.f must be used 
 !if method=2 or method=3 in setmesh.f (see Src_3gw/setmesh.f).
 !write(std_out,*) ' npweps = ',dtset%npweps
 !call setmesh(gmet,gvec,ngfft1,ngfft2,ngfft3,ngfft1a,ngfft2a,ngfft3a,dtset%npweps,dtset%npwwfn,nr)

 !set up tables for FFT igfft
 allocate(igfft(npwvec,5,5,5),stat=istat)
 if(istat/=0) stop 'out of memory in igfft'
 call cigfft(npwvec,ngfft1a,ngfft1,ngfft2,ngfft3,gvec,igfft)
 call pclock(30)

 !set up table indicating rotations of r-points
 allocate(irottb(nr,nop),stat=istat)
 if(istat/=0) stop 'out of memory in irottb'
 allocate(grottb(npwvec,2,nop),grottbm1(npwvec,2,nop),stat=istat)
 if(istat/=0) stop 'out of memory grottbm1'

 call surot(op,nop,ninv,ngfft1,ngfft1a,ngfft2,ngfft3,nr,npwvec,gvec,grottb,irottb,grottbm1)
 call pclock(40)

!Set up required k-points in whole BZ; nkbzx maximum number of them
 nkbzx=nkibz*nop*2
 allocate(kbz(3,nkbzx),stat=istat)
 allocate(wtk(nkibz),stat=istat)
 if(istat/=0) stop 'out of memory'
 allocate(ktab(nkbzx),stat=istat)
 if(istat/=0) stop 'out of memory'
 allocate(ktabi(nkbzx),stat=istat)
 if(istat/=0) stop 'out of memory'
 allocate(ktabo(nkbzx),stat=istat)
 if(istat/=0) stop 'out of memory'
 allocate(ktabr(nr,nkbzx),stat=istat)
 if(istat/=0) stop 'out of memory'

 call identk(kibz,nkibz,nkbzx,nr,nop,ninv,irottb,op,kbz,ktab,ktabr,ktabi,ktabo,nkbz,wtk)
 call pclock(50)

!Find q-points q=k-kfirst
 call findnq(nkbz,kbz,nop,op,nq,ninv)

 allocate(q(3,nq))
 call findq(nkbz,kbz,nop,op,nq,q,ninv,b1,b2,b3)

 call pclock(60)
 call timab(302,2,tsec)
 call timab(303,1,tsec)

!Calculate KS wavefunctions in real space using FFT
 allocate(wfr(nr,min_band_proc:max_band_proc,nkibz,nsppol),stat=istat)
 if(istat/=0) then 
  call memerr('screening','wfr',nr*nbnds_per_proc*nkibz*nsppol,'spc')
 end if
 wfr(:,:,:,:)=(0.,0.)

 tim_fourdp=4
!MG fftwfn has been modified in order to deal with the nsppol=2 case
 call fftwfn(dtset%npwwfn,nbnds,nkibz,nr,nsppol,wfg,wfr,igfft(:,3,3,3),ngfft1,ngfft1a,&
& ngfft2,ngfft3,tim_fourdp,mpi_enreg,min_band_proc,max_band_proc,parallelism_is_on_bands)

 if(.not.distributed)then
  if(parallelism_is_on_bands)then
!  the mpi_sum will lead to crash if the array exeeded some maximum message length
!  thus it is safe to sum this array at each k point and spin
   do is=1,nsppol
    do ik=1,nkibz
     call xsum_mpi(wfr(:,:,ik,is),spaceComm,ierr)
    end do
   end do 
  end if !parallelism
 end if !distributed

 call timab(303,2,tsec)
 call timab(304,1,tsec)

 !MG we only need to enter this part in case of self-consistent GW calculation
 !   in version 5.2.2 we were performing these calculations also for the one-shot GW 
 if(ep%gwcalctyp>=10) then 

  !FBruneval051215, read QP wavefunctions of the previous step
  write(message,'(a)')' screening: reading QP wavefunctions of the previous step'
  call wrtout(06,message,'COLL')
  !call wrtout(ab_out,message,'COLL')

  !read self-consistent transformation matrices and QP energies
  allocate(m_lda_to_qp(min_band_proc:max_band_proc,min_band_proc:max_band_proc,nkibz,nsppol),stat=istat)
  if(istat/=0) then 
   call memerr('screening','m_lda_to_qp',(max_band_proc-min_band_proc+1)**2*nkibz*nsppol,'spc')
  end if

  allocate(en_qp(nkibz,min_band_proc:max_band_proc,nsppol),stat=istat)
  if(istat/=0) then 
   call memerr('screening','en_qp',nkibz*(max_band_proc-min_band_proc+1)*nsppol,'dp')
  end if
 
  !MG added nsppol in the list of input variables
  allocate(rho_p(nr,nsppol))
  call rdqps(dtfil,ep%gwcalctyp,nkibz,nbnds,nsppol,kibz,nscf,nr,energy,en_qp,m_lda_to_qp,rho_p,&
&  min_band_proc,max_band_proc,mpi_enreg)
  deallocate(rho_p)

  energy(:,:,:)=en_qp(:,:,:)
  call calc_wf_qp(nkibz,nbnds_per_proc,nr,          nsppol,m_lda_to_qp,wfr,min_band_proc,max_band_proc)
  call calc_wf_qp(nkibz,nbnds_per_proc,dtset%npwwfn,nsppol,m_lda_to_qp,wfg,min_band_proc,max_band_proc)

  deallocate(m_lda_to_qp,en_qp)

 end if !of if(gwcalctyp=>10)

 if(.not.keep_in_memory)then
!open scratch file for wavefunctions (r,b,k,s)
  filewfr=trim(dtfil%filnam_ds(5))//'_WFR'
  write(message,'(2a)')' opening file: ',filewfr 
  call wrtout(06,message,'COLL')

  open(unit=unitwfr,file=filewfr,status='unknown',access='direct',recl=nr*nbnds*dpbytes)

  if(me==0.or. .not.(nonlocal))then
   !MG an external loop over spin has been added
   do is=1,nsppol
    do ik=1,nkibz
     write(unitwfr,rec=ik+nkibz*(is-1)) ((wfr(ir,ib,ik,is),ir=1,nr),ib=1,nbnds)
    end do
   end do 
  end if
  deallocate(wfr)

!open scratch file for wavefunction(G,b,k,s)
  filewfg=trim(dtfil%filnam_ds(5))//'_WFG'
  write(message,'(2a)')' opening file: ',filewfg 
  call wrtout(06,message,'COLL')

!MG minor BUG in version 5.2.2 : 
!   wrong value of recl (recl=dtset%npwwfn*nbnds*2*dpbytes)
!   since wfg is made of 2 single precision real variables 
!OLD CODING
! open(unit=unitwfg,file=filewfg,status='unknown',access='direct',&
!& recl=dtset%npwwfn*nbnds*2*dpbytes)
!NEW CODING
  open(unit=unitwfg,file=filewfg,status='unknown',access='direct',&
&     recl=dtset%npwwfn*nbnds*dpbytes)

  if(me==0.or. .not. (nonlocal))then
   !MG Added external loop over spin
   do is=1,nsppol
    do ik=1,nkibz
     write(unitwfg,rec=ik+nkibz*(is-1)) ((wfg(ig,ib,ik,is),ig=1,dtset%npwwfn),ib=1,nbnds)
    end do
   end do
  end if
  deallocate(wfg)

 end if ! keep_in_memory

 call pclock(70)
 call timab(304,2,tsec)
 call timab(305,1,tsec)

 !MG060926 to take into account the case in which there are two bands with 
 !         the same spin index close to the gap
 allocate(nbv(nsppol))
 !MG dtset%fixmom is passed to fermi.F90 to fix the problem with newocc in case of magnetic metals
 call fermi(hdr,nbnds,nkibz,dtset%fixmom,dtset%nsppol,wtk,energy,occ,nel,nbv,efermi,&
&  mpi_enreg,min_band_proc,max_band_proc,parallelism_is_on_bands)

 gwenergy(:,:,:)=energy(:,:,:)

 inquire(file='in.gw',exist=update_energies)
 ep%soenergy=dtset%soenergy

 if(ep%soenergy>0.1d-4) then

  write(message,'(a,a,a,a,a,f7.3,a)')&
&  ' performing a first self-consistency',ch10,&
&  ' update of the energies in W by a scissor operator',ch10,&
&  ' applying a scissor operator of [eV] ',ep%soenergy*Ha_eV,ch10
  call wrtout(06,message,'COLL') 
  call wrtout(ab_out,message,'COLL')
  do is=1,nsppol
   do ib=nbv(is)+1,nbnds
    if(ib>=min_band_proc.and.ib<=max_band_proc)then
     gwenergy(:,ib,is)=energy(:,ib,is)+ep%soenergy
    end if
   end do
  end do

 else if(update_energies) then

!MG FIXME to be tested, this part should be treated clearer, maybe introducing 
!   an input variable, moreover what happens if the user asks for a scissor and 
!   an update of the energies as well? 
  write(message,'(a,a,a,a)')' performing a first self-consistency',ch10,&
&  ' update of the energies in W by a previous GW calculation',ch10
  call wrtout(06,message,'COLL') 
  call wrtout(ab_out,message,'COLL')
 !MG Added spin variable in the list of input variables
!FIXME in.gw should became a standard ABINIT file 
  call rdgw(nkibz,nbnds,nbv,nsppol,kibz,gwenergy)

 end if

!Calculate electron density
 allocate(rho2(nr,nsppol),stat=istat)
 if(istat/=0) stop 'out of memory in rho2'

!!!!!!!
!XG070103 This comes from Riad branch, with nsppol added

 if(keep_in_memory)then
  call density(nbnds,nkibz,nkibz,nkbz,nsppol,nop,ninv,nr,ngfft1,ngfft1a,ngfft2,&
&  ngfft3,irottb,ucvol,wtk,occ,rho2,omegaplasma,&
&  mpi_enreg,min_band_proc,max_band_proc,parallelism_is_on_bands,nonlocal,wfr)
 else
  call density(nbnds,nkibz,nkibz,nkbz,nsppol,nop,ninv,nr,ngfft1,ngfft1a,ngfft2,&
&  ngfft3,irottb,ucvol,wtk,occ,rho2,omegaplasma,&
&  mpi_enreg,min_band_proc,max_band_proc,parallelism_is_on_bands,nonlocal)
 end if

!!!!!!!
!While this was from GMatteo branch
!MG NOTE this call should be used if wfr are not in memory
!call density(nbnds,nkibz,nkibz,nkbz,nsppol,nop,ninv,nr,ngfft1,ngfft1a,ngfft2,&
!& ngfft3,irottb,ucvol,wtk,occ,rho2,omegaplasma)

!MG this subroutine is presently used since wfr is in memory
!NOTE that we are assuming nr=nrb (see setmesh)
!call crho(irottb,nbnds,ninv,nkbz,nkibz,nkibz,nop,nr,nr,nsppol,occ,omegaplasma,rho2,ucvol,wfr,wtk)

!END OF XG070103
!!!!!!!

 call pclock(80)

 ep%nomegaer=1
 ep%nomegaei=1

!RShaltaf(030306):since on three diffrent ppmdels, the DM only on zero omega is needed
!one can save cpu time + memory by calculating the DM on one frequency
!provided that the user use nfreqre=1
 if(plasmon_pole_model)then
  if(dtset%nfreqre==1.and.dtset%nfreqim==0)then
   ep%nomegaer=1
   ep%nomegaei=0
   write(message,'(a,a,a,a,a,a)')&
&   ' the DM will be calculated on zero frequency only',ch10,&
&   ' please note that the calculated DM will not be suitable if you chose to calculate self energy',ch10,&
&   ' using plasmon pole model (ppmodel 1)',ch10
   call wrtout(06,message,'COLL') 
   call wrtout(ab_out,message,'COLL')
  end if
 end if ! plasmon_pole_model

 if(static) ep%nomegaer=0
 if(analytic_continuation .or. contour_deformation) then
  ep%nomegaei=dtset%nfreqim
  if(ep%nomegaei==0) ep%nomegaei=NOMEGAGAUSS
 end if

 if(contour_deformation) then
  ep%nomegaer=dtset%nfreqre
  ep%omegaermax=dtset%freqremax
  if(ep%nomegaer==0) ep%nomegaer=NOMEGAREAL
  if(ep%omegaermax<0.1d-4) ep%omegaermax=OMEGAERMAX
  domegareal=ep%omegaermax/(ep%nomegaer-1)
 end if

 nomega=ep%nomegaer+ep%nomegaei
 allocate(omega(nomega))

 omega(1)=0 !first omega is always zero without broadening
 do io=2,ep%nomegaer
  !probably here we are wrong as omega is neither 0 nor imag pure
  omega(io)=cmplx((io-1)*domegareal,0.0)
 end do

 if(plasmon_pole_model) then
  if(nomega==2)then
   e0=dtset%ppmfrq
   if(e0<0.1d-4) e0 = omegaplasma
   omega(2)=cmplx(0.0,e0)
  end if
 end if

 if(analytic_continuation .or. contour_deformation) then
!  allocate(z(ep%nomegaei),zw(ep%nomegaei))
!  ! calculate Gauss-Legendre Quadrature knots and weights
!  ! to calulate \int_0^\infty dx f(x)
!  ! we calculate \int_0^1 dz f(1/z - 1)/z^2
!  call coeffs_gausslegint(0.0_dp,1.0_dp,z,zw,ep%nomegaei)
!  do io=1, ep%nomegaei
!   omega(ep%nomegaer+io)=cmplx(0.0,1/z(io)-1)
!  end do
!  deallocate(z,zw)
  e0=dtset%ppmfrq
  if(e0<0.1d-4) e0 = omegaplasma
  do io=1, ep%nomegaei
   omega(ep%nomegaer+io)=cmplx(0.0,e0/3.*(exp(2./(ep%nomegaei+1)*log(4.)*io)-1.))
  end do
 end if

 write(message,'(a,a)')ch10,' calculating at frequencies omega [eV]:'
  call wrtout(06,message,'COLL')
  call wrtout(ab_out,message,'COLL')
 do io=1,nomega
  write(message,'(i3,2es16.6)')io,omega(io)*Ha_eV
  call wrtout(06,message,'COLL') 
  call wrtout(ab_out,message,'COLL')
 end do

 allocate(chitmp(dtset%npweps,dtset%npweps),stat=istat)
 if(istat/=0) then
  call memerr('screening','chitmp',dtset%npweps*dtset%npweps,'spc')
 end if 

 if(lda) then
!MG060914 FIXME to be generalized in case of spin polarized calculations
!         density.F90 or crho now calculate n_up and n_down
  if (nsppol==2) then 
   write(message,'(4a)')ch10,&
&   ' screening: ERROR- ',ch10,&
&   ' lda calculation with nsppol==2 not yet implemented'
   call wrtout(6,message,'COLL')
   call leave_new('COLL')
  end if 
  !calculate exchange-correlation kernel Kxclda
  !calculate Kxclda in G-space and put it into Kxclda(1+igx+igy*...)
  allocate(kxclda(nr),stat=istat)
  if(istat/=0) stop 'out of memory in kxclda'

!FIXME using only the total charge, should modify the sub in order to treat 
!      the case nsppol==2
  call ckxcldag(ngfft1,ngfft1a,ngfft2,ngfft3,nr,rho2(:,1),kxclda)
  do ig=1,dtset%npweps
   chitmp(ig,1)=kxclda(igfft(ig,3,3,3))
  end do
  write(message,'(a)')' kxclda(g)'
  call wrtout(06,message,'COLL')
  call printv(chitmp,dtset%npweps)
 end if !of if (lda)
!END FIXME

 allocate(chi0(dtset%npweps,dtset%npweps,nomega),stat=istat)
 if(istat/=0) then 
  call memerr('screening','chitmp',dtset%npweps*dtset%npweps*nomega,'spc')
 end if 

 chi=>chi0
 epsm1=>chi0
 eps=>chi0

 write(message,'(2a)')' end of initialization',ch10
 call wrtout(6,message,'COLL')

 call pclock(90)

 !the following lines added by R.Shaltaf to enable the calculations of
 !DM on one or more selected q points
 if(dtset%nqptdm>0)then
  write(message,'(3a)')&
&  ' Dielectric matrix will be calculated on some ',&
&  ' selected q points provided by the user through the input variables ',&
&  ' nqptdm and qptdm'
  call wrtout(6,message,'COLL') 
  call wrtout(ab_out,message,'COLL')
  if(dtset%nqptdm>nq)then
   write(message, '(a,a,a,a,a)')ch10,&
&   ' screening: ERROR -',ch10,&
&   ' nqptdm should not exceed the actual number of q points that already exist in BZ',ch10
   call wrtout(6,message,'COLL')
   call leave_new('COLL')
  end if

  allocate(qptdm(3,dtset%nqptdm))
  !PMA-MG fix problem with a possible larger dimension in dtset%qptdm 
  !qptdm(:,:)=dtset%qptdm(:,:)
  !qptdm(:,:)=dtset%qptdm(:,1:dtset%nqptdm)
  qptdm(:,1:min(dtset%nqptdm,size(dtset%qptdm,2)))=dtset%qptdm(:,1:min(dtset%nqptdm,size(dtset%qptdm,2))) 
  !check whether the q points provided by the user already exist in the set of q points'
  do iq=1,dtset%nqptdm
   found=.false.
   do iqp=1,nq
    qtemp(:)=qptdm(:,iq)-q(:,iqp)
    if(all(abs(qtemp(:))<1.0e-3))found=.true.
    if(found)exit
   end do
   if(.not.found)then
    write(message, '(a,a,a,a,a)' ) ch10,&
&    ' screening: ERROR -',ch10,&
&    ' one or more qptdm cannot be identified as regular q-points satisfying q = k - k1',ch10
    call wrtout(6,message,'COLL')
    call leave_new('COLL')
   end if
  end do ! iq
 end if !of if (dtset%nqptdm>0)
 !at this stage we are sure that all the q-points provided by the user satisfy the conditions

!MG SYMMETRIZATION IN CHI 
 !0 means no use of symmetries, 1 dont use time reversal sym, 
 !2 use both space group symmetries and time reversal
 lt_q%sym_flag=dtset%symchi
 !only to perform automatic tests
 !lt_q%sym_flag=2
 if (lt_q%sym_flag/=0) then 
  write(*,*)'screening: switching on symmetrization'
  lt_q%nop=nop
  lt_q%ninv=ninv
  lt_q%nkbz=nkbz
  allocate(lt_q%ibzq(nkbz),lt_q%ltq(2,nop),lt_q%wtksym(2,nop,nkbz))
  allocate(lt_q%tab(nkbz),lt_q%tabi(nkbz),lt_q%tabo(nkbz))
  allocate(lt_q%op(3,3,lt_q%nop))
  lt_q%op=op
 end if
!ENDMG  

!These lines added by Shaltaf for parallelization 10/08/05
!parallelization on bands by shaltaf 25/10/2006

 allocate(nband_t(nkbz*1))
 nband_t(:)=nbnds
 mpi_enreg%parareel=0
 mpi_enreg%paralbd=0
 mpi_enreg%paral_level=2

 if (parallelism_is_on_kpoints)then
  allocate(mpi_enreg%proc_distrb(nkbz,nbnds,1))
  call distrb2(nbnds,nband_t,nkbz,1,mpi_enreg)
 end if
 deallocate(nband_t)

!-----------------------------------------------------------------------
!calculate screening function and save on disc for each q-point
 write(message,'(a,a,a,a)') '----------------------',ch10,&
& ' screening : beginning of main loop',ch10
 call wrtout(06,message,'COLL')
 call timab(305,2,tsec)

 do iq=1,nq

  !added by R. Shaltaf for the selective q points calculation
  found=.false.
  if(dtset%nqptdm>0)then
   do iqp=1,dtset%nqptdm
    qtemp(:)=q(:,iq)-dtset%qptdm(:,iqp)
    if(all(abs(qtemp(:))<1.0e-3))found=.true.
    if(found)exit ! iqp
   end do
   if(.not.found)cycle ! iq
  end if

  call timab(306,1,tsec)

  qeq0=.false.
  if(all(abs(q(:,iq))<1.0e-3)) qeq0=.true.

!MG FIXME These quantities are not used anymore
  qcc(:)=matmul(brl2cc,q(:,iq))
  qcart(:)=qcc(:)/tpula
!END

  if(dtset%nqptdm==0)then
   write(message,'(2a,i2,a,f9.6,2(",",f9.6),a,a)')ch10,&
&   ' q-point number ',iq,'        q = (',(q(i,iq),i=1,3),') [r.l.u.]',ch10
    call wrtout(06,message,'COLL')
  else
   write(message,'(2a,i2,a,f9.6,2(",",f9.6),a,a)')ch10,&
&   ' q-point number ',iqp,'        q = (',(q(i,iq),i=1,3),') [r.l.u.]',ch10
   call wrtout(06,message,'COLL')
  end if

  !calculates qplusg(G)=|q+G| table for coulombian potential
  call cvc(nq,iq,q,b1,b2,b3,dtset%npweps,gvec,qplusg)

  call pclock(100*iq)
  call timab(306,2,tsec)

!MG SYMMETRIZATION IN CHI
  if (lt_q%sym_flag/=0) then
   !cheating setup_little group passing q==zero if qeq0==.true. 
   !indeed in the code q is not set to 0 but to a small non zero 
   !value in order to calculate the limit q-->0 in cchi0q0 
   !anyway it is possible to use symmetries in the calculation of the
   !rho_twilde matrix elements since in this case q==0
   !this part however should be done in a cleaner way
   qdummy(:)=q(:,iq)
   if (qeq0) qdummy=zero
   call setup_little_group(qdummy(:),gmet,nop,op,ninv,nkbz,kbz,lt_q%ibzq,&
&   lt_q%ltq,lt_q%tab,lt_q%tabo,lt_q%tabi,lt_q%wtksym)
  end if 
!END MG

  if(qeq0) then !calculates chi0q0

   call timab(307,1,tsec)

!  different treatment of the case q=0
   inclvkb=dtset%inclvkb 

   if(inclvkb==1) then 
!   implementation based on Legendre polynomials (CPU and mem ~npwwfn^2)

    allocate(gradvnl(3,dtset%npwwfn,dtset%npwwfn,nkibz),stat=istat)
    if(istat/=0) then
     call memerr('screening','gradvnl',3*dtset%npwwfn*dtset%npwwfn*nkibz,'spc')
    end if 
    call ccgradvnl(dtset%npwwfn,nkibz,b1,b2,b3,gvec,kibz,ntypat,&
&                  natom,mpsang,typat,xcart,vkbsign,vkb,vkbd,gradvnl)

   else if (inclvkb==2) then 
!   implementation based on spherical harmonics (much faster since CPU and mem ~npwwfn)

    allocate(fnl(dtset%npwwfn,mpsang*mpsang,natom,nkibz),stat=istat)
     if(istat/=0) call memerr('screening','fnl',dtset%npwwfn*mpsang*mpsang*natom*nkibz,'spc')

    allocate(fnld(3,dtset%npwwfn,mpsang*mpsang,natom,nkibz),stat=istat)
     if(istat/=0) call memerr('screening','fnld',3*dtset%npwwfn*mpsang*mpsang*natom*nkibz,'spc')

    call ccgradvnl_ylm(dtset%npwwfn,nkibz,a1,a2,a3,b1,b2,b3,gvec,kibz,ntypat,natom,&
&    mpsang,typat,xcart,vkbsign,vkb,vkbd,fnl,fnld)

   end if ! inclvkb
!PM-MG in 5.3.1 the dimension of k-table was not correct, we were using nkibz
!               istead of nkbz (kibz,nkibz,nkibz --> kibz,nkibz,nkbz)
!             
   if(keep_in_memory)then
    call cchi0q0(q(:,iq),nomega,omega,gvec,npwvec,dtset%npweps,dtset%npwwfn,op,nop,&
&    kibz,nkibz,nkbz,nbnds,nbv,nsppol,occ,ktab,ktabr,ktabi,ktabo,kbz,nkbz,&
&    irottb,ngfft1,ngfft1a,ngfft2,ngfft3,igfft,nr,energy,gwenergy,etadelta,chi0,b1,b2,b3,&
&    ucvol,wtk,grottb,inclvkb,gradvnl,mpi_enreg,grottbm1,lt_q,natom,mpsang,fnl,fnld,&
&    min_band_proc,max_band_proc,&
&    parallelism_is_on_kpoints,parallelism_is_on_bands,nbnds_per_proc,distributed,nonlocal,wfr,wfg)
   else 
    call cchi0q0(q(:,iq),nomega,omega,gvec,npwvec,dtset%npweps,dtset%npwwfn,op,nop,&
&    kibz,nkibz,nkbz,nbnds,nbv,nsppol,occ,ktab,ktabr,ktabi,ktabo,kbz,nkbz,&
&    irottb,ngfft1,ngfft1a,ngfft2,ngfft3,igfft,nr,energy,gwenergy,etadelta,chi0,b1,b2,b3,&
&    ucvol,wtk,grottb,inclvkb,gradvnl,mpi_enreg,grottbm1,lt_q,natom,mpsang,fnl,fnld,&
&    min_band_proc,max_band_proc,&
&    parallelism_is_on_kpoints,parallelism_is_on_bands,nbnds_per_proc,distributed,nonlocal)
   end if ! keep_in_memory

   if(inclvkb==1)deallocate(gradvnl)
   if(inclvkb==2)deallocate(fnl,fnld)
   if(allocated(wfg))deallocate(wfg)

   call timab(307,2,tsec)

  else ! calculate cchi0 at q/=0

   call timab(308,1,tsec)
!PM-MG in 5.3.1 the dimension of k-table was not correct, we were using nkibz
!               istead of nkbz (nkibz,nkibz,nbnds --> nkibz,nkbz,nbnds)
   if(keep_in_memory)then
    call cchi0(q(:,iq),nomega,omega,npwvec,dtset%npweps,dtset%npwwfn,nkibz,nkbz,nbnds,&
&    nbv,nsppol,occ,ktab,ktabr,ktabi,kbz,nkbz,ngfft1,ngfft1a,ngfft2,ngfft3,&
&    igfft,nr,energy,gwenergy,etadelta,chi0,mpi_enreg,nop,grottbm1,lt_q,min_band_proc,max_band_proc,&
&    parallelism_is_on_kpoints,parallelism_is_on_bands,nbnds_per_proc,distributed,nonlocal,wfr)

   else 
    call cchi0(q(:,iq),nomega,omega,npwvec,dtset%npweps,dtset%npwwfn,nkibz,nkbz,nbnds,&
&    nbv,nsppol,occ,ktab,ktabr,ktabi,kbz,nkbz,ngfft1,ngfft1a,ngfft2,ngfft3,&
&    igfft,nr,energy,gwenergy,etadelta,chi0,mpi_enreg,nop,grottbm1,lt_q,min_band_proc,max_band_proc,&
&    parallelism_is_on_kpoints,parallelism_is_on_bands,nbnds_per_proc,distributed,nonlocal)

   end if ! keep_in_memory

   call timab(308,2,tsec)

  end if !calculate cchi0 at q/=0
 
  call timab(309,1,tsec)

! Print chi0(q,G,G'',omega)
  if(mpi_enreg%me==0)then !=============================== only master works 

   do iomega=1,nomega
    write(message,'(a,i4,a)')' chi(G,G") at the ',iomega,' th omega'
    call wrtout(06,message,'COLL')
    if(dtset%nqptdm>0)then
     write(message,'(a,i3,a,i4,a)')' chi0(q=',iqp,',omega=',iomega,',G,G")'
     call wrtout(06,message,'COLL')
    else
     write(message,'(a,i3,a,i4,a)')' chi0(q=',iq,',omega=',iomega,',G,G")'
     call wrtout(06,message,'COLL')
    end if
    call printcm(chi0(:,:,iomega),dtset%npweps,dtset%npweps)
   end do

   call pclock(100*iq+10)

   if(rpa) then
    chi0(:,:,:)=chi0(:,:,:)/ucvol
! calculate epsilon=(1-Vc*chi0)
    do io=1,nomega
     do ig2=1,dtset%npweps
      chitmp(:,ig2)=(-(4.0*pi)/(qplusg(:)*qplusg(ig2)))*chi0(:,ig2,io)
      chitmp(ig2,ig2)=1+chitmp(ig2,ig2)
     end do
     eps(:,:,io)=chitmp(:,:)
    end do

    do iomega=1,nomega
     write(message,'(a,i4,a)')' epsilon(G,G") at the ',iomega,' th omega'
     call wrtout(06,message,'COLL')
     call printcm(eps(:,:,iomega),dtset%npweps,dtset%npweps)
    end do

    if(qeq0) epsilon0nlf=real(eps(1,1,1))

    do io=1,nomega
     call matcginv(eps(:,:,io),dtset%npweps,dtset%npweps)
    end do

    do iomega=1,nomega
     write(message,'(a,i4,a)')' epsilon^-1(G,G") at the ',iomega,' th omega'
     call wrtout(06,message,'COLL')
     call printcm(epsm1(:,:,iomega),dtset%npweps,dtset%npweps)
    end do

! calculate macroscopic dielectric constant
! epsmlf=1/epsm1(G=0,G''=0)
    if(qeq0) epsilon0=real(1.0/epsm1(1,1,1))

    if(qeq0 .and. .not.plasmon_pole_model) then
     open(69,file='out.elf')
     do io=1,ep%nomegaer
      write(69,*) real(omega(io))*Ha_eV, -aimag(epsm1(1,1,io))
     end do
     close(69)
    end if

   else !of if(rpa)

    do iomega=1,nomega
!MG060924 this logical condition is always false, and should be eliminated
     if(rpa) then
      !zeroes chitmp
      chitmp(:,:)=0.0
     else if(lda) then
      !calculate chi0*KxcLDA and put it in chitmp
      !calculate chi0*Kxc in G-space
      do ig1=1,dtset%npweps
       do ig2=1,dtset%npweps
        ctmp=(0.0,0.0)
        do ig3=1,dtset%npweps
         ig4x=modx(gvec(1,ig3)-gvec(1,ig2),ngfft1)
         ig4y=modx(gvec(2,ig3)-gvec(2,ig2),ngfft2)
         ig4z=modx(gvec(3,ig3)-gvec(3,ig2),ngfft3)
         ig4=1+ig4x+ig4y*ngfft1a+ig4z*ngfft1a*ngfft2
         ctmp=ctmp+chi0(ig1,ig3,iomega)*kxclda(ig4)
        end do
        chitmp(ig1,ig2)=ctmp
       end do
      end do
     end if

     if(.not.rpa) then
     !write(6,'(a,i2,a,i1,a)') ' chi0*Kxc(q=',iq,',omega=',iomega,',G,G")'
     !call printcm(chitmp,dtset%npweps,dtset%npweps)
     end if

! Calculate chi
    !write(*,*)
    !&    'calculating chi=(1-chi0*Vc-chi0*Kxc)^-1 chi0'

     !calculate (1-chi0*Vc-chi0*Kxc ) and put it in chitmp
     do ig1=1,dtset%npweps
      do ig2=1,dtset%npweps
       chitmp(ig1,ig2)=(-(4.0*pi)/(qplusg(ig2)**2))*&
&       chi0(ig1,ig2,iomega)-chitmp(ig1,ig2)
      end do
      chitmp(ig1,ig1)=chitmp(ig1,ig1)+ucvol
     end do

! Invert (1-chi0*Vc-chi0*Kxc)
     call matcginv(chitmp,dtset%npweps,dtset%npweps)
 
! Multiply for chi0
     !call cdotm(chitmp,chi0(1,1,iomega),dtset%npweps)
     chitmp=matmul(chitmp,chi0(:,:,iomega))

     !must multiply by ucvol (see notes-due to conversion of r->G in
     !matrix definitions)
     chi(:,:,iomega)=chitmp(:,:)*ucvol

    end do !iomega

! Print chi
    if(me==0)then
     do iomega=1,1
      write (6,'(a,i2,a,i1,a)')' chi(q=',iq,',omega=',iomega,',G,G")'
      call printcm(chi(:,:,iomega),dtset%npweps,dtset%npweps)
     end do
    end if

! Calculate epsilon-twiddle^-1
   !write(*,*)  'calculating epsilon^-1(G,G")=1+(Vc+Kxc)*chi'
    do iomega=1,nomega
     if(.not.testparticle) then
      if(rpa) then
       !zeroes chitmp
       chitmp(:,:)=(0.0,0.0)
      else if(lda) then
       !calculate Kxclda*chi and put in chitmp
       do ig1=1,dtset%npweps
        do ig2=1,dtset%npweps
         ctmp=(0.0,0.0)
         do ig3=1,dtset%npweps
          ig4x=modx(gvec(1,ig1)-gvec(1,ig3),ngfft1)
          ig4y=modx(gvec(2,ig1)-gvec(2,ig3),ngfft2)
          ig4z=modx(gvec(3,ig1)-gvec(3,ig3),ngfft3)
          ig4=1+ig4x+ig4y*ngfft1a+ig4z*ngfft1a*ngfft2
          ctmp=ctmp+kxclda(ig4)*chi(ig3,ig2,iomega)
         end do
         chitmp(ig1,ig2)=ctmp
        end do
       end do
      end if
! Perform hermitianization
      call hermitianize(chitmp,dtset%npweps)
     else !(testparticle)
      !zeroes chitmp
      chitmp(:,:)=(0.0,0.0)
     end if
     do ig1=1,dtset%npweps
      epsm1(ig1,:,iomega)=(4*pi/(qplusg(ig1)*qplusg(:)*ucvol))*&
&      chi(ig1,:,iomega)+chitmp(ig1,:)/ucvol
       !chitmp=Kxc*chi must be divided by ucvol see notes on fft r->G
      epsm1(ig1,ig1,iomega)=1.0+epsm1(ig1,ig1,iomega)
     end do ! ig
    end do ! iomega

! Print epsilon-twiddle^-1
    do iomega=1,2
     write (message,'(a,i2,a,i1,a)')' epsilon-twiddle^-1(q=',iq,',omega=',iomega,',G,G")'
     call wrtout(06,message,'COLL')
     call printcm(epsm1(:,:,iomega),dtset%npweps,dtset%npweps)
    end do

! Find dielectric constant epsilon0 with lf
    if(qeq0) then
     do io=1,nomega
      if(abs(real(omega(io)))<1.e-3) then
       if(abs(aimag(omega(io)))<1.e-3) then
        epsilon0=real(1.0/epsm1(1,1,io))
       end if
      end if
     end do
    end if

   end if !of if(rpa)

   call timab(309,2,tsec)
   call timab(310,1,tsec)

! Write epsilon^-1 on file
! on title(1) it is better to write the name of the system (e.g. Silicon bulk..)
   title(1)='em1 file: epsilon^-1'
   if(testparticle) then
    title(2)='TESTPARTICLE'
   else
    title(2)='TESTELECTRON'
   end if
   title(2)(14:17)=ctype

   call wrscr(dtfil,hdr,dtset,dtset%npweps,dtset%npwwfn,npwvec,nbnds,nq,nomega,q,omega,&
&             gvec,iq,epsm1,title,unitf,nop,op)

   if(qeq0) then
    do io=1,nomega
     if(abs(real(omega(io)))<1.e-3) then
      if(abs(aimag(omega(io)))<1.e-3) then
       write(message,'(a,f8.4)')&
&       ' dielectric constant = ',epsilon0
       call wrtout(6,message,'COLL')
       call wrtout(ab_out,message,'COLL')
       write(message,'(a,f8.4,a)')&
&       ' dielectric constant without local fields = ',epsilon0nlf,ch10
       call wrtout(6,message,'COLL') 
       call wrtout(ab_out,message,'COLL')
      end if
     end if
    end do ! io
   end if ! qeq0

  end if !=========================================== only master

  call pclock(100*iq+90)
  call timab(310,2,tsec)

 end do !iq

 call pclock(9999)
 if( mpi_enreg%nproc==1 .or. (.not.nonlocal) .or. me==0 )then

  if(i2==502.or.i2==602) then
   call hdr_clean(hdr)
  end if
 
 end if

 close(unitwfg)
 close(unitwfr)

 deallocate(chitmp,chi0,energy,grottb,grottbm1)
 deallocate(gvec,gwenergy,igfft,irottb,kibz)
 deallocate(occ,omega,op,q,qplusg,rho2)
 deallocate(kbz)
 deallocate(wtk)
 deallocate(ktab)
 deallocate(ktabi)
 deallocate(ktabo)
 deallocate(ktabr)
 if(mpi_enreg%nproc>1)then
 deallocate(mpi_enreg%proc_distrb)
 end if
 deallocate(nbv)
 if (lt_q%sym_flag/=0)then
  deallocate(lt_q%ibzq,lt_q%ltq,lt_q%wtksym)
  deallocate(lt_q%op,lt_q%tab,lt_q%tabi,lt_q%tabo)
 end if
 if(i2==502.or.i2==602) deallocate(typat,xred,xcart,vkb,vkbd,vkbsign)
 if(lda) deallocate(kxclda)
 if(allocated(wfr))deallocate(wfr)
 if(allocated(wfg))deallocate(wfg)

 write(message,'(a)')' screening ended' 
 call wrtout(6,message,'COLL')

 call timab(301,2,tsec)

 end subroutine screening
!!***
