1 | subroutine hecmw_precond_SSOR_33_setup(hecMAT)
| 2 | implicit none
| 3 | type(hecmwST_matrix), intent(inout) :: hecMAT
| 4 | integer(kind=kint ) :: NPL, NPU, NPCL, NPCU
| 5 | real (kind=kreal), allocatable :: CD(:)
| 6 | integer(kind=kint ) :: NCOLOR_IN
| 7 | real (kind=kreal) :: SIGMA_DIAG
| 8 | real (kind=kreal) :: ALUtmp(3,3), PW(3)
| 9 | integer(kind=kint ) :: ii, i, j, k
| 10 | integer(kind=kint ) :: nthreads = 1
| 11 | integer(kind=kint ), allocatable :: perm_tmp(:)
| 12 | !real (kind=kreal) :: t0
| 13 |
| 14 | !t0 = hecmw_Wtime()
| 15 | !write(*,*) 'DEBUG: SSOR setup start', hecmw_Wtime()-t0
| 16 |
| 17 | if (INITIALIZED) then
| 18 | if (hecMAT%Iarray(98) == 1) then ! need symbolic and numerical setup
| 19 | call hecmw_precond_SSOR_33_clear(hecMAT)
| 20 | else if (hecMAT%Iarray(97) == 1) then ! need numerical setup only
| 21 | call hecmw_precond_SSOR_33_clear(hecMAT) ! TEMPORARY
| 22 | else
| 23 | return
| 24 | endif
| 25 | endif
| 26 |
| 27 | !$ nthreads = omp_get_max_threads()
| 28 |
| 29 | N = hecMAT%N
| 30 | ! N = hecMAT%NP
| 31 | NCOLOR_IN = hecmw_mat_get_ncolor_in(hecMAT)
| 32 | SIGMA_DIAG = hecmw_mat_get_sigma_diag(hecMAT)
| 33 | NContact = hecMAT%cmat%n_val
| 34 |
| 35 | if (NContact.gt.0) then
| 36 | call hecmw_cmat_LU( hecMAT )
| 37 | endif
| 38 |
| 39 | if (nthreads == 1) then
| 40 | NColor = 1
| 41 | allocate(COLORindex(0:1), perm(N), iperm(N))
| 42 | COLORindex(0) = 0
| 43 | COLORindex(1) = N
| 44 | do i=1,N
| 45 | perm(i) = i
| 46 | iperm(i) = i
| 47 | end do
| 48 | else
| 49 | allocate(COLORindex(0:N), perm_tmp(N), perm(N), iperm(N))
| 50 | call hecmw_matrix_ordering_RCM(N, hecMAT%indexL, hecMAT%itemL, &
| 51 | hecMAT%indexU, hecMAT%itemU, perm_tmp, iperm)
| 52 | !write(*,*) 'DEBUG: RCM ordering done', hecmw_Wtime()-t0
| 53 | call hecmw_matrix_ordering_MC(N, hecMAT%indexL, hecMAT%itemL, &
| 54 | hecMAT%indexU, hecMAT%itemU, perm_tmp, &
| 55 | NCOLOR_IN, NColor, COLORindex, perm, iperm)
| 56 | !write(*,*) 'DEBUG: MC ordering done', hecmw_Wtime()-t0
| 57 | deallocate(perm_tmp)
| 58 |
| 59 | !call write_debug_info
| 60 | endif
| 61 |
| 62 | NPL = hecMAT%indexL(N)
| 63 | NPU = hecMAT%indexU(N)
| 64 | allocate(indexL(0:N), indexU(0:N), itemL(NPL), itemU(NPU))
| 65 | call hecmw_matrix_reorder_profile(N, perm, iperm, &
| 66 | hecMAT%indexL, hecMAT%indexU, hecMAT%itemL, hecMAT%itemU, &
| 67 | indexL, indexU, itemL, itemU)
| 68 | !write(*,*) 'DEBUG: reordering profile done', hecmw_Wtime()-t0
| 69 |
| 70 | !call check_ordering
| 71 |
| 72 | allocate(D(9*N), AL(9*NPL), AU(9*NPU))
| 73 | call hecmw_matrix_reorder_values(N, 3, perm, iperm, &
| 74 | hecMAT%indexL, hecMAT%indexU, hecMAT%itemL, hecMAT%itemU, &
| 75 | hecMAT%AL, hecMAT%AU, hecMAT%D, &
| 76 | indexL, indexU, itemL, itemU, AL, AU, D)
| 77 | !write(*,*) 'DEBUG: reordering values done', hecmw_Wtime()-t0
| 78 |
| 79 | call hecmw_matrix_reorder_renum_item(N, perm, indexL, itemL)
| 80 | call hecmw_matrix_reorder_renum_item(N, perm, indexU, itemU)
| 81 |
| 82 | if (NContact.gt.0) then
| 83 | NPCL = hecMAT%indexCL(N)
| 84 | NPCU = hecMAT%indexCU(N)
| 85 | allocate(indexCL(0:N), indexCU(0:N), itemCL(NPCL), itemCU(NPCU))
| 86 | call hecmw_matrix_reorder_profile(N, perm, iperm, &
| 87 | hecMAT%indexCL, hecMAT%indexCU, hecMAT%itemCL, hecMAT%itemCU, &
| 88 | indexCL, indexCU, itemCL, itemCU)
| 89 |
| 90 | allocate(CD(9*N), CAL(9*NPCL), CAU(9*NPCU))
| 91 | call hecmw_matrix_reorder_values(N, 3, perm, iperm, &
| 92 | hecMAT%indexCL, hecMAT%indexCU, hecMAT%itemCL, hecMAT%itemCU, &
| 93 | hecMAT%CAL, hecMAT%CAU, hecMAT%D, &
| 94 | indexCL, indexCU, itemCL, itemCU, CAL, CAU, CD)
| 95 | deallocate(CD)
| 96 |
| 97 | call hecmw_matrix_reorder_renum_item(N, perm, indexCL, itemCL)
| 98 | call hecmw_matrix_reorder_renum_item(N, perm, indexCU, itemCU)
| 99 | end if
| 100 |
| 101 | allocate(ALU(9*N))
| 102 | ALU = 0.d0
| 103 |
| 104 | do ii= 1, 9*N
| 105 | ALU(ii) = D(ii)
| 106 | enddo
| 107 |
| 108 | if (NContact.gt.0) then
| 109 | do k= 1, hecMAT%cmat%n_val
| 110 | if (hecMAT%cmat%pair(k)%i.ne.hecMAT%cmat%pair(k)%j) cycle
| 111 | ii = iperm( hecMAT%cmat%pair(k)%i )
| 112 | ALU(9*ii-8) = ALU(9*ii-8) + hecMAT%cmat%pair(k)%val(1, 1)
| 113 | ALU(9*ii-7) = ALU(9*ii-7) + hecMAT%cmat%pair(k)%val(1, 2)
| 114 | ALU(9*ii-6) = ALU(9*ii-6) + hecMAT%cmat%pair(k)%val(1, 3)
| 115 | ALU(9*ii-5) = ALU(9*ii-5) + hecMAT%cmat%pair(k)%val(2, 1)
| 116 | ALU(9*ii-4) = ALU(9*ii-4) + hecMAT%cmat%pair(k)%val(2, 2)
| 117 | ALU(9*ii-3) = ALU(9*ii-3) + hecMAT%cmat%pair(k)%val(2, 3)
| 118 | ALU(9*ii-2) = ALU(9*ii-2) + hecMAT%cmat%pair(k)%val(3, 1)
| 119 | ALU(9*ii-1) = ALU(9*ii-1) + hecMAT%cmat%pair(k)%val(3, 2)
| 120 | ALU(9*ii ) = ALU(9*ii ) + hecMAT%cmat%pair(k)%val(3, 3)
| 121 | enddo
| 122 | endif
| 123 |
| 124 | !$omp parallel default(none),private(ii,ALUtmp,k,i,j,PW),shared(N,ALU,SIGMA_DIAG)
| 125 | !$omp do
| 126 | do ii= 1, N
| 127 | ALUtmp(1,1)= ALU(9*ii-8) * SIGMA_DIAG
| 128 | ALUtmp(1,2)= ALU(9*ii-7)
| 129 | ALUtmp(1,3)= ALU(9*ii-6)
| 130 | ALUtmp(2,1)= ALU(9*ii-5)
| 131 | ALUtmp(2,2)= ALU(9*ii-4) * SIGMA_DIAG
| 132 | ALUtmp(2,3)= ALU(9*ii-3)
| 133 | ALUtmp(3,1)= ALU(9*ii-2)
| 134 | ALUtmp(3,2)= ALU(9*ii-1)
| 135 | ALUtmp(3,3)= ALU(9*ii ) * SIGMA_DIAG
| 136 | do k= 1, 3
| 137 | ALUtmp(k,k)= 1.d0/ALUtmp(k,k)
| 138 | do i= k+1, 3
| 139 | ALUtmp(i,k)= ALUtmp(i,k) * ALUtmp(k,k)
| 140 | do j= k+1, 3
| 141 | PW(j)= ALUtmp(i,j) - ALUtmp(i,k)*ALUtmp(k,j)
| 142 | enddo
| 143 | do j= k+1, 3
| 144 | ALUtmp(i,j)= PW(j)
| 145 | enddo
| 146 | enddo
| 147 | enddo
| 148 | ALU(9*ii-8)= ALUtmp(1,1)
| 149 | ALU(9*ii-7)= ALUtmp(1,2)
| 150 | ALU(9*ii-6)= ALUtmp(1,3)
| 151 | ALU(9*ii-5)= ALUtmp(2,1)
| 152 | ALU(9*ii-4)= ALUtmp(2,2)
| 153 | ALU(9*ii-3)= ALUtmp(2,3)
| 154 | ALU(9*ii-2)= ALUtmp(3,1)
| 155 | ALU(9*ii-1)= ALUtmp(3,2)
| 156 | ALU(9*ii )= ALUtmp(3,3)
| 157 | enddo
| 158 | !$omp end do
| 159 | !$omp end parallel
| 160 |
| 161 | isFirst = .true.
| 162 |
| 163 | INITIALIZED = .true.
| 164 | hecMAT%Iarray(98) = 0 ! symbolic setup done
| 165 | hecMAT%Iarray(97) = 0 ! numerical setup done
| 166 |
| 167 | !write(*,*) 'DEBUG: SSOR setup done', hecmw_Wtime()-t0
| 168 |
| 169 | end subroutine hecmw_precond_SSOR_33_setup
|
|