30# ifndef WIN32_LEAN_AND_MEAN
31# define WIN32_LEAN_AND_MEAN
52 template<
class T>
int SparseMatrix<T>::UseAlloc=0;
60 internalAllocator.set(blockSize);
72 _maxEntriesPerRow = 0;
84 if(
M._contiguous )
Resize(
M.rows ,
M._maxEntriesPerRow );
86 for(
int i=0 ; i<
rows ; i++ )
96 for(
int i=0 ; i<rows ; i++ )
e +=
int( rowSizes[i] );
102 if(
M._contiguous ) Resize(
M.rows ,
M._maxEntriesPerRow );
103 else Resize(
M.rows );
104 for(
int i=0 ; i<rows ; i++ )
106 SetRowSize( i ,
M.rowSizes[i] );
119 if( !
fp )
return false;
128 if( !
fp )
return false;
136 if(
fwrite( &rows ,
sizeof(
int ) , 1 ,
fp )!=1 )
return false;
137 if(
fwrite( rowSizes ,
sizeof(
int ) , rows ,
fp )!=rows )
return false;
138 for(
int i=0 ; i<rows ; i++ )
if(
fwrite( (*
this)[i] ,
sizeof(
MatrixEntry< T > ) , rowSizes[i] ,
fp )!=rowSizes[i] )
return false;
145 if(
fread( &r ,
sizeof(
int ) , 1 ,
fp )!=1 )
return false;
147 if(
fread( rowSizes ,
sizeof(
int ) , rows ,
fp )!=rows )
return false;
148 for(
int i=0 ; i<rows ; i++ )
166 if( _contiguous ){
if( _maxEntriesPerRow )
free( m_ppElements[0] ); }
167 else for(
int i=0 ; i<rows ; i++ ){
if( rowSizes[i] )
free( m_ppElements[i] ); }
168 free( m_ppElements );
174 rowSizes = (
int* )
malloc(
sizeof(
int ) * r );
175 memset( rowSizes , 0 ,
sizeof(
int ) * r );
179 _maxEntriesPerRow = 0;
187 if( _contiguous ){
if( _maxEntriesPerRow )
free( m_ppElements[0] ); }
188 else for(
int i=0 ; i<rows ; i++ ){
if( rowSizes[i] )
free( m_ppElements[i] ); }
189 free( m_ppElements );
195 rowSizes = (
int* )
malloc(
sizeof(
int ) * r );
196 memset( rowSizes , 0 ,
sizeof(
int ) * r );
199 for(
int i=1 ; i<r ; i++ ) m_ppElements[i] = m_ppElements[i-1] +
e;
202 _maxEntriesPerRow =
e;
210 if (count > _maxEntriesPerRow)
212 POISSON_THROW_EXCEPTION (
pcl::poisson::PoissonBadArgumentException,
"Attempted to set row size on contiguous matrix larger than max row size: (requested)"<< count <<
" > (maximum)" << _maxEntriesPerRow );
214 rowSizes[
row] = count;
216 else if(
row>=0 &&
row<rows )
218 if( UseAlloc ) m_ppElements[
row] = internalAllocator.newElements(count);
221 if( rowSizes[
row] )
free( m_ppElements[
row] );
231 Resize(this->m_N, this->
m_M);
239 (*
this)(
ij,
ij) = T(1);
253 for (
int i=0; i<this->
Rows(); i++)
255 for(
int ii=0;
ii<m_ppElements[i].
size();i++){m_ppElements[i][
ii].Value*=
V;}
264 for(
int i=0; i<
R.Rows(); i++){
266 int N=m_ppElements[i][
ii].N;
267 T Value=m_ppElements[i][
ii].Value;
269 R(i,
M.m_ppElements[N][
jj].N) += Value *
M.m_ppElements[N][
jj].Value;
282 for (
int i=0; i<rows; i++)
285 for(
int ii=0;
ii<rowSizes[i];
ii++){
286 temp+=m_ppElements[i][
ii].Value *
V.m_pV[m_ppElements[i][
ii].N];
297#pragma omp parallel for num_threads( threads ) schedule( static )
298 for(
int i=0 ; i<rows ; i++ )
302 for(
int j=0 ; j<rowSizes[i] ; j++ )
temp += m_ppElements[i][j].Value *
In.m_pV[m_ppElements[i][j].N];
324 for (
int i=0; i<this->
Rows(); i++)
327 M(m_ppElements[i][
ii].N,i) = m_ppElements[i][
ii].Value;
339 solution.Resize( b.Dimensions() );
343 r.Resize( solution.Dimensions() );
344 M.Multiply( solution , r );
348 for(
int i=0 ; i<r.Dimensions() ; i++ )
delta_new += r.m_pV[i] * r.m_pV[i];
353 q.Resize( d.Dimensions() );
356 M.Multiply( d ,
q , threads );
358 for(
int i=0 ; i<d.Dimensions() ; i++ )
dDotQ += d.m_pV[i] *
q.m_pV[i];
360#pragma omp parallel for num_threads( threads ) schedule( static )
361 for(
int i=0 ; i<r.Dimensions() ; i++ ) solution.m_pV[i] += d.m_pV[i] *
T2(
alpha );
364 r.Resize( solution.Dimensions() );
365 M.Multiply( solution , r , threads );
369#pragma omp parallel for num_threads( threads ) schedule( static )
370 for(
int i=0 ; i<r.Dimensions() ; i++ ) r.m_pV[i] = r.m_pV[i] -
q.m_pV[i] *
T2(
alpha);
374 for(
int i=0 ; i<r.Dimensions() ; i++ )
delta_new += r.m_pV[i]*r.m_pV[i];
376#pragma omp parallel for num_threads( threads ) schedule( static )
377 for(
int i=0 ; i<d.Dimensions() ; i++ ) d.m_pV[i] = r.m_pV[i] + d.m_pV[i] *
T2(
beta );
391 solution.Resize(
M.Columns());
469 int dim =
int(
In.Dimensions() );
475#pragma omp parallel for num_threads( threads ) reduction ( + : dcTerm )
476 for(
int t=0 ;
t<threads ;
t++ )
479 memset( out , 0 ,
sizeof(
T2 ) * dim );
495 dim =
int(
Out.Dimensions() );
497#pragma omp parallel for num_threads( threads ) schedule( static )
498 for(
int i=0 ; i<dim ; i++ )
507#pragma omp parallel for num_threads( threads )
508 for(
int t=0 ;
t<threads ;
t++ )
511 memset( out , 0 ,
sizeof(
T2 ) * dim );
525 dim =
int(
Out.Dimensions() );
527#pragma omp parallel for num_threads( threads ) schedule( static )
528 for(
int i=0 ; i<dim ; i++ )
540 int dim =
In.Dimensions();
543#pragma omp parallel for num_threads( threads )
544 for(
int t=0 ;
t<threads ;
t++ )
546#pragma omp parallel for num_threads( threads )
547 for(
int t=0 ;
t<threads ;
t++ )
566#pragma omp parallel for num_threads( threads ) schedule( static )
567 for(
int i=0 ; i<
Out.Dimensions() ; i++ )
574#if defined _WIN32 && !defined __MINGW32__
575#ifndef _AtomicIncrement_
576#define _AtomicIncrement_
609 const float*
in = &
In[0];
610 float* out = &
Out[0];
612#pragma omp parallel for num_threads( threads )
613 for(
int t=0 ;
t<threads ;
t++ )
614 for(
int i=partition[
t] ; i<partition[
t+1] ; i++ )
618 const float&
in_i =
in[i];
623 float v =
temp->Value;
630#pragma omp parallel for num_threads( threads )
631 for(
int i=0 ; i<
A.rows ; i++ )
635 const float&
in_i =
in[i];
640 float v =
temp->Value;
651 const double*
in = &
In[0];
652 double* out = &
Out[0];
655#pragma omp parallel for num_threads( threads )
656 for(
int t=0 ;
t<threads ;
t++ )
657 for(
int i=partition[
t] ; i<partition[
t+1] ; i++ )
661 const double&
in_i =
in[i];
673#pragma omp parallel for num_threads( threads )
674 for(
int i=0 ; i<
A.rows ; i++ )
678 const double&
in_i =
in[i];
696 int dim = b.Dimensions();
705 T2 *
_x = &x[0] , *
_r = &r[0] , *
_d = &d[0] , *
_q = &
q[0];
706 const T2*
_b = &b[0];
708 std::vector< int > partition( threads+1 );
711 for(
int i=0 ; i<
A.rows ; i++ ) eCount +=
A.rowSizes[i];
713#pragma omp parallel for num_threads( threads )
714 for(
int t=0 ;
t<threads ;
t++ )
717 for(
int i=0 ; i<
A.rows ; i++ )
727 partition[threads] =
A.rows;
734#pragma omp parallel for num_threads( threads ) schedule( static )
735 for(
int i=0 ; i<dim ; i++ )
_d[i] =
_r[i] =
temp[i] -
_r[i];
740#pragma omp parallel for num_threads( threads ) schedule( static )
741 for(
int i=0 ; i<dim ; i++ )
_d[i] =
_r[i] =
_b[i] -
_r[i];
744 for( std::size_t i=0 ; i<dim ; i++ )
delta_new +=
_r[i] *
_r[i];
757 for(
int i=0 ; i<dim ; i++ )
dDotQ +=
_d[i] *
_q[i];
759#pragma omp parallel for num_threads( threads ) schedule( static )
760 for(
int i=0 ; i<dim ; i++ )
_x[i] +=
_d[i] *
alpha;
761 if( (
ii%50)==(50-1) )
766#pragma omp parallel for num_threads( threads ) schedule( static )
767 for(
int i=0 ; i<dim ; i++ )
_r[i] =
_b[i] -
_r[i];
770#pragma omp parallel for num_threads( threads ) schedule( static )
771 for(
int i=0 ; i<dim ; i++ )
_r[i] -=
_q[i] *
alpha;
775 for( std::size_t i=0 ; i<dim ; i++ )
delta_new +=
_r[i] *
_r[i];
777#pragma omp parallel for num_threads( threads ) schedule( static )
778 for(
int i=0 ; i<dim ; i++ )
_d[i] =
_r[i] +
_d[i] *
beta;
787 int threads =
scratch.threads();
789 int dim =
int( b.Dimensions() );
791 if( reset ) x.Resize( dim );
793 T2 *
_x = &x[0] , *
_r = &r[0] , *
_d = &d[0] , *
_q = &
q[0];
794 const T2*
_b = &b[0];
799 A.Multiply( x ,
temp ,
scratch ,
addDCTerm ) ,
A.Multiply(
temp , r ,
scratch ,
addDCTerm ) ,
A.Multiply( b ,
temp ,
scratch ,
addDCTerm );
800#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
806#pragma omp parallel for num_threads( threads ) reduction ( + : delta_new )
821#pragma omp parallel for num_threads( threads ) reduction( + : dDotQ )
822 for(
int i=0 ; i<dim ; i++ )
dDotQ +=
_d[i] *
_q[i];
826 if( (
ii%50)==(50-1) )
828#pragma omp parallel for num_threads( threads )
829 for(
int i=0 ; i<dim ; i++ )
_x[i] +=
_d[i] *
alpha;
833#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
837#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
841#pragma omp parallel for num_threads( threads )
842 for(
int i=0 ; i<dim ; i++ )
_d[i] =
_r[i] +
_d[i] *
beta;
851 int dim =
int( b.Dimensions() );
853 if( threads<1 ) threads = 1;
854 if( threads>1 )
outScratch.resize( threads , dim );
855 if( reset ) x.Resize( dim );
859 T2 *
_x = &x[0] , *
_r = &r[0] , *
_d = &d[0] , *
_q = &
q[0];
860 const T2*
_b = &b[0];
866 if( threads>1 )
A.Multiply( x ,
temp ,
outScratch ,
addDCTerm ) ,
A.Multiply(
temp , r ,
outScratch ,
addDCTerm ) ,
A.Multiply( b ,
temp ,
outScratch ,
addDCTerm );
868#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
875#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
899#pragma omp parallel for num_threads( threads ) reduction( + : dDotQ )
900 for(
int i=0 ; i<dim ; i++ )
dDotQ +=
_d[i] *
_q[i];
905 if( (
ii%50)==(50-1) )
907#pragma omp parallel for num_threads( threads )
908 for(
int i=0 ; i<dim ; i++ )
_x[i] +=
_d[i] *
alpha;
920#pragma omp parallel for num_threads( threads ) reduction ( + : delta_new )
925#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
930#pragma omp parallel for num_threads( threads )
931 for(
int i=0 ; i<dim ; i++ )
_d[i] =
_r[i] +
_d[i] *
beta;
944 solution.Resize(b.Dimensions());
948 for(
int i=0 ; i<
iters ; i++ )
950 M.Multiply( solution ,
Md );
955 for(
int j=0 ; j<
int(
M.rows) ; j++ ) solution[j] += (b[j]-
Md[j])/
diagonal[j];
Iterator class for point clouds with or without given indices.
std::size_t size() const
Size of the range the iterator is going through.
An exception that is thrown when the arguments number or type is wrong/unhandled.
SparseMatrix< T > Multiply(const SparseMatrix< T > &M) const
SparseMatrix< T > Transpose() const
static void SetAllocator(int blockSize)
SparseMatrix< T > & operator*=(const T &V)
SparseMatrix< T > & operator=(const SparseMatrix< T > &M)
bool write(FILE *fp) const
static int SolveSymmetric(const SparseMatrix< T > &M, const Vector< T2 > &b, int iters, Vector< T2 > &solution, const T2 eps=1e-8, int reset=1, int threads=1)
void SetRowSize(int row, int count)
static int Solve(const SparseMatrix< T > &M, const Vector< T > &b, int iters, Vector< T > &solution, const T eps=1e-8)
SparseMatrix< T > operator*(const T &V) const
static int UseAllocator(void)
static Allocator< MatrixEntry< T > > internalAllocator
static int Solve(const SparseSymmetricMatrix< T > &M, const Vector< T2 > &b, int iters, Vector< T2 > &solution, T2 eps=1e-8, int reset=1, int threads=0, bool addDCTerm=false, bool solveNormal=false)
void getDiagonal(Vector< T2 > &diagonal) const
Vector< T2 > operator*(const Vector< T2 > &V) const
Vector< T2 > Multiply(const Vector< T2 > &V) const
static int SolveAtomic(const SparseSymmetricMatrix< T > &M, const Vector< T2 > &b, int iters, Vector< T2 > &solution, T2 eps=1e-8, int reset=1, int threads=0, bool solveNormal=false)
PCL_EXPORTS void Multiply(const double in1[2], const double in2[2], double out[2])
void MultiplyAtomic(const SparseSymmetricMatrix< T > &A, const Vector< float > &In, Vector< float > &Out, int threads, const int *partition=NULL)
void AtomicIncrement(volatile float *ptr, float addend)
void read(std::istream &stream, Type &value)
Function for reading data from a stream.
void write(std::ostream &stream, Type value)
Function for writing data to a stream.