// TuttleOFX 1 | -- banner left over from the generated source listing
00001 #include "analyze.hpp" 00002 00003 #include "detail/FileNumbers.hpp" 00004 #include "detail/FileStrings.hpp" 00005 #include "commonDefinitions.hpp" 00006 00007 #include <boost/regex.hpp> 00008 #include <boost/unordered_map.hpp> 00009 #include <boost/lambda/lambda.hpp> 00010 #include <boost/foreach.hpp> 00011 #include <set> 00012 00013 00014 namespace sequenceParser { 00015 00016 using detail::FileNumbers; 00017 using detail::FileStrings; 00018 namespace bfs = boost::filesystem; 00019 00020 bool detectDirectoryInResearch( std::string& researchPath, std::vector<std::string>& filters, std::string& filename ) 00021 { 00022 if( bfs::exists( researchPath ) ) 00023 { 00024 if( !bfs::is_directory( researchPath ) ) 00025 { 00026 // the researchPath is an existing file, we search into the parent directory with filtering these filename 00027 // warning: can find a sequence based on a filename 00028 bfs::path tmpPath( researchPath ); 00029 filename = researchPath; 00030 00031 if( tmpPath.has_parent_path() ) 00032 researchPath = tmpPath.parent_path().string(); 00033 else 00034 researchPath = "."; 00035 } 00036 // else 00037 // the researchPath is a directory, we search into the directory without filtering 00038 } 00039 else 00040 { 00041 bfs::path tmpPath( researchPath ); 00042 if( !tmpPath.has_parent_path() ) 00043 { 00044 filters.push_back( researchPath ); 00045 researchPath = "."; 00046 return true; 00047 } 00048 bfs::path parentPath( tmpPath.parent_path() ); 00049 if( !bfs::exists( parentPath ) ) 00050 { 00051 // researchPath and it parent don't exists, could not find file/sequence/folder 00052 return false; 00053 } 00054 // the researchPath is not a directory, but it parent is a directory 00055 // we search in this parent directory, with the filtering pattern 00056 filters.push_back( tmpPath.filename().string() ); 00057 researchPath = parentPath.string(); 00058 } 00059 return true; 00060 } 00061 00062 Sequence privateBuildSequence( 00063 const Sequence& defaultSeq, 
00064 const FileStrings& stringParts, 00065 const std::vector<FileNumbers>::const_iterator& numberPartsBegin, 00066 const std::vector<FileNumbers>::const_iterator& numberPartsEnd, 00067 const std::size_t index, 00068 const std::size_t padding, 00069 const bool strictPadding 00070 ) 00071 { 00072 const std::size_t len = numberPartsBegin->size(); 00073 Sequence sequence( defaultSeq ); 00074 00075 // fill information in the sequence... 00076 for( std::size_t i = 0; i < index; ++i ) 00077 { 00078 sequence._prefix += stringParts[i]; 00079 sequence._prefix += numberPartsBegin->getString( i ); 00080 } 00081 sequence._prefix += stringParts[index]; 00082 for( std::size_t i = index + 1; i < len; ++i ) 00083 { 00084 sequence._suffix += stringParts[i]; 00085 sequence._suffix += numberPartsBegin->getString( i ); 00086 } 00087 sequence._suffix += stringParts[len]; 00088 00089 std::vector<FileNumbers>::const_iterator numberPartsLast = numberPartsEnd; 00090 --numberPartsLast; 00091 00092 // standard case, one sequence detected 00093 sequence._firstTime = numberPartsBegin->getTime( index ); 00094 sequence._lastTime = numberPartsLast->getTime( index ); 00095 sequence._nbFiles = std::distance( numberPartsBegin, numberPartsEnd ); 00096 00097 sequence.extractStep( numberPartsBegin, numberPartsEnd, index ); 00098 //sequence.extractPadding( numberPartsBegin, numberPartsEnd, index ); 00099 sequence._padding = padding; 00100 sequence._strictPadding = strictPadding; 00101 //sequence.extractIsStrictPadding( numberPartsBegin, numberPartsEnd, index, sequence._padding ); 00102 00103 return sequence; 00104 } 00105 00106 /** 00107 * 00108 * @param result 00109 * @param numberPartsBegin 00110 * @param numberPartsEnd 00111 * @param index 00112 */ 00113 void privateBuildSequencesAccordingToPadding( 00114 std::vector<Sequence>& result, 00115 const Sequence& defaultSeq, 00116 const FileStrings& stringParts, 00117 const std::vector<FileNumbers>::iterator& numberPartsBegin, 00118 const 
std::vector<FileNumbers>::iterator numberPartsEnd, 00119 const int index ) 00120 { 00121 std::set<std::size_t> paddings; 00122 std::set<std::size_t> ambiguousPaddindDigits; 00123 for( std::vector<FileNumbers>::const_iterator it = numberPartsBegin; 00124 it != numberPartsEnd; 00125 ++it ) 00126 { 00127 const std::size_t padding = it->getPadding(index); 00128 const std::size_t nbDigits = it->getNbDigits(index); 00129 00130 paddings.insert( padding ); 00131 00132 if( padding == 0 ) 00133 { 00134 ambiguousPaddindDigits.insert( nbDigits ); 00135 } 00136 } 00137 00138 if( paddings.size() == 1 ) 00139 { 00140 // standard case: only one padding used in the sequence! 00141 const std::size_t p = *paddings.begin(); 00142 // simple sort 00143 std::sort( numberPartsBegin, numberPartsEnd, FileNumbers::SortByNumber() ); 00144 result.push_back( privateBuildSequence( defaultSeq, stringParts, numberPartsBegin, numberPartsEnd, index, p, (p!=0) ) ); 00145 return; 00146 } 00147 00148 bool onlyConsiderPadding = false; 00149 if( paddings.find( 0 ) == paddings.end() ) 00150 { 00151 // No element without padding. 00152 // All parts are prefixed by 0, only strict padding, 00153 // so we can sort by padding without ambiguity 00154 onlyConsiderPadding = true; 00155 } 00156 else 00157 { 00158 // We have a mix of padding and no padding. 00159 // It may be the same number of digits (strict or ambiguous padding: [09, 10]). 
00160 // Some ambiguous cases: 00161 // -------------------------------------------------------------------------------- 00162 // | sort by padding | sort by padding | sort by digits | 00163 // -------------------------------------------------------------------------------- 00164 // | number padding digits | number padding digits | number padding digits | 00165 // | 1 0 1 | 100 0 3 | 1 0 1 | 00166 // | 5 0 1 | 102 0 3 | 5 0 1 | 00167 // | 10 0 2 | | 10 0 2 | 00168 // | 100 0 3 | 001 3 3 | | 00169 // | 102 0 3 | 002 3 3 | 001 3 3 | 00170 // | 1000 0 4 | 099 3 3 | 002 3 3 | 00171 // | | | 099 3 3 | 00172 // | 001 3 3 | 0001 4 4 | 100 0 3 | 00173 // | 002 3 3 | 0123 4 4 | 102 0 3 | 00174 // | 099 3 3 | 1234 4 4 | | 00175 // | | | 0001 4 4 | 00176 // | 0001 4 4 | | 0123 4 4 | 00177 // | 0123 4 4 | | 1234 4 4 | 00178 // | 1234 4 4 | | | 00179 // | | | 10000 0 5 | 00180 // -------------------------------------------------------------------------------- 00181 // | | The sequence without | One sequence without | 00182 // | One sequence without | padding can be merge | padding, should use a | 00183 // | padding | in sequence with | sort by padding | 00184 // | | padding 3 | | 00185 // -------------------------------------------------------------------------------- 00186 // | YES | NO : sort by digits | NO : sort by padding | 00187 // -------------------------------------------------------------------------------- 00188 onlyConsiderPadding = false; 00189 BOOST_FOREACH( const std::size_t dig, ambiguousPaddindDigits ) 00190 { 00191 if( paddings.find( dig ) == paddings.end() ) 00192 { 00193 // if one digits from ambiguous digits doesn't correspond to 00194 // a padding... we keep the whole sequence without padding. 
00195 onlyConsiderPadding = true; 00196 break; 00197 } 00198 } 00199 } 00200 00201 if( onlyConsiderPadding ) 00202 { 00203 //std::cout << "Detector onlyConsiderPadding: " << __LINE__ << std::endl; 00204 // sort by padding 00205 std::sort( numberPartsBegin, numberPartsEnd, FileNumbers::SortByPadding() ); 00206 // split when the padding changed 00207 std::vector<FileNumbers>::const_iterator start = numberPartsBegin; 00208 for( std::vector<FileNumbers>::const_iterator it = boost::next(start); it != numberPartsEnd; ++it ) 00209 { 00210 if( start->getPadding(index) != it->getPadding(index) ) 00211 { 00212 const std::size_t p = start->getPadding(index); 00213 result.push_back( privateBuildSequence( defaultSeq, stringParts, start, it, index, p, (p!=0) ) ); 00214 start = it; 00215 } 00216 } 00217 const std::size_t p = start->getPadding(index); 00218 result.push_back( privateBuildSequence( defaultSeq, stringParts, start, numberPartsEnd, index, p, (p!=0) ) ); 00219 return; 00220 } 00221 else 00222 { 00223 //std::cout << "Detector onlyConsiderDigits: " << __LINE__ << std::endl; 00224 // sort by digits 00225 std::sort( numberPartsBegin, numberPartsEnd, FileNumbers::SortByDigit() ); 00226 // split when the number of digits changed 00227 std::vector<FileNumbers>::const_iterator start = numberPartsBegin; 00228 for( std::vector<FileNumbers>::const_iterator it = boost::next(numberPartsBegin); it != numberPartsEnd; ++it ) 00229 { 00230 if( start->getNbDigits(index) != it->getNbDigits(index) ) 00231 { 00232 const std::size_t p = boost::prior(it)->getPadding(index); 00233 const std::size_t pStart = start->getPadding(index); 00234 result.push_back( privateBuildSequence( defaultSeq, stringParts, start, it, index, pStart, (p!=pStart) ) ); 00235 start = it; 00236 } 00237 } 00238 const std::size_t p = boost::prior(numberPartsEnd)->getPadding(index); 00239 const std::size_t pStart = start->getPadding(index); 00240 result.push_back( privateBuildSequence( defaultSeq, stringParts, start, 
numberPartsEnd, index, pStart, (p!=pStart) ) ); 00241 return; 00242 } 00243 } 00244 00245 00246 bool getVaryingNumber( std::ssize_t& index, const FileNumbers& a, const FileNumbers& b ) 00247 { 00248 BOOST_ASSERT( a.size() == b.size() ); 00249 bool foundOne = false; 00250 for( std::size_t i = 0; i < a.size(); ++i ) 00251 { 00252 if( a.getString(i) != b.getString(i) ) 00253 { 00254 if( foundOne ) 00255 { 00256 index = -1; 00257 return false; // more than one element founded 00258 } 00259 foundOne = true; 00260 index = i; 00261 } 00262 } 00263 if( !foundOne ) 00264 index = -1; 00265 return foundOne; // we found one varying index 00266 } 00267 00268 std::vector<Sequence> buildSequences( const boost::filesystem::path& directory, const FileStrings& stringParts, std::vector<FileNumbers>& numberParts, const EMaskOptions& desc ) 00269 { 00270 Sequence defaultSeq( directory, desc ); 00271 BOOST_ASSERT( numberParts.size() > 0 ); 00272 // assert all FileNumbers have the same size... 00273 BOOST_ASSERT( numberParts.front().size() == numberParts.back().size() ); 00274 const std::size_t len = numberParts.front().size(); 00275 std::vector<Sequence> result; 00276 00277 if( numberParts.size() <= 1 ) 00278 { 00279 privateBuildSequencesAccordingToPadding( result, defaultSeq, stringParts, numberParts.begin(), numberParts.end(), len-1 ); 00280 return result; 00281 } 00282 00283 // detect which part is the sequence number 00284 // for the moment, accept only one sequence 00285 // but we can easily support multi-sequences 00286 std::vector<std::size_t> allIndex; // vector of indices (with 0 < index < len) with value changes 00287 for( std::size_t i = 0; i < len; ++i ) 00288 { 00289 const std::string t = numberParts.front().getString( i ); 00290 00291 BOOST_FOREACH( const FileNumbers& sn, numberParts ) 00292 { 00293 if( sn.getString( i ) != t ) 00294 { 00295 allIndex.push_back( i ); 00296 break; 00297 } 00298 } 00299 } 00300 00301 //std::cout << "allIndex.size(): " << allIndex.size() << 
std::endl; 00302 00303 if( allIndex.size() == 1 ) 00304 { 00305 // if it's a simple sequence, but may be mix multiple paddings 00306 privateBuildSequencesAccordingToPadding( result, defaultSeq, stringParts, numberParts.begin(), numberParts.end(), allIndex.front() ); 00307 return result; 00308 } 00309 00310 // it's a multi-sequence 00311 00312 // ambiguous example 00313 // 1 2 3 00314 // 1 3 3 00315 // 1 4 3 00316 // 1 5 3 // could go in both sequences 00317 //// split here 00318 // 1 5 4 00319 // 1 5 5 00320 // 1 5 6 00321 // 1 5 7 00322 00323 std::sort( numberParts.begin(), numberParts.end(), FileNumbers::SortByPadding() ); 00324 00325 std::vector<FileNumbers>::iterator start = numberParts.begin(); 00326 std::vector<FileNumbers>::iterator it = boost::next(start); 00327 std::vector<FileNumbers>::iterator itEnd = numberParts.end(); 00328 std::ssize_t previousIndex = -1; 00329 std::ssize_t index = -1; 00330 bool split = false; 00331 00332 for( ; it != itEnd; ++it ) 00333 { 00334 //std::cout << "________________________________________" << std::endl; 00335 //std::cout << "start: " << *start << std::endl; 00336 //std::cout << "it: " << *it << std::endl; 00337 if( getVaryingNumber( index, *start, *it ) ) 00338 { 00339 if( previousIndex != -1 && // we previously have a sequence and 00340 index != previousIndex ) // the index is not the same than previous: split! 00341 { 00342 split = true; 00343 } 00344 // else 00345 // { 00346 // // we don't have a sequence before, there is now one varying number, 00347 // // so it's the next sequence 00348 // } 00349 } 00350 else 00351 { 00352 // more than one varying number: split in all cases! 00353 split = true; 00354 // if no sequence before... the file is alone... 00355 // Set the number as the last number. 
00356 if( previousIndex == -1 ) 00357 previousIndex = start->size() - 1; 00358 } 00359 if( split ) 00360 { 00361 privateBuildSequencesAccordingToPadding( result, defaultSeq, stringParts, start, it, previousIndex ); 00362 split = false; 00363 index = -1; 00364 start = it; 00365 } 00366 previousIndex = index; 00367 } 00368 if( previousIndex == -1 ) 00369 previousIndex = start->size() - 1; 00370 privateBuildSequencesAccordingToPadding( result, defaultSeq, stringParts, start, it, previousIndex ); 00371 00372 return result; 00373 } 00374 00375 std::size_t decomposeFilename( const std::string& filename, FileStrings& stringParts, FileNumbers& numberParts, const EMaskOptions& options ) 00376 { 00377 static const std::size_t max = std::numeric_limits<std::size_t>::digits10; 00378 std::string regex; 00379 if( options & eMaskOptionsNegativeIndexes ) 00380 { 00381 regex = "[\\+\\-]?+\\d{1," + boost::lexical_cast<std::string>( max ) + "}"; 00382 } 00383 else 00384 { 00385 regex = "\\d{1," + boost::lexical_cast<std::string>( max ) + "}"; 00386 } 00387 const boost::regex re( regex ); 00388 static const int subs[] = { -1, 0 }; // get before match and current match 00389 boost::sregex_token_iterator m( filename.begin(), filename.end(), re, subs ); 00390 boost::sregex_token_iterator end; 00391 00392 // std::cout << "________________________________________" << std::endl; 00393 // std::cout << "filename: " << filename << std::endl; 00394 // std::cout << "regex: " << regex << std::endl; 00395 while( m != end ) 00396 { 00397 // begin with string id, can be an empty string if str begins with a number 00398 // std::cout << "stringPart: " << *m << std::endl; 00399 stringParts.getId().push_back( *m++ ); 00400 if( m != end ) // if end with a string and not a number 00401 { 00402 // std::cout << "numberPart: " << *m << std::endl; 00403 numberParts.push_back( *m++ ); 00404 } 00405 } 00406 if( stringParts.getId().size() == numberParts.size() ) 00407 { 00408 stringParts.getId().push_back( "" ); 
// we end with an empty string 00409 } 00410 //std::cout << numberParts.size() << std::endl; 00411 return numberParts.size(); 00412 } 00413 00414 }