Skip to content

Commit c86cf4f

Browse files
committed
patch support show progress;
1 parent c603ba7 commit c86cf4f

File tree

8 files changed

+94
-26
lines changed

8 files changed

+94
-26
lines changed

README.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -148,10 +148,10 @@ options:
148148
DEFAULT -p-4; requires more memory!
149149
-p-search-searchThreadNumber
150150
DEFAULT searchThreadNumber same as parallelThreadNumber;
151-
but multi-thread search need frequent random disk reads when
152-
-s-matchBlockSize or -block-fastMatchBlockSize(run with -m) is too small,
153-
causes slowdown; at this time, need to reduce the number of searchThreadNumber!
154-
if (searchThreadNumber<=1) then to close multi-thread search mode.
151+
WARNING when the old file is on an HDD: multi-thread search needs frequent random
152+
disk reads when using -s-matchBlockSize or -block-fastMatchBlockSize (run with -m),
153+
which causes slowdown; in that case, disable multi-thread search mode
154+
(searchThreadNumber<=1) or reduce searchThreadNumber!
155155
-c-compressType[-compressLevel]
156156
set outDiffFile Compress type, DEFAULT uncompress;
157157
for resave diffFile,recompress diffFile to outDiffFile by new set;

README_cn.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ $ git clone https://github.com/sisong/bzip2.git ../bzip2
121121
如果设置为-block-0,意思是关闭基于块的提前匹配;
122122
快速块匹配大小fastMatchBlockSize>=4, 推荐128,4k,64k等;
123123
如果新版本和旧版本相同数据比较多,那diff速度就会比较快,并且减少内存占用,
124-
但有很小的可能补丁包会变大
124+
但有很小的可能补丁包会稍微变大
125125
-cache
126126
必须和-m配合使用;
127127
给较慢的匹配开启一个大型缓冲区,来加快匹配速度(不影响补丁大小), 默认不开启;
@@ -147,9 +147,9 @@ $ git clone https://github.com/sisong/bzip2.git ../bzip2
147147
默认为4;需要占用较多的内存。
148148
-p-search-searchThreadNumber
149149
默认情况下搜索线程数searchThreadNumber的值和parallelThreadNumber相同;
150-
但当 -s-matchBlockSize 或 -block-fastMatchBlockSize(和-m配合时) 的值太小时,多线程
151-
搜索需要频繁的随机磁盘读取,导致速度变慢;这时就需要降低searchThreadNumber搜索线程数!
152-
如果设置searchThreadNumber<=1,可以关闭多线程搜索模式。
150+
旧文件在HDD硬盘上时的警告:在使用-s-matchBlockSize 或 -block-fastMatchBlockSize(和-m配合时)时,
151+
多线程搜索需要频繁的随机磁盘读取,这可能会导致速度变慢;这时就需要关闭(searchThreadNumber<=1)多
152+
线程搜索模式或者降低搜索线程数searchThreadNumber的值!
153153
-c-compressType[-compressLevel]
154154
设置补丁数据使用的压缩算法和压缩级别等, 默认不压缩;
155155
补丁另存时,使用新的压缩参数设置来输出新补丁;
@@ -173,7 +173,7 @@ $ git clone https://github.com/sisong/bzip2.git ../bzip2
173173
警告: lzma和lzma2是不同的压缩编码格式。
174174
-c-zstd[-{0..22}[-dictBits]] 默认级别 20
175175
压缩字典比特数dictBits 可以为10到30, 默认为23。
176-
支持多线程并行压缩,较快。
176+
支持多线程并行压缩,较快(但内存占用会比较大)
177177
-C-checksumType
178178
为文件夹间diff设置数据校验算法, 默认为fadler64;
179179
支持的校验选项:

compress_plugin_demo.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1212,6 +1212,10 @@ int _default_setParallelThreadNumber(hdiff_TCompress* compressPlugin,int threadN
12121212
&&((dict_bits-1)>=_ZSTD_WINDOWLOG_MIN)) {
12131213
--dict_bits;
12141214
}
1215+
# if (IS_NOTICE_compress_canceled)
1216+
printf(" (used one zstd dictSize: %" PRIu64 " (input data: %" PRIu64 "))\n",
1217+
((hpatch_StreamPos_t)1)<<dict_bits,in_data->streamSize);
1218+
# endif
12151219
ret=ZSTD_CCtx_setParameter(s,ZSTD_c_windowLog,dict_bits);
12161220
if (ZSTD_isError(ret)) _compress_error_return("ZSTD_CCtx_setParameter(,ZSTD_c_windowLog)");
12171221
if (plugin->thread_num>1){

hdiffz.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -222,10 +222,10 @@ static void printUsage(){
222222
" DEFAULT -p-4; requires more memory!\n"
223223
" -p-search-searchThreadNumber\n"
224224
" DEFAULT searchThreadNumber same as parallelThreadNumber;\n"
225-
" but multi-thread search need frequent random disk reads when\n"
226-
" -s-matchBlockSize or -block-fastMatchBlockSize(run with -m) is too small,\n"
227-
" causes slowdown; at this time, need to reduce the number of searchThreadNumber!\n"
228-
" if (searchThreadNumber<=1) then to close multi-thread search mode.\n"
225+
" old file on HDD hard drives WARNING: multi-thread search need frequent random\n"
226+
" disk reads when -s-matchBlockSize or -block-fastMatchBlockSize(run with -m),\n"
227+
" causes slowdown; at this time, need to close(searchThreadNumber<=1) multi-thread\n"
228+
" search mode or reduce the number of searchThreadNumber!\n"
229229
#endif
230230
" -c-compressType[-compressLevel]\n"
231231
" set outDiffFile Compress type, DEFAULT uncompress;\n"

hpatchz.c

Lines changed: 63 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,9 @@
4949
# define printf(...)
5050
# define _log_info_utf8(...) do{}while(0)
5151
#endif
52+
// Progress printing follows the _IS_NEED_PRINT_LOG setting unless
// _IS_NEED_PRINT_PROGRESS has already been defined before this point.
#ifndef _IS_NEED_PRINT_PROGRESS
53+
# define _IS_NEED_PRINT_PROGRESS _IS_NEED_PRINT_LOG
54+
#endif
5255

5356
#ifndef _IS_NEED_MAIN
5457
# define _IS_NEED_MAIN 1
@@ -1196,6 +1199,47 @@ static TByte* getPatchMemCache(hpatch_BOOL isLoadOldAll,size_t patchCacheSize,si
11961199
return temp_cache;
11971200
}
11981201

1202+
#if (_IS_NEED_PRINT_PROGRESS)
1203+
// Output-stream wrapper: forwards writes to another output stream and prints
// a textual patch progress bar as data is written.
typedef struct hpatch_TProgressStreamOutput{
1204+
// Stream interface handed out to callers; its streamImport points back to this wrapper.
hpatch_TStreamOutput base;
1205+
// The wrapped stream that actually receives the data.
const hpatch_TStreamOutput* streamOutput;
1206+
// clock_s() value recorded at the last progress print (0 right after initialisation).
double time0;
1207+
// Scale factor 1000.0/streamSize: converts an end-of-write position into tenths of a percent.
double progressR;
1208+
// Last printed progress value in tenths of a percent; initialised to -1, which as an
// unsigned value differs from any computed progress, so the first write is never
// skipped as "unchanged".
unsigned int progress;
1209+
} hpatch_TProgressStreamOutput;
1210+
// write() callback of the progress wrapper.
// Forwards [data,data_end) at writeToPos to the wrapped stream; when that write
// succeeds, recomputes the progress and may print an updated progress bar.
// Returns the result of the wrapped stream's write unchanged.
static hpatch_BOOL _progressStreamOutput_write(const struct hpatch_TStreamOutput* stream,hpatch_StreamPos_t writeToPos,
1211+
const unsigned char* data,const unsigned char* data_end){
1212+
// Full-length bar; printing starts at an offset into it so only the reached part is shown.
const char* progressStr="==============================";
1213+
hpatch_TProgressStreamOutput* self=(hpatch_TProgressStreamOutput*)stream->streamImport;
1214+
hpatch_BOOL result=self->streamOutput->write(self->streamOutput,writeToPos,data,data_end);
1215+
if (result){
1216+
// End position of this write scaled to 0..1000 (tenths of a percent), truncated toward zero.
// NOTE(review): the value comes from a floating-point product; if rounding leaves the
// final write slightly below 1000 it truncates to 999, so isEnd would never be true and
// the 100% line plus trailing newline would not be printed -- confirm.
unsigned int progress=(unsigned int)((writeToPos+(size_t)(data_end-data))*self->progressR);
1217+
hpatch_BOOL isEnd=(progress==1000);
1218+
if (progress!=self->progress){
1219+
double time1=clock_s();
1220+
// Throttle output: print only if at least 1/3 second has passed since the last print,
// or when the end has been reached.
if ((time1>=self->time0+1.0/3)||isEnd){
1221+
self->progress=progress;
1222+
self->time0=time1;
1223+
// '\r' rewrites the same console line; progress*0.1 turns tenths of a percent into percent.
printf("\r patch progress: [%-30s] %3.1f%%",progressStr+((1000-progress)*30/1000),progress*0.1);
1224+
// Finish the progress line once the end has been reached.
if (isEnd) printf("\n");
1225+
}
1226+
}
1227+
}
1228+
return result;
1229+
}
1230+
1231+
// Initialises *self as a progress-printing wrapper around streamOutput and returns
// a pointer to self->base. (Despite "Input" in the name, it wraps an output stream.)
// self must stay alive as long as the returned stream is in use, since the returned
// pointer refers to a member of *self.
static hpatch_TStreamOutput* _progressStreamInput_wrapper(hpatch_TProgressStreamOutput* self,const hpatch_TStreamOutput* streamOutput){
1232+
// Zero everything first; members not assigned below (including time0) stay 0.
// NOTE(review): only `write` is forwarded; if hpatch_TStreamOutput has other
// callbacks they remain null in the wrapper -- confirm against the callers.
memset(self,0,sizeof(*self));
1233+
self->base.streamImport=self;
1234+
self->base.streamSize=streamOutput->streamSize;
1235+
self->base.write=_progressStreamOutput_write;
1236+
self->streamOutput=streamOutput;
1237+
// -1 converts to the largest unsigned value, so it differs from any computed progress.
self->progress=-1;
1238+
// Scale factor to tenths of a percent; a zero streamSize is replaced by 1 to avoid dividing by zero.
self->progressR=1000.0/(streamOutput->streamSize?streamOutput->streamSize:1);
1239+
return &self->base;
1240+
}
1241+
#endif //_IS_NEED_PRINT_PROGRESS
1242+
11991243
int hpatch(const char* oldFileName,const char* diffFileName,const char* outNewFileName,
12001244
hpatch_BOOL isLoadOldAll,size_t patchCacheSize,hpatch_StreamPos_t diffDataOffert,
12011245
hpatch_StreamPos_t diffDataSize,hpatch_BOOL vcpatch_isChecksum,hpatch_BOOL vcpatch_isInMem,size_t threadNum){
@@ -1208,9 +1252,13 @@ int hpatch(const char* oldFileName,const char* diffFileName,const char* outNewFi
12081252
hpatch_TFileStreamInput diffData;
12091253
hpatch_TFileStreamInput oldData;
12101254
hpatch_TStreamInput* poldData=&oldData.base;
1255+
hpatch_TStreamOutput* pnewData=&newData.base;
12111256
TByte* temp_cache=0;
12121257
size_t temp_cache_size;
12131258
int patch_result=HPATCH_SUCCESS;
1259+
#if (_IS_NEED_PRINT_PROGRESS)
1260+
hpatch_TProgressStreamOutput _progressStreamOutput;
1261+
#endif
12141262
hpatch_TFileStreamInput_init(&oldData);
12151263
hpatch_TFileStreamInput_init(&diffData);
12161264
hpatch_TFileStreamOutput_init(&newData);
@@ -1284,10 +1332,13 @@ int hpatch(const char* oldFileName,const char* diffFileName,const char* outNewFi
12841332
temp_cache=getPatchMemCache(isLoadOldAll,patchCacheSize,mustAppendMemSize,maxWindowSize, &temp_cache_size);
12851333
}
12861334
check(temp_cache,HPATCH_MEM_ERROR,"alloc cache memory");
1335+
#if (_IS_NEED_PRINT_PROGRESS)
1336+
pnewData=_progressStreamInput_wrapper(&_progressStreamOutput,pnewData);
1337+
#endif
12871338
#if (_IS_NEED_SINGLE_STREAM_DIFF)
12881339
if (diffInfos.isSingleCompressedDiff){
12891340
check(temp_cache_size>=diffInfos.sdiffInfo.stepMemSize+hpatch_kStreamCacheSize*3,HPATCH_MEM_ERROR,"alloc cache memory");
1290-
if (!patch_single_compressed_diff(&newData.base,poldData,&diffData.base,diffInfos.sdiffInfo.diffDataPos,
1341+
if (!patch_single_compressed_diff(pnewData,poldData,&diffData.base,diffInfos.sdiffInfo.diffDataPos,
12911342
diffInfos.sdiffInfo.uncompressedSize,diffInfos.sdiffInfo.compressedSize,decompressPlugin,
12921343
diffInfos.sdiffInfo.coverCount,(size_t)diffInfos.sdiffInfo.stepMemSize,
12931344
temp_cache,temp_cache+temp_cache_size,0,threadNum))
@@ -1296,20 +1347,20 @@ int hpatch(const char* oldFileName,const char* diffFileName,const char* outNewFi
12961347
#endif
12971348
#if (_IS_NEED_BSDIFF)
12981349
if (diffInfos.isBsDiff){
1299-
if (!bspatch_with_cache(&newData.base,poldData,&diffData.base,decompressPlugin,
1350+
if (!bspatch_with_cache(pnewData,poldData,&diffData.base,decompressPlugin,
13001351
temp_cache,temp_cache+temp_cache_size))
13011352
patch_result=HPATCH_BSPATCH_ERROR;
13021353
}else
13031354
#endif
13041355
#if (_IS_NEED_VCDIFF)
13051356
if (diffInfos.isVcDiff){
1306-
if (!vcpatch_with_cache(&newData.base,poldData,&diffData.base,decompressPlugin,
1357+
if (!vcpatch_with_cache(pnewData,poldData,&diffData.base,decompressPlugin,
13071358
vcpatch_isChecksum,temp_cache,temp_cache+temp_cache_size))
13081359
patch_result=HPATCH_VCPATCH_ERROR;
13091360
}else
13101361
#endif
13111362
{
1312-
if (!patch_decompress_with_cache(&newData.base,poldData,&diffData.base,decompressPlugin,
1363+
if (!patch_decompress_with_cache(pnewData,poldData,&diffData.base,decompressPlugin,
13131364
temp_cache,temp_cache+temp_cache_size))
13141365
patch_result=HPATCH_HPATCH_ERROR;
13151366
}
@@ -1355,6 +1406,9 @@ int hpatch_dir(const char* oldPath,const char* diffFileName,const char* outNewPa
13551406
hpatch_TDecompress _decompressPlugin={0};
13561407
const hpatch_TStreamInput* oldStream=0;
13571408
const hpatch_TStreamOutput* newStream=0;
1409+
#if (_IS_NEED_PRINT_PROGRESS)
1410+
hpatch_TProgressStreamOutput _progressStreamOutput;
1411+
#endif
13581412
hpatch_TFileStreamInput_init(&diffData);
13591413
TDirPatcher_init(&dirPatcher);
13601414
if (oldPath) assert(0!=strcmp(oldPath,outNewPath));
@@ -1445,7 +1499,7 @@ int hpatch_dir(const char* oldPath,const char* diffFileName,const char* outNewPa
14451499
LOG_ERR("not found checksumType \"%s\" ERROR!\n",dirDiffInfo->checksumType);
14461500
check_on_error(DIRPATCH_CHECKSUMTYPE_ERROR);
14471501
}
1448-
printf("hpatchz run with checksum plugin: \"%s\" (checksumSets:%s%s%s%s)\n",dirDiffInfo->checksumType,
1502+
printf("hpatchz run with checksum plugin: \"%s\" (checksumSets:%s%s%s%s)\n\n",dirDiffInfo->checksumType,
14491503
checksumSet->isCheck_dirDiffData?" diff":"",checksumSet->isCheck_oldRefData?" old":"",
14501504
checksumSet->isCheck_newRefData?" new":"",checksumSet->isCheck_copyFileData?" copy":"");
14511505
if (!TDirPatcher_checksum(&dirPatcher,checksumSet,temp_cache,temp_cache+temp_cache_size)){
@@ -1469,6 +1523,9 @@ int hpatch_dir(const char* oldPath,const char* diffFileName,const char* outNewPa
14691523
check(TDirPatcher_openNewDirAsStream(&dirPatcher,&hlistener->base,&newStream),
14701524
DIRPATCH_OPEN_NEWPATH_ERROR,"open newFile");
14711525
}
1526+
#if (_IS_NEED_PRINT_PROGRESS)
1527+
newStream=_progressStreamInput_wrapper(&_progressStreamOutput,newStream);
1528+
#endif
14721529
//patch
14731530
if(!TDirPatcher_patch(&dirPatcher,newStream,oldStream,temp_cache,temp_cache+temp_cache_size,threadNum)){
14741531
check_dec(_decompressPlugin.decError);
@@ -1480,6 +1537,7 @@ int hpatch_dir(const char* oldPath,const char* diffFileName,const char* outNewPa
14801537
DIRPATCH_CHECKSUM_NEWDATA_ERROR,"newFile checksum");
14811538
check(hpatch_FALSE,DIRPATCH_PATCH_ERROR,"dir patch run");
14821539
}
1540+
printf(" patch ok!\n");
14831541
clear:
14841542
_isInClear=hpatch_TRUE;
14851543
check(hlistener->patchFinish(hlistener,result==HPATCH_SUCCESS),DIRPATCH_PATCHFINISH_ERROR,"dir patch finish");

libHDiffPatch/HDiff/match_block.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,10 @@ namespace hdiff_private {
4444

4545
#define _cover_pos(isNew,pcover) (isNew?(pcover)->newPos:(pcover)->oldPos)
4646

47+
// Minimum gap length (moveLen) for a gap to be recorded as a packed cover;
// separate thresholds are used for the new-data and old-data sides.
#define _kMinMoveLen_new 1 // must 1
48+
#define _kMinMoveLen_old 16
49+
// Expands to an expression that reads `isNew`, so it can only be used where a
// name `isNew` is in scope at the point of expansion.
#define kMinMoveLen (isNew?_kMinMoveLen_new:_kMinMoveLen_old)
50+
4751
template<bool isNew> static
4852
void _getPackedCovers(hpatch_StreamPos_t dataSize,const std::vector<hpatch_TCover>& blockCovers,
4953
std::vector<TPackedCover>& out_packedCovers){
@@ -57,7 +61,7 @@ namespace hdiff_private {
5761
continue;
5862
}
5963
hpatch_StreamPos_t moveLen=_cover_pos(isNew,cover)-srcPos;
60-
if (moveLen){
64+
if (moveLen>=kMinMoveLen){
6165
TPackedCover pkcover={srcPos,dst,moveLen};
6266
out_packedCovers.push_back(pkcover);
6367
dst+=moveLen;
@@ -66,7 +70,7 @@ namespace hdiff_private {
6670
}
6771
assert(dataSize>=srcPos);
6872
hpatch_StreamPos_t moveLen=dataSize-srcPos;
69-
if (moveLen){
73+
if (moveLen>=kMinMoveLen){
7074
TPackedCover pkcover={srcPos,dst,moveLen};
7175
out_packedCovers.push_back(pkcover);
7276
dst+=moveLen;
@@ -267,7 +271,7 @@ TMatchBlockStream::~TMatchBlockStream(){
267271
coverCount+=icovers.size(); \
268272
}
269273

270-
void TMatchBlockBase::_unpackData(IDiffInsertCover* diffi,void* pcovers,size_t coverCount,bool isCover32){
274+
void TMatchBlockBase::_unpackData(IDiffInsertCover* diffi,void*& pcovers,size_t& coverCount,bool isCover32){
271275
std::vector<hpatch_TCover> clipCovers;
272276
doClipCover<true>(pcovers,coverCount,isCover32,packedCoversForNew,clipCovers);
273277
_insertCovers(clipCovers);

libHDiffPatch/HDiff/match_block.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,14 +34,14 @@ namespace hdiff_private{
3434
struct TAutoMem;
3535

3636
struct TMatchBlockBase{
37-
const size_t matchBlockSize;
38-
const size_t threadNum;
3937
typedef hpatch_TCover TPackedCover;
4038
TMatchBlockBase(size_t _matchBlockSize,size_t _threadNum)
4139
:matchBlockSize(_matchBlockSize),threadNum(_threadNum){}
4240
protected:
4341
void _getPackedCover(hpatch_StreamPos_t newDataSize,hpatch_StreamPos_t oldDataSize);
44-
void _unpackData(IDiffInsertCover* diffi,void* pcovers,size_t coverCount,bool isCover32);
42+
void _unpackData(IDiffInsertCover* diffi,void*& pcovers,size_t& coverCount,bool isCover32);
43+
const size_t matchBlockSize;
44+
const size_t threadNum;
4545
std::vector<hpatch_TCover> blockCovers;
4646
std::vector<TPackedCover> packedCoversForOld;
4747
std::vector<TPackedCover> packedCoversForNew;

libHDiffPatch/HDiff/private_diff/limit_mem_diff/digest_matcher.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -167,8 +167,10 @@ TDigestMatcher::TDigestMatcher(const hpatch_TStreamInput* oldData,const hpatch_T
167167
size_t kMatchBlockSize,const hdiff_TMTSets_s& mtsets)
168168
:m_oldData(oldData),m_newData(newData),m_isUseLargeSorted(true),m_mtsets(mtsets),
169169
m_newCacheSize(0),m_oldCacheSize(0),m_oldMinCacheSize(0),m_backupCacheSize(0),m_kMatchBlockSize(0){
170-
if (kMatchBlockSize>(oldData->streamSize+1)/2)
171-
kMatchBlockSize=(size_t)((oldData->streamSize+1)/2);
170+
size_t maxBetterBlockSize=((oldData->streamSize+63)/64+63)/64*64;
171+
if (kMatchBlockSize>maxBetterBlockSize)
172+
kMatchBlockSize=maxBetterBlockSize;
173+
172174
if (kMatchBlockSize<kMatchBlockSize_min)
173175
kMatchBlockSize=kMatchBlockSize_min;
174176
if (oldData->streamSize<kMatchBlockSize) return;

0 commit comments

Comments
 (0)