BSDiff是一个增量更新算法,它在服务器端运行BSDiff算法产生patch包。在客户端运行BSPatch算法,将旧文件和patch包合成新文件。增量更新在很多大型应用中是比较常见的一种技术,通过文件对比的方式来生成差分包。
bsdiff这个开源库连接:http://www.daemonology.net/bsdiff/
遍历搜索引擎,只发现BSPatch算法实现只有C语言源码,使得在Android等Java语言环境中,只能通过NDK的方式运行,非常不方便。
自己阅读了下BSPatch算法源码,发现并不复杂,完全可以用Java重新实现一下。另外吐槽一下,BSPatch算法的C语言源码main()主函数是真的好长,而且还用了err.h、fseeko、ftello这种windows上都没有的接口,难受。。。
以下为我实现的Java版BSPatch算法,并把函数拆分了一下,使得每个方法不超过40行,方便阅读。可直接调用
BSPatch.patch(oldPath, newPath, patchPath);
实现增量更新。源码依赖Apache的Commons Compress用以实现bzip2的解压,链接:http://commons.apache.org/proper/commons-compress/download_compress.cgi
import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.RandomAccessFile;
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
public class BSPatch {
private RandomAccessFile mOld;
private OutputStream mNew;
private InputStream mPatch;
// 以下3个数据仅在patch()内有效
private InputStream bzCtrl;// 控制数据
private InputStream bzData;// 差分数据
private InputStream bzExtra;// 剩余数据
private BSPatch(RandomAccessFile old, OutputStream New, InputStream patch) {
mOld = old;
mNew = New;
mPatch = patch;
}
/**
* 低位在前,从流中读取8字节的长整形数据
* @param is: 输入流,8字节
* @return : 长整形数据
*/
private static long readLong(InputStream is) throws IOException {
byte[] bytes = new byte[8];
is.read(bytes);
long ret = ((long) (bytes[7] & 0x7F) << 56) | ((long) (bytes[6] & 0xFF) << 48) |
((long) (bytes[5] & 0xFF) << 40) | ((long) (bytes[4] & 0xFF) << 32) |
((long) (bytes[3] & 0xFF) << 24) | ((long) (bytes[2] & 0xFF) << 16) |
((long) (bytes[1] & 0xFF) << 8) | ((long) (bytes[0] & 0xFF));
return ((bytes[7] & 0x80) != 0) ? -ret : ret;
}
/**
* 检查patch文件头
* @param is: patch文件输入流,8字节
* @throws IOException : 检查出错
*/
private static void checkHeader(InputStream is) throws IOException {
/* Check header for appropriate magic 'BSDIFF40' */
byte[] bytes = new byte[8];
is.read(bytes);
String str = new String(bytes);
if (!str.equals("BSDIFF40")) {
throw new IOException("patch file doesn't start from 'BSDIFF40'" +
" but with '" + str + "'");
}
}
/**
* 解析patch文件,生成bzCtrl,bzData,bzExtra数据
* @return : 新文件大小
*/
private long readPatchFile() throws IOException {
/* Check for appropriate magic */
checkHeader(mPatch);
/* Read lengths from header */
long bzCtrlLen = readLong(mPatch);
long bzDataLen = readLong(mPatch);
long newSize = readLong(mPatch);
if (bzCtrlLen < 0 || bzDataLen < 0 || newSize < 0 ||
bzCtrlLen > Integer.MAX_VALUE || bzDataLen > Integer.MAX_VALUE) {
throw new IOException("bzCtrlLen=" + bzCtrlLen +
", bzDataLen=" + bzDataLen + ", newSize=" + newSize);
}
byte[] bytes = new byte[(int) bzCtrlLen];
mPatch.read(bytes);
bzCtrl = new BZip2CompressorInputStream(new ByteArrayInputStream(bytes));
bytes = new byte[(int) bzDataLen];
mPatch.read(bytes);
bzData = new BZip2CompressorInputStream(new ByteArrayInputStream(bytes));
bzExtra = new BZip2CompressorInputStream(new BufferedInputStream(mPatch));
return newSize;
}
/**
* 读取差分数据,再于旧数据相加,最后把得到的新数据输出
* @param diffLen: 差分数据长度
*/
private void writeDiffData(long diffLen) throws IOException {
byte[] bytes = new byte[1024];
byte[] bytes2 = new byte[1024];
/* Read diff string and add old data to diff string */
for (long i = 0; i < diffLen; ) {
int len = (i + bytes.length > diffLen) ?
(int) (diffLen - i) : bytes.length;
len = bzData.read(bytes, 0, len);
mOld.readFully(bytes2, 0, len);
for (int j = 0; j < len; j++) {
bytes[j] += bytes2[j];
}
mNew.write(bytes, 0, len);
i += len;
}
mNew.flush();
}
/**
* 读取剩余的数据,并输出
* @param extraLen: 剩余数据长度
*/
private void writeExtraData(long extraLen) throws IOException {
byte[] bytes = new byte[1024];
/* Read extra string */
for (long i = 0; i < extraLen; ) {
int len = (i + bytes.length > extraLen) ?
(int) (extraLen - i) : bytes.length;
len = bzExtra.read(bytes, 0, len);
mNew.write(bytes, 0, len);
i += len;
}
mNew.flush();
}
/**
Patch file format:
0 8 "BSDIFF40"
8 8 X
16 8 Y
24 8 sizeof(newfile)
32 X bzip2(control block)
32+X Y bzip2(diff block)
32+X+Y ??? bzip2(extra block)
with control block a set of triples (x,y,z) meaning "add x bytes
from oldfile to x bytes from the diff block; copy y bytes from the
extra block; seek forwards in oldfile by z bytes".
*/
private void patch() throws IOException {
long newSize = readPatchFile();
long newPos = 0;
while (newPos < newSize) {
/* Read control data */
long diffLen = readLong(bzCtrl);
long extraLen = readLong(bzCtrl);
long adjustLen = readLong(bzCtrl);
/* Sanity-check */
if (newPos + diffLen > newSize) {
throw new IOException("newPos(" + newPos +
") + diffLen(" + diffLen + ") > newSize(" + newSize + ")");
}
writeDiffData(diffLen);
newPos += diffLen;
/* Sanity-check */
if (newPos + extraLen > newSize) {
throw new IOException("newPos(" + newPos +
") + extraLen(" + extraLen + ") > newSize(" + newSize + ")");
}
writeExtraData(extraLen);
newPos += extraLen;
mOld.seek(mOld.getFilePointer() + adjustLen);
}
bzCtrl.close();
bzData.close();
bzExtra.close();
}
private void close() throws IOException {
mOld.close();
mNew.close();
mPatch.close();
}
public static void patch(String oldPath, String newPath, String patchPath)
throws IOException {
BSPatch This = new BSPatch(new RandomAccessFile(oldPath, "r"),
new FileOutputStream(newPath), new FileInputStream(patchPath));
This.patch();
This.close();
}
}
|