Changeset - 63e17c1c419e
[Not reviewed]
default
0 1 0
Laman - 5 years ago 2019-06-29 14:48:23

String <--> UTF-8 conversion
1 file changed with 38 insertions and 5 deletions:
util.js
38
5
0 comments (0 inline, 0 general)
util.js
Show inline comments
 
@@ -23,37 +23,70 @@ function int322bytes(x){
 

	
 
/**
 * Convert an array of 32-bit integers into one flat array of bytes.
 *
 * Each element is converted with int322bytes and the results are
 * concatenated in input order.
 *
 * @param {number[]} arr - Integers to convert; may be empty.
 * @returns {number[]} Concatenated output of int322bytes for every element
 *     (an empty array for empty input).
 */
function int32s2bytes(arr){
	// The explicit [] seed keeps reduce from throwing a TypeError on an empty array.
	return arr.map(int322bytes).reduce((acc,bytes)=>acc.concat(bytes),[]);
}
 

	
 
/**
 * Format a sequence of bytes as a lowercase hexadecimal string,
 * two digits per byte.
 *
 * @param {ArrayLike<number>|Iterable<number>} arr - Byte values (0-255),
 *     e.g. a plain array or a Uint8Array.
 * @returns {string} Hex digits of all bytes joined without a separator.
 */
function bytes2hex(arr){
	// Array.from always builds a plain array, so the mapped strings survive
	// even when arr is a typed array (whose own map() would coerce them back to numbers).
	return Array.from(arr,x=>x.toString(16).padStart(2,"0")).join("");
}
 

	
 
/**
 * Encode a string as UTF-8.
 *
 * Code points below 0x80 take one byte, below 0x800 two bytes,
 * below 0x10000 three bytes and everything else four bytes.
 * An unpaired surrogate is encoded like any other code point below 0x10000
 * (three bytes).
 *
 * @param {string} s - String to encode; may be empty.
 * @returns {number[]} The encoded bytes, one number per byte.
 */
function str2utf8(s){
	const res=[];
	// Iterating a string walks it by code point, so a surrogate pair
	// arrives as a single two-unit chunk and needs no manual index skipping.
	for(const ch of s){
		const c=ch.codePointAt(0);
		if(c<0x80){res.push(c);}
		else if(c<0x800){
			res.push(0b11000000|(c>>>6));
			res.push(0b10000000|(c&0b111111));
		}
		else if(c<0x10000){
			res.push(0b11100000|(c>>>12));
			res.push(0b10000000|((c>>>6)&0b111111));
			res.push(0b10000000|(c&0b111111));
		}
		else{
			res.push(0b11110000|(c>>>18));
			res.push(0b10000000|((c>>>12)&0b111111));
			res.push(0b10000000|((c>>>6)&0b111111));
			res.push(0b10000000|(c&0b111111));
		}
	}
	return res;
}
 

	
 
/*console.log(str2utf8("$").map(x=>x.toString(16)));
 
console.log(str2utf8("¢").map(x=>x.toString(16)));
 
console.log(str2utf8("€").map(x=>x.toString(16)));
 
console.log(str2utf8("𐍈").map(x=>x.toString(16)));*/
 
/**
 * Decode a sequence of UTF-8 bytes into a string.
 *
 * Well-formed input decodes to the corresponding code points. Malformed input
 * never throws; each of the following yields one U+FFFD replacement character:
 *   - a byte that cannot start a sequence (0x80-0xBF, 0xF8-0xFF),
 *   - a sequence cut short by the end of input or by a non-continuation byte
 *     (the offending byte is not consumed and is decoded on its own),
 *   - a four-byte sequence whose value exceeds 0x10FFFF.
 * Overlong forms and three-byte encodings of surrogates are still accepted,
 * so the output of str2utf8 always decodes back to the original string.
 *
 * @param {ArrayLike<number>} arr - Byte values (0-255); may be empty.
 * @returns {string} The decoded string.
 */
function utf82str(arr){
	const REPLACEMENT=0xfffd;
	const res=[];
	let i=0;
	while(i<arr.length){
		const x=arr[i++];
		if(x<=0b01111111){res.push(x); continue;}

		let need; // continuation bytes still expected
		let c;    // code point bits collected so far
		if(x>=0b11000000&&x<=0b11011111){need=1; c=x&0b11111;}
		else if(x>=0b11100000&&x<=0b11101111){need=2; c=x&0b1111;}
		else if(x>=0b11110000&&x<=0b11110111){need=3; c=x&0b111;}
		else{res.push(REPLACEMENT); continue;} // stray continuation byte or impossible lead byte

		let ok=true;
		for(;need>0;need--){
			// Every continuation byte must exist and look like 10xxxxxx.
			if(i>=arr.length||(arr[i]&0b11000000)!==0b10000000){ok=false; break;}
			c=c<<6|(arr[i]&0b111111);
			i++;
		}
		// String.fromCodePoint throws a RangeError above 0x10FFFF, so substitute here.
		res.push(ok&&c<=0x10ffff?c:REPLACEMENT);
	}
	return res.map(x=>String.fromCodePoint(x)).join("");
}
 

	
 
// Publish the helpers as CommonJS exports when a `module` object that owns an
// `exports` property is present; otherwise do nothing.
if(typeof module!=='undefined'&&module.hasOwnProperty('exports')){
	Object.assign(module.exports,{
		bytes2int32,
		bytes2int32s,
		int322bytes,
		int32s2bytes,
		bytes2hex,
		str2utf8,
		utf82str,
	});
}
0 comments (0 inline, 0 general)