将 \x 开头编码的数据解码成中文

在 Python 2 里，对字节串直接 decode('utf-8') 即可：

>>> "\xE5\x85\x84\xE5\xBC\x9F\xE9\x9A\xBE\xE5\xBD\x93 \xE6\x9D\x9C\xE6\xAD\x8C".decode('utf-8')
u'\u5144\u5f1f\u96be\u5f53 \u675c\u6b4c'
>>> print "\xE5\x85\x84\xE5\xBC\x9F\xE9\x9A\xBE\xE5\xBD\x93 \xE6\x9D\x9C\xE6\xAD\x8C".decode('utf-8')
兄弟难当 杜歌
>>>

在java里未发现直接解码的函数，不过只要理解了数据是如何编码的，就可以很快的进行解码，推荐阅读http://www.ruanyifeng.com/blog/2007/10/ascii_unicode_and_utf-8.html

UTF-8是unicode编码的一种落地方案：

\x 后面跟的是 UTF-8 编码后的字节（十六进制）。按照转换规则把这些字节还原成 Unicode 码点，就能得到对应的汉字。转换规则很简单：先将 \x 去掉，把后面的十六进制数转换为数字，然后进行对应的位移操作即可。需要注意的是，要先根据首字节判断该字符的 UTF-8 编码占几个字节：

 // Regex for one access-log line (presumably an nginx/Apache style log -- confirm against
 // the real log format). Capture groups, in order: client IPv4 address, two
 // whitespace-free fields, the bracketed timestamp, request method, request path, two
 // whitespace-free fields, then four double-quoted fields. The last quoted field has no
 // closing quote in the pattern; that is kept as in the original.
 val pattern = """(\d+\.\d+\.\d+\.\d+) - (\S+) (\S+) \[([^\]]+)\] "(\w+) (\S+) \S+" (\S+) (\S+) "([^"]+)" "([^"]+)" "([^"]+)" "([^"]+)""".r
  // Matches a maximal run of `\xHH` escapes (a literal backslash, the letter x and two
  // hex digits of either case). The backslash is doubled so the regex engine sees an
  // escaped backslash rather than the start of a `\xhh` hexadecimal escape.
  val decodeDataPattern = """(\\x([0-9A-Fa-f]){2})+""".r
  /**
   * Replaces every run of `\xHH` escapes found by `decodeDataPattern` in `utf8Str` with
   * the text produced by `decodeXdata` for that run; everything else is left untouched.
   *
   * @param utf8Str text that may contain `\xHH` escape runs
   * @return the text with each escape run replaced by its decoded form
   */
  def decodeUtf8(utf8Str:String):String={
    // replaceAllIn treats `$` and `\` in the replacement as group references / escapes.
    // The decoded text is plain data (an escape such as \x24 decodes to `$`), so it has
    // to be quoted before it is handed back to the regex engine.
    decodeDataPattern.replaceAllIn(utf8Str, m =>
      scala.util.matching.Regex.quoteReplacement(decodeXdata(m.matched)))
  }
     
   /**
    * Decodes a run of `\xHH` escapes (for example `\xE5\x85\x84`) into the text whose
    * UTF-8 bytes the escapes spell out.
    *
    * The input is split on the literal marker `\x`; every non-blank piece is parsed as a
    * hexadecimal byte value and the bytes are reassembled into code points following
    * the UTF-8 layout:
    *
    *   - `0xxxxxxx`                              1 byte (ASCII)
    *   - `110xxxxx 10xxxxxx`                     2 bytes
    *   - `1110xxxx 10xxxxxx 10xxxxxx`            3 bytes
    *   - `11110xxx 10xxxxxx 10xxxxxx 10xxxxxx`   4 bytes
    *
    * Malformed input (a stray continuation byte, an invalid lead byte, a sequence cut
    * short, or a value above U+10FFFF) yields U+FFFD instead of corrupting the
    * characters that follow. Overlong encodings are not rejected.
    *
    * @param utf8Str text made of `\xHH` escapes; each piece is expected to be one byte
    *                (00-FF) written in hexadecimal
    * @return the decoded string; empty when the input holds no escapes
    * @throws NumberFormatException if a piece between `\x` markers is not hexadecimal
    */
   def decodeXdata(utf8Str:String):String={
     val replacement = 0xFFFD.toChar
     val result = new StringBuilder()
     var codePoint = 0 // payload bits gathered so far for the current multi-byte sequence
     var pending = 0   // continuation bytes still expected for the current sequence

     // Starts a new sequence from a lead byte, or emits it at once when it stands alone.
     def startSequence(b: Int): Unit = {
       if ((b & 0x80) == 0) {
         result.append(b.toChar)          // 0xxxxxxx: plain ASCII
       } else if ((b & 0xe0) == 0xc0) {
         codePoint = b & 0x1f             // 110xxxxx: one continuation byte follows
         pending = 1
       } else if ((b & 0xf0) == 0xe0) {
         codePoint = b & 0x0f             // 1110xxxx: two continuation bytes follow
         pending = 2
       } else if ((b & 0xf8) == 0xf0) {
         codePoint = b & 0x07             // 11110xxx: three continuation bytes follow
         pending = 3
       } else {
         result.append(replacement)       // stray continuation byte or invalid lead byte
       }
     }

     // The separator is the two-character text `\x`, so the backslash has to be escaped
     // once for the regex and once more for the string literal.
     for (piece <- utf8Str.split("\\\\x")) {
       val hex = piece.trim
       if (hex.nonEmpty) {
         val b = Integer.parseInt(hex, 16)
         if (pending == 0) {
           startSequence(b)
         } else if ((b & 0xc0) == 0x80) {
           // 10xxxxxx: shift in the next six payload bits.
           codePoint = (codePoint << 6) | (b & 0x3f)
           pending -= 1
           if (pending == 0) {
             if (Character.isValidCodePoint(codePoint)) {
               // toChars yields a surrogate pair for code points above U+FFFF.
               result.append(new String(Character.toChars(codePoint)))
             } else {
               result.append(replacement)
             }
           }
         } else {
           // The sequence was cut short: mark it and treat this byte as a fresh start.
           result.append(replacement)
           pending = 0
           startSequence(b)
         }
       }
     }

     // Input ended in the middle of a multi-byte sequence.
     if (pending > 0) result.append(replacement)

     result.toString()
   }