c#解析包含HTML特殊字符的字符串XElement

问题描述:

在客户端的服务器+ GWT上使用.NET c#,我有一个Web窗体,它接受用户输入,然后构建一个XML字符串并将其存储在数据库中。然后我需要从数据库中读取它,通过TCP将它发送到手持设备,并将其解析为XElement。一切运行良好,直到有人从Word(或者在本例中是Excel)复制并粘贴文本;这时当我尝试执行:

XElement.parse(str); 

它抛出一个异常:

'.', hexadecimal value 0x00, is an invalid character. Line 132, position 111. 

会导致此问题的一个示例字符是右撇号(右单引号)字符(0x2019)。而从Excel/Word复制粘贴时,可能还会带入一大堆类似的特殊字符。处理此问题的最佳方法是什么?下面是我从流中构建字符串的方式:

/// <summary>
/// Performs one RPC over a raw socket: builds the POST for
/// <paramref name="methodName"/>, sends it, reads the reply headers and body,
/// parses the body as XML and wraps the result element.
/// </summary>
/// <param name="methodName">
/// Name of the remote method. Also used to locate the
/// "&lt;methodName&gt;Response" and "&lt;methodName&gt;Result" reply elements.
/// </param>
/// <param name="addParameters">
/// Callbacks that are each invoked with the call's <c>CallParameters</c>.
/// </param>
/// <returns>
/// A <c>CallResult</c> built from the result element, or <c>null</c> when
/// <c>OnTransferring</c> returns false, <c>connectToServer</c> returns null,
/// or no complete reply was received within three attempts.
/// </returns>
/// <exception cref="SoapException">The reply body contains a Fault element.</exception>
protected CallResult callUsingSocketClass(string methodName, params Action<CallParameters>[] addParameters)
{
    WebServicesClient.Debug.DebugMessage(WebServicesClient.Debug.MASK_ENTRY_EXIT, "++调用({0},...)", methodName);

    if (this.OnTransferring != null && !this.OnTransferring())
    {
        return null;
    }

    CallParameters parameters = new CallParameters(this, methodName);
    foreach (var addParameter in addParameters)
    {
        addParameter(parameters);
    }

    string post = this.CreatePost(parameters);
    WebServicesClient.Debug.DebugMessage(WebServicesClient.Debug.MASK_RPC_CALL, "RPC(Method={0}, host={1}, port={2}, post='{3}')", methodName, this.Host, this.Port, post);
    byte[] postBytes = Encoding.UTF8.GetBytes(post);

    //
    // Send the request and wait for the reply.
    //
    // replyContentChars stays null until a reply body has been received in
    // full. A failed attempt must never leave a partially filled
    // (zero-padded) buffer behind: the retry loop treats non-null as
    // success, and the '\0' padding would make XElement.Parse fail.
    char[] replyContentChars = null;
    for (int attempts = 0; attempts < 3; attempts++)
    {
        Socket socket = null;
        try
        {
            WebServicesClient.Debug.DebugMessage(WebServicesClient.Debug.MASK_RPC_CALL, "RPC - creating socket for RPC call...");
            if (this.OnTransferring != null && !this.OnTransferring())
            {
                return null;
            }

            // Prefer the explicit IP address when one is set.
            string hostID = this.Host;
            if (this.HostIPAddress != null)
            {
                hostID = this.HostIPAddress;
            }

            using (socket = this.connectToServer(hostID, this.Port))
            {
                if (socket == null)
                {
                    return null;
                }
                if (this.OnTransferring != null && !this.OnTransferring())
                {
                    return null;
                }

                WebServicesClient.Debug.DebugMessage(WebServicesClient.Debug.MASK_RPC_CALL, "RPC - socket created!");
                WebServicesClient.Debug.DebugMessage(WebServicesClient.Debug.MASK_RPC_CALL, "RPC - communicating with server...");
                WebServicesClient.Debug.DebugMessage(WebServicesClient.Debug.MASK_RPC_CALL, "RPC - writing post...");
                this.sendDataToServer(socket, postBytes);

                int replyLength = -1;
                if (this.OnTransferring != null && !this.OnTransferring())
                {
                    return null;
                }

                WebServicesClient.Debug.DebugMessage(WebServicesClient.Debug.MASK_RPC_CALL, "RPC - reading reply...");
                using (var reader = this.receiveStreamFromServer(socket))
                {
                    if (this.OnTransferring != null && !this.OnTransferring())
                    {
                        socket.Close();
                        socket = null;
                        return null;
                    }

                    // Header section: runs until the first empty line.
                    for (; ; )
                    {
                        string lineRaw = reader.ReadLine();
                        if (lineRaw == null)
                        {
                            // End of stream inside the headers; treat as a failed attempt.
                            throw new InvalidOperationException("Connection closed before the end of the reply headers");
                        }
                        lineRaw = lineRaw.Trim();

                        string line = lineRaw.ToLowerInvariant();
                        if (line.StartsWith("content-length:"))
                        {
                            replyLength = int.Parse(line.Substring(15));
                        }
                        else if (line == "")
                        {
                            if (replyLength < 0)
                            {
                                throw new InvalidOperationException("Reply hasn't specified content-length");
                            }
                            break;
                        }
                        else
                        {
                            if (this.CookieJar != null)
                            {
                                this.CookieJar.ProcessFromServer(lineRaw);
                            }
                        }
                    }

                    // Content starts here.
                    //
                    // Content-Length counts bytes, but the reader hands back
                    // decoded chars. Non-ASCII text (e.g. U+2019 pasted from
                    // Word/Excel) takes several bytes per char, so progress is
                    // measured in encoded bytes rather than in chars.
                    // NOTE(review): assumes the reader decodes the stream as
                    // UTF-8 (the request is sent as UTF-8) - confirm against
                    // receiveStreamFromServer.
                    char[] buffer = new char[replyLength];
                    int charsRecv = 0;
                    int bytesRecv = 0;
                    while (bytesRecv < replyLength)
                    {
                        // Every char encodes to at least one byte, so the bytes still
                        // owed bound the chars still owed and charsRecv <= bytesRecv
                        // keeps this request inside the buffer.
                        int charsRead = reader.Read(buffer, charsRecv, replyLength - bytesRecv);
                        if (charsRead <= 0)
                        {
                            break;
                        }
                        charsRecv += charsRead;
                        // Recount from the start so a surrogate pair split across
                        // two reads is measured as one pair once it is complete.
                        bytesRecv = Encoding.UTF8.GetByteCount(buffer, 0, charsRecv);
                    }

                    if (bytesRecv != replyLength)
                    {
                        untime.Logger.Logger.Error("Web Service call '{0}' received {1} bytes, header indicated {2} bytes", methodName, bytesRecv, replyLength);
                        throw new InvalidOperationException(String.Format("Have not received all of reply data - received {0} bytes, expected {1}", bytesRecv, replyLength));
                    }

                    // Publish only the chars actually received, and only now
                    // that the reply is known to be complete.
                    replyContentChars = new char[charsRecv];
                    Array.Copy(buffer, replyContentChars, charsRecv);
                }
                socket.Close();
                socket = null;
            }
        }
        catch (Exception e)
        {
            // Best effort: log the failure and fall through to the next attempt.
            WebServicesClient.Debug.DebugMessage(WebServicesClient.Debug.MASK_RPC_CALL, "RPC - exception thrown - {0} [{1},{2}]", e.Message, e.Source, e.StackTrace);
        }
        finally
        {
            if (socket != null)
            {
                socket.Close();
                socket = null;
            }
        }

        if (replyContentChars != null)
        {
            break;
        }
        if (this.OnTransferring != null && !this.OnTransferring())
        {
            return null;
        }
    }

    //
    // Verify that data has been received.
    //
    if (replyContentChars == null)
    {
        return null;
    }
    if (this.OnTransferring != null && !this.OnTransferring())
    {
        return null;
    }

    //
    // Process the received data.
    //
    string replyContent = new string(replyContentChars);
    WebServicesClient.Debug.DebugMessage(WebServicesClient.Debug.MASK_RPC_CALL, "RPC(Method={0}, replyContent='{1}')", methodName, replyContent);

    XElement xReplyContent = XElement.Parse(replyContent);

    var xReplyBody = xReplyContent.Element(nsSoap + "Body");
    var xFault = xReplyBody.Element(nsSoap + "Fault");
    if (xFault != null)
    {
        // Something has gone wrong on the server
        var xFaultCode = xFault.Element(nsSoap + "Code");
        var xFaultReason = xFault.Element(nsSoap + "Reason");
        untime.Logger.Logger.Error("Web Service call to method '{0}' failed: Code='{1}', Reason='{2}'", methodName, (string)xFaultCode, (string)xFaultReason);

        // A fault without a Code element must still surface as a
        // SoapException rather than a NullReferenceException.
        string faultCode = (string)xFaultCode ?? String.Empty;
        var codeParts = faultCode.Split(':');
        XmlQualifiedName xmlQualifiedName;
        if (codeParts.Length == 2)
        {
            // "prefix:name" - the prefix is passed as the namespace argument.
            xmlQualifiedName = new XmlQualifiedName(codeParts[1], codeParts[0]);
        }
        else
        {
            xmlQualifiedName = new XmlQualifiedName(faultCode);
        }
        throw new SoapException((string)xFaultReason, xmlQualifiedName);
    }

    var xResponse = xReplyBody.Element(this.nsArgs + (methodName + "Response"));
    var xResult = xResponse.Element(this.nsArgs + (methodName + "Result"));
    if (this.OnTransferring != null && !this.OnTransferring())
    {
        return null;
    }

    var result = new CallResult(xResult);
    return result;
}
+0

代码不完整。 'replyLength'在哪里定义?在这个例子中你有一个无尽的'for'循环。 – jgauffin 2011-03-29 10:03:41

+0

好的,我已经粘贴了现在在这里完成这项工作的方法。从第70行开始,你可以看到我如何建立字符串。 – Shahid 2011-03-29 10:17:21

+0

你从WebServicesClient.Debug.DebugMessage(WebServicesClient.Debug.MASK_RPC_CALL,“RPC(Method = {0},replyContent ='{1}')”,methodName,replyContent)获得了什么输出结果?' – jgauffin 2011-03-29 10:28:29

请使用System.Text.Encoding.UTF8,因为Excel使用的似乎是UTF编码。之后再转换为您需要的编码。

+0

试过了......还是有同样的问题 – Shahid 2011-03-28 15:49:04

+0

你怎么得到'str'变量? – jgauffin 2011-03-28 16:11:32

+0

字符串实际上被发送到手持设备。从UTF8中读取数据流,然后使用UTF8 – Shahid 2011-03-29 08:48:08

我已经找到了答案,相关的帖子在这里:

http://social.msdn.microsoft.com/Forums/en-US/csharpgeneral/thread/54c83f23-e579-48e9-9fbe-bc20232a02fc/

其思路是:要么从输入中剥离/替换Unicode字符,要么在构建XML时使用Unicode编码:

<?xml version="1.0" encoding="UTF-16"?> 

在上面的帖子中,接受的答案建议使用下面的方法(在此粘贴),但是如果您需要替换某些字符,则需要添加一些代码来执行此操作:

 /// <summary>
 /// Returns a copy of <paramref name="inpString"/> containing only printable
 /// ASCII characters (U+0020 space through U+007E tilde). Control characters,
 /// including tab, CR and LF, and every non-ASCII character are dropped.
 /// </summary>
 /// <param name="inpString">Text to filter; may be null or empty.</param>
 /// <returns>The filtered text, or an empty string for null or empty input.</returns>
 public string getRidOfUnprintablesAndUnicode (string inpString)
 {
     if (String.IsNullOrEmpty(inpString))
     {
         return String.Empty;
     }

     // Fully qualified so the method does not depend on a using directive.
     var kept = new System.Text.StringBuilder(inpString.Length);
     foreach (char ch in inpString)
     {
         // Compare the char itself. Casting to byte keeps only the low 8 bits,
         // which would let e.g. U+0141 through because its low byte is 0x41 ('A').
         if (ch >= ' ' && ch <= '~')
         {
             kept.Append(ch);
         }
     }
     return kept.ToString();
 }