Monday, February 06, 2006

Converting HTML text to Word

The easiest way is to copy the HTML text to the clipboard and paste it into Word.
But when you copy HTML to the clipboard using copy or cut, Windows adds initial header lines that indicate
the start and end of the HTML, the selection, and the fragment. So, to handle this, we need to
build the text to be copied so that it is compatible with the above requirement.

The code for this is

// Step 1: place the HTML on the clipboard.
object missing = System.Reflection.Missing.Value; // stands in for every optional argument below

// Wrap the raw HTML with the header and tags the clipboard expects before storing it
DataObject htmlData = new DataObject();
htmlData.SetData(DataFormats.Html, HtmlClipboardData(strHTML));
Clipboard.SetDataObject(htmlData, true); // store on the clipboard (copy flag = true)

// Step 2: paste the clipboard content into Word as HTML.
// "s" is the Word selection that receives the pasted content.
object pasteAsHtml = Word.WdPasteDataType.wdPasteHTML;
s.PasteSpecial(ref missing, ref missing, ref missing, ref missing, ref pasteAsHtml, ref missing, ref missing);



Now, convert the normal HTML text into the clipboard HTML format.

/// <summary>
/// Wraps an HTML fragment in the clipboard "HTML Format" envelope so that it can be
/// stored on the clipboard under <c>DataFormats.Html</c>.
/// </summary>
/// <remarks>
/// The returned text has this layout (every line ends with CR LF):
/// <code>
/// Version:1.0
/// StartHTML:nnnnnnnnnn
/// EndHTML:nnnnnnnnnn
/// StartFragment:nnnnnnnnnn
/// EndFragment:nnnnnnnnnn
/// &lt;!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"&gt;
/// &lt;html&gt;
/// &lt;head&gt;
/// &lt;meta http-equiv="Content-Type" content="text/html; charset=utf-8"&gt;
/// &lt;/head&gt;
/// &lt;body&gt;
/// &lt;!--StartFragment--&gt;...html...&lt;!--EndFragment--&gt;
/// &lt;/body&gt;
/// &lt;/html&gt;
/// </code>
/// Each <c>nnnnnnnnnn</c> is a zero-padded, ten-digit count of UTF-8 bytes measured from
/// the first byte of the returned text: StartHTML/EndHTML delimit the whole document,
/// StartFragment/EndFragment delimit <paramref name="html"/> itself.
/// </remarks>
/// <param name="html">The HTML fragment to wrap. May be empty, must not be null.</param>
/// <returns>The header, the wrapper markup and the fragment as one string.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="html"/> is null.</exception>
private static string HtmlClipboardData(string html)
{
    if (html == null)
    {
        throw new System.ArgumentNullException("html");
    }

    // Offsets are byte counts, so everything is measured in the encoding named in the meta tag.
    Encoding encoding = Encoding.UTF8;
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    // D10 pads every offset to ten digits. An int never needs more than ten, so the
    // header has the same byte length whatever values are written into it.
    const string headerFormat =
        "Version:1.0\r\n" +
        "StartHTML:{0:D10}\r\n" +
        "EndHTML:{1:D10}\r\n" +
        "StartFragment:{2:D10}\r\n" +
        "EndFragment:{3:D10}\r\n";

    // The fragment markers sit directly against the fragment, with no line break between.
    string htmlPrefix = string.Format(
        invariant,
        "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">\r\n" +
        "<html>\r\n" +
        "<head>\r\n" +
        "<meta http-equiv=\"Content-Type\" content=\"text/html; charset={0}\">\r\n" +
        "</head>\r\n" +
        "<body>\r\n" +
        "<!--StartFragment-->",
        encoding.WebName);

    const string htmlSuffix =
        "<!--EndFragment-->\r\n" +
        "</body>\r\n" +
        "</html>\r\n";

    // Measure a rendered header rather than subtracting the placeholder characters by hand.
    int headerLength = encoding.GetByteCount(string.Format(invariant, headerFormat, 0, 0, 0, 0));

    // Determine where each chunk starts and ends.
    int startHtml = headerLength;
    int startFragment = startHtml + encoding.GetByteCount(htmlPrefix);
    int endFragment = startFragment + encoding.GetByteCount(html);
    int endHtml = endFragment + encoding.GetByteCount(htmlSuffix);

    // Build the data.
    StringBuilder sb = new StringBuilder();
    sb.AppendFormat(invariant, headerFormat, startHtml, endHtml, startFragment, endFragment);
    sb.Append(htmlPrefix);
    sb.Append(html);
    sb.Append(htmlSuffix);
    return sb.ToString();
}