This code uses get_html_translation_table to build a lookup map from named HTML entities (e.g. &amp;trade;) to their numeric equivalents (e.g. &amp;#8482;). str_replace is then applied to a string with that map to convert the named entities it contains.
The first two functions (utf8_chr and utf8_ord) are based on http://stackoverflow.com/a/12848889
/**
 * Encode a Unicode code point as a UTF-8 byte string.
 *
 * @param int $ord Code point to encode.
 * @return string|false The 1-4 byte UTF-8 sequence, or false when $ord is not
 *                      a Unicode scalar value (negative, a surrogate, or
 *                      greater than U+10FFFF).
 */
function utf8_chr($ord)
{
    // Negative numbers would otherwise satisfy "$ord < 0x80" and yield a
    // bogus byte; surrogates (U+D800-U+DFFF) have no valid UTF-8 encoding.
    if ($ord < 0 || $ord > 0x10FFFF || ($ord >= 0xD800 && $ord <= 0xDFFF)) {
        return false;
    }

    if ($ord < 0x80) {
        return pack('C', $ord);
    }

    if ($ord < 0x0800) {
        return pack(
            'C*',
            ($ord >> 6) | 0xC0,
            ($ord & 0x3F) | 0x80
        );
    }

    if ($ord < 0x010000) {
        return pack(
            'C*',
            ($ord >> 12) | 0xE0,
            (($ord >> 6) & 0x3F) | 0x80,
            ($ord & 0x3F) | 0x80
        );
    }

    return pack(
        'C*',
        ($ord >> 18) | 0xF0,
        (($ord >> 12) & 0x3F) | 0x80,
        (($ord >> 6) & 0x3F) | 0x80,
        ($ord & 0x3F) | 0x80
    );
}

/**
 * Decode a single UTF-8 encoded character into its Unicode code point.
 *
 * @param string $chr Exactly one UTF-8 encoded character (1-4 bytes).
 * @return int|false The code point, or false when $chr is empty, longer than
 *                   one character, or not well-formed UTF-8 (bad lead or
 *                   continuation byte, overlong form, encoded surrogate, or
 *                   a value above U+10FFFF).
 */
function utf8_ord($chr)
{
    $bytes = array_values(unpack('C*', $chr));
    $count = count($bytes);

    if ($count < 1 || $count > 4) {
        return false;
    }

    // Every byte after the lead byte must look like 10xxxxxx.
    for ($i = 1; $i < $count; $i++) {
        if (($bytes[$i] & 0xC0) !== 0x80) {
            return false;
        }
    }

    switch ($count) {
        case 1:
            return $bytes[0] < 0x80 ? $bytes[0] : false;

        case 2:
            if (($bytes[0] & 0xE0) !== 0xC0) {
                return false;
            }
            $ord = (($bytes[0] & 0x1F) << 6) | ($bytes[1] & 0x3F);
            // Values below 0x80 fit in one byte, so this would be overlong.
            return $ord >= 0x80 ? $ord : false;

        case 3:
            if (($bytes[0] & 0xF0) !== 0xE0) {
                return false;
            }
            $ord = (($bytes[0] & 0x0F) << 12)
                | (($bytes[1] & 0x3F) << 6)
                | ($bytes[2] & 0x3F);
            // Reject overlong forms and UTF-16 surrogates.
            if ($ord < 0x0800 || ($ord >= 0xD800 && $ord <= 0xDFFF)) {
                return false;
            }
            return $ord;

        default: // 4 bytes
            if (($bytes[0] & 0xF8) !== 0xF0) {
                return false;
            }
            $ord = (($bytes[0] & 0x07) << 18)
                | (($bytes[1] & 0x3F) << 12)
                | (($bytes[2] & 0x3F) << 6)
                | ($bytes[3] & 0x3F);
            // Reject overlong forms and anything past the end of Unicode.
            if ($ord < 0x010000 || $ord > 0x10FFFF) {
                return false;
            }
            return $ord;
    }
}

/**
 * Build a lookup map from named HTML 4.01 entities to numeric entities.
 *
 * @return array Map such as array('&amp;' => '&#38;', '&trade;' => '&#8482;').
 */
function named_to_numeric_entity_map()
{
    // ENT_HTML401 on its own equals 0, i.e. ENT_NOQUOTES, which silently drops
    // &quot; from the table; ENT_COMPAT keeps it. The encoding is given
    // explicitly because utf8_ord() needs the table's characters in UTF-8.
    $table = get_html_translation_table(HTML_ENTITIES, ENT_COMPAT | ENT_HTML401, 'UTF-8');

    $map = array();
    foreach ($table as $char => $entity) {
        $map[$entity] = '&#' . utf8_ord($char) . ';';
    }

    return $map;
}

// The sample must actually contain named entities; with raw "<", "&" and "™"
// characters there is nothing for the map to replace.
$input = '&lt;title&gt;My Web Page&lt;/title&gt; this &amp; that&trade;';

$map = named_to_numeric_entity_map();

$output = str_replace(array_keys($map), array_values($map), $input);

echo "$output\n";