Getting all text from DOM
Ok, so I have this simple HTML code
<html>
<head>
<script src="jquery.js"></script>
<script src="script.js"></script>
<title>Get all text</title>
</head>
<body>
<span>This is Thomas</span>
This is Bar
<div id="content">
This is Foo.
</div>
<span>This is Bufu</span>
</body>
</html>
And i want to get all text in a variable. So i made this javascript code.
But "This is Foo" and "This is Bar" are not shown.
var sep = '~';
$(function() {
pageTexts = getTextFromPage();
console.log(pageTexts);
});
function getTextFromPage()
{
var pageText = '';
i = 0;
j = 0;
var itr = document.createTreeWalker(
document.getElementsByTagName("body")[0],
NodeFilter.SHOW_TEXT,
null, // no filter
false
);
while(itr.nextNode())
{
if (itr.currentNode.textContent.search("\t") &&
itr.currentNode.textContent.search("\n") &&
itr.currentNode.parentNode.nodeName.toLowerCase() != 'script' &&
itr.currentNode.parentNode.nodeName.toLowerCase() != 'noscript')
{
if (i == 0)
{
pageText = itr.currentNode.textContent;
i++;
}
else
{
pageText = pageText + sep + itr.currentNode.textContent;
i++;
}
}
charNumber = pageText.length;
elemNumber = i;
}
return pageText;
}
Current result is "This is Thomas~This is Bufu". And I would like to have
"This is Thomas~This is Bar~This is Foo~This is Bufu". Can anyone tell me
what is wrong or what should I do? I will apreciate any answer that helps
me. PS. This is for a chrome-extension and i will need to get all text
from any html page even if is a very complicated page.
No comments:
Post a Comment