Make sure the whole link text is matched as a URL, not just part of it. Treat additional symbols (`#%&*@()`) as invalid in domain names. Add 3 tests.

This commit is contained in:
Jonathan Haas 2019-04-26 09:42:20 +02:00
parent 2fe4d2076b
commit bfe504fed3
2 changed files with 29 additions and 6 deletions

View file

@ -304,12 +304,12 @@ ConversationPageState.getNodeBounds = function(node) {
* Test for URL-like `text` that leads somewhere other than `href`.
*/
ConversationPageState.isDeceptiveText = function(text, href) {
// First, does text look like a URI? Right now, just test whether
// it has <string>.<string> in it. More sophisticated tests are
// possible.
let domain = new RegExp("([a-z]*://)?" // Optional scheme
+ "([^\\s:/]+\\.[^\\s:/\\.]+)" // Domain
+ "(/[^\\s]*)?"); // Optional path
// First, does text look like a URI?
let domain = new RegExp("^"
+ "([a-z]*://)?" // Optional scheme
+ "([^\\s:/#%&*@()]+\\.[^\\s:/#%&*@()\\.]+)" // Domain
+ "(/[^\\s]*)?" // Optional path
+ "$");
let textParts = text.match(domain);
if (textParts == null) {
return ConversationPageState.NOT_DECEPTIVE;