From 3638b36849fe5baca712e7dcc4a941de562b02cc Mon Sep 17 00:00:00 2001 From: MichaIng Date: Fri, 20 Aug 2021 15:15:00 +0200 Subject: [PATCH] [ticket/16851] Add Amazonbot, AhrefsBot and SemrushBot PHPBB3-16851 Signed-off-by: MichaIng --- phpBB/develop/repair_bots.php | 15 ++- phpBB/includes/functions_convert.php | 9 +- .../db/migration/data/v33x/bot_update_v2.php | 104 ++++++++++++++++++ .../module/install_data/task/add_bots.php | 13 ++- 4 files changed, 131 insertions(+), 10 deletions(-) create mode 100644 phpBB/phpbb/db/migration/data/v33x/bot_update_v2.php diff --git a/phpBB/develop/repair_bots.php b/phpBB/develop/repair_bots.php index 2c6e9ce091..100cc1e9af 100644 --- a/phpBB/develop/repair_bots.php +++ b/phpBB/develop/repair_bots.php @@ -2,7 +2,7 @@ /** * Rebuild BOTS * -* You should make a backup from your whole database. Things can and will go wrong. +* You should make a backup from your whole database. Things can and will go wrong. * This will only work if no BOTs were added. * */ @@ -24,10 +24,14 @@ $user->setup(); $bots = array( 'AdsBot [Google]' => array('AdsBot-Google', ''), + 'Ahrefs [Bot]' => array('AhrefsBot/', ''), 'Alexa [Bot]' => array('ia_archiver', ''), 'Alta Vista [Bot]' => array('Scooter/', ''), + 'Amazon [Bot]' => array('Amazonbot/', ''), 'Ask Jeeves [Bot]' => array('Ask Jeeves', ''), - 'Baidu [Spider]' => array('Baiduspider+(', ''), + 'Baidu [Spider]' => array('Baiduspider', ''), + 'Bing [Bot]' => array('bingbot/', ''), + 'DuckDuckGo [Bot]' => array('DuckDuckBot/', ''), 'Exabot [Bot]' => array('Exabot/', ''), 'FAST Enterprise [Crawler]' => array('FAST Enterprise Crawler', ''), 'FAST WebCrawler [Crawler]' => array('FAST-WebCrawler/', ''), @@ -41,7 +45,7 @@ $bots = array( 'Heritrix [Crawler]' => array('heritrix/1.', ''), 'IBM Research [Bot]' => array('ibm.com/cs/crawler', ''), 'ICCrawler - ICjobs' => array('ICCrawler - ICjobs', ''), - 'ichiro [Crawler]' => array('ichiro/2', ''), + 'ichiro [Crawler]' => array('ichiro/', ''), 'Majestic-12 [Bot]' 
=> array('MJ12bot/', ''), 'Metager [Bot]' => array('MetagerBot/', ''), 'MSN NewsBlogs' => array('msnbot-NewsBlogs/', ''), @@ -54,6 +58,7 @@ $bots = array( 'Online link [Validator]' => array('online link validator', ''), 'psbot [Picsearch]' => array('psbot/0', ''), 'Seekport [Bot]' => array('Seekbot/', ''), + 'Semrush [Bot]' => array('SemrushBot/', ''), 'Sensis [Crawler]' => array('Sensis Web Crawler', ''), 'SEO Crawler' => array('SEO search Crawler/', ''), 'Seoma [Crawler]' => array('Seoma [SEO Crawler]', ''), @@ -63,7 +68,7 @@ $bots = array( 'Synoo [Bot]' => array('SynooBot/', ''), 'Telekom [Bot]' => array('crawleradmin.t-info@telekom.de', ''), 'TurnitinBot [Bot]' => array('TurnitinBot/', ''), - 'Voyager [Bot]' => array('voyager/1.0', ''), + 'Voyager [Bot]' => array('voyager/', ''), 'W3 [Sitesearch]' => array('W3 SiteSearch Crawler', ''), 'W3C [Linkcheck]' => array('W3C-checklink/', ''), 'W3C [Validator]' => array('W3C_*Validator', ''), @@ -74,7 +79,7 @@ $bots = array( 'Yahoo [Bot]' => array('Yahoo! 
Slurp', ''), 'YahooSeeker [Bot]' => array('YahooSeeker/', ''), ); - + $bot_ids = array(); user_get_id_name($bot_ids, array_keys($bots), USER_IGNORE); foreach($bot_ids as $bot) diff --git a/phpBB/includes/functions_convert.php b/phpBB/includes/functions_convert.php index cc5798a52e..1b499293d7 100644 --- a/phpBB/includes/functions_convert.php +++ b/phpBB/includes/functions_convert.php @@ -1836,10 +1836,12 @@ function add_bots() $bots = array( 'AdsBot [Google]' => array('AdsBot-Google', ''), + 'Ahrefs [Bot]' => array('AhrefsBot/', ''), 'Alexa [Bot]' => array('ia_archiver', ''), 'Alta Vista [Bot]' => array('Scooter/', ''), + 'Amazon [Bot]' => array('Amazonbot/', ''), 'Ask Jeeves [Bot]' => array('Ask Jeeves', ''), - 'Baidu [Spider]' => array('Baiduspider+(', ''), + 'Baidu [Spider]' => array('Baiduspider', ''), 'Bing [Bot]' => array('bingbot/', ''), 'DuckDuckGo [Bot]' => array('DuckDuckBot/', ''), 'Exabot [Bot]' => array('Exabot/', ''), @@ -1855,7 +1857,7 @@ function add_bots() 'Heritrix [Crawler]' => array('heritrix/1.', ''), 'IBM Research [Bot]' => array('ibm.com/cs/crawler', ''), 'ICCrawler - ICjobs' => array('ICCrawler - ICjobs', ''), - 'ichiro [Crawler]' => array('ichiro/2', ''), + 'ichiro [Crawler]' => array('ichiro/', ''), 'Majestic-12 [Bot]' => array('MJ12bot/', ''), 'Metager [Bot]' => array('MetagerBot/', ''), 'MSN NewsBlogs' => array('msnbot-NewsBlogs/', ''), @@ -1868,6 +1870,7 @@ function add_bots() 'Online link [Validator]' => array('online link validator', ''), 'psbot [Picsearch]' => array('psbot/0', ''), 'Seekport [Bot]' => array('Seekbot/', ''), + 'Semrush [Bot]' => array('SemrushBot/', ''), 'Sensis [Crawler]' => array('Sensis Web Crawler', ''), 'SEO Crawler' => array('SEO search Crawler/', ''), 'Seoma [Crawler]' => array('Seoma [SEO Crawler]', ''), @@ -1877,7 +1880,7 @@ function add_bots() 'Synoo [Bot]' => array('SynooBot/', ''), 'Telekom [Bot]' => array('crawleradmin.t-info@telekom.de', ''), 'TurnitinBot [Bot]' => array('TurnitinBot/', ''), - 'Voyager 
[Bot]' => array('voyager/1.0', ''), + 'Voyager [Bot]' => array('voyager/', ''), 'W3 [Sitesearch]' => array('W3 SiteSearch Crawler', ''), 'W3C [Linkcheck]' => array('W3C-checklink/', ''), 'W3C [Validator]' => array('W3C_*Validator', ''),
diff --git a/phpBB/phpbb/db/migration/data/v33x/bot_update_v2.php b/phpBB/phpbb/db/migration/data/v33x/bot_update_v2.php
new file mode 100644
index 0000000000..64af31516f
--- /dev/null
+++ b/phpBB/phpbb/db/migration/data/v33x/bot_update_v2.php
@@ -0,0 +1,134 @@
+<?php
+/**
+ *
+ * This file is part of the phpBB Forum Software package.
+ *
+ * @copyright (c) phpBB Limited <https://www.phpbb.com>
+ * @license GNU General Public License, version 2 (GPL-2.0)
+ *
+ * For full copyright and license information, please see
+ * the docs/CREDITS.txt file.
+ *
+ */
+
+namespace phpbb\db\migration\data\v33x;
+
+/**
+ * Migration that registers the Ahrefs, Amazon and Semrush crawlers as bots
+ * on boards where no user with the same clean username exists yet.
+ */
+class bot_update_v2 extends \phpbb\db\migration\migration
+{
+	/**
+	 * @return array Class names of the migrations this one builds upon
+	 */
+	public static function depends_on()
+	{
+		return ['\phpbb\db\migration\data\v33x\v334'];
+	}
+
+	/**
+	 * @return array A single custom step that invokes add_bots()
+	 */
+	public function update_data()
+	{
+		return [
+			['custom', [[$this, 'add_bots']]],
+		];
+	}
+
+	/**
+	 * Create a user account and a bots table row for each crawler below,
+	 * skipping any whose clean username is already present in the users table.
+	 *
+	 * @return void
+	 */
+	public function add_bots()
+	{
+		// Bot display name => user agent fragment stored as bot_agent
+		$bots = [
+			'Ahrefs [Bot]'	=> 'AhrefsBot/',
+			'Amazon [Bot]'	=> 'Amazonbot/',
+			'Semrush [Bot]'	=> 'SemrushBot/',
+		];
+
+		// Filled on first use so the groups table is only read when a bot is added
+		$group_row = [];
+
+		foreach ($bots as $bot_name => $bot_agent)
+		{
+			$bot_name_clean = utf8_clean_string($bot_name);
+
+			$sql = 'SELECT user_id
+				FROM ' . $this->table_prefix . 'users
+				WHERE ' . $this->db->sql_build_array('SELECT', ['username_clean' => $bot_name_clean]);
+			$result = $this->db->sql_query($sql);
+			$bot_exists = (bool) $this->db->sql_fetchfield('user_id');
+			$this->db->sql_freeresult($result);
+
+			if ($bot_exists)
+			{
+				continue;
+			}
+
+			if (empty($group_row))
+			{
+				$sql = 'SELECT group_id, group_colour
+					FROM ' . $this->table_prefix . 'groups
+					WHERE ' . $this->db->sql_build_array('SELECT', ['group_name' => 'BOTS']);
+				$result = $this->db->sql_query($sql);
+				$group_row = $this->db->sql_fetchrow($result);
+				$this->db->sql_freeresult($result);
+
+				// Default fallback, should never get here. sql_fetchrow() gives
+				// false (not an empty array) when no row matches, and count(false)
+				// is a TypeError on PHP 8, so test with empty() instead of count().
+				if (empty($group_row))
+				{
+					$group_row = [
+						'group_id'		=> 6,
+						'group_colour'	=> '9E8DA7',
+					];
+				}
+			}
+
+			if (!function_exists('user_add'))
+			{
+				include($this->phpbb_root_path . 'includes/functions_user.' . $this->php_ext);
+			}
+
+			$user_row = [
+				'user_type'				=> USER_IGNORE,
+				'group_id'				=> $group_row['group_id'],
+				'username'				=> $bot_name,
+				'user_regdate'			=> time(),
+				'user_password'			=> '',
+				'user_colour'			=> $group_row['group_colour'],
+				'user_email'			=> '',
+				'user_lang'				=> $this->config['default_lang'],
+				'user_style'			=> $this->config['default_style'],
+				'user_timezone'			=> 0,
+				'user_dateformat'		=> $this->config['default_dateformat'],
+				'user_allow_massemail'	=> 0,
+			];
+
+			$user_id = user_add($user_row);
+
+			// NOTE(review): user_add() is expected to return false when it cannot
+			// create the account - confirm; never register a bot without a user.
+			if (!$user_id)
+			{
+				continue;
+			}
+
+			$sql = 'INSERT INTO ' . $this->table_prefix . 'bots ' . $this->db->sql_build_array('INSERT', [
+				'bot_active'	=> 1,
+				'bot_name'		=> $bot_name,
+				'user_id'		=> (int) $user_id,
+				'bot_agent'		=> $bot_agent,
+				'bot_ip'		=> '',
+			]);
+			$this->db->sql_query($sql);
+		}
+	}
+}
diff --git a/phpBB/phpbb/install/module/install_data/task/add_bots.php b/phpBB/phpbb/install/module/install_data/task/add_bots.php index e53087a671..d2c29e36a8 100644 --- a/phpBB/phpbb/install/module/install_data/task/add_bots.php +++ b/phpBB/phpbb/install/module/install_data/task/add_bots.php @@ -58,13 +58,15 @@ class add_bots extends \phpbb\install\task_base */ protected $bot_list = array( 'AdsBot [Google]' => array('AdsBot-Google', ''), + 'Ahrefs [Bot]' => array('AhrefsBot/', ''), 'Alexa [Bot]' => array('ia_archiver', ''), 'Alta Vista [Bot]' => array('Scooter/', ''), + 'Amazon [Bot]' => array('Amazonbot/', ''), 'Ask Jeeves [Bot]' => array('Ask Jeeves', ''), 'Baidu [Spider]' => array('Baiduspider', ''), 'Bing [Bot]' => array('bingbot/', ''), 'DuckDuckGo [Bot]' => array('DuckDuckBot/', ''), - 'Exabot [Bot]' => array('Exabot', ''), + 'Exabot [Bot]' => array('Exabot/', ''), 'FAST Enterprise [Crawler]' => array('FAST Enterprise Crawler', ''), 'FAST WebCrawler [Crawler]' => array('FAST-WebCrawler/', ''), 'Francis [Bot]' => array('http://www.neomo.de/', ''), @@ -83,21 +85,28 @@ class add_bots extends \phpbb\install\task_base 'MSN NewsBlogs' => array('msnbot-NewsBlogs/', ''), 'MSN [Bot]' => array('msnbot/', ''), 'MSNbot Media' => array('msnbot-media/', ''), + 'NG-Search [Bot]' => array('NG-Search/', ''), 'Nutch [Bot]' => array('http://lucene.apache.org/nutch/', ''), + 'Nutch/CVS [Bot]' => array('NutchCVS/', ''), + 'OmniExplorer [Bot]' => array('OmniExplorer_Bot/', ''), 'Online link [Validator]' => array('online link validator', ''), 'psbot [Picsearch]' => array('psbot/0', ''), + 'Seekport [Bot]' => array('Seekbot/', ''), + 'Semrush [Bot]' => array('SemrushBot/', ''), 'Sensis [Crawler]' => array('Sensis Web Crawler', ''), 'SEO Crawler' => array('SEO search 
Crawler/', ''), 'Seoma [Crawler]' => array('Seoma [SEO Crawler]', ''), 'SEOSearch [Crawler]' => array('SEOsearch/', ''), 'Snappy [Bot]' => array('Snappy/1.1 ( http://www.urltrends.com/ )', ''), 'Steeler [Crawler]' => array('http://www.tkl.iis.u-tokyo.ac.jp/~crawler/', ''), + 'Synoo [Bot]' => array('SynooBot/', ''), 'Telekom [Bot]' => array('crawleradmin.t-info@telekom.de', ''), 'TurnitinBot [Bot]' => array('TurnitinBot/', ''), 'Voyager [Bot]' => array('voyager/', ''), 'W3 [Sitesearch]' => array('W3 SiteSearch Crawler', ''), 'W3C [Linkcheck]' => array('W3C-checklink/', ''), - 'W3C [Validator]' => array('W3C_Validator', ''), + 'W3C [Validator]' => array('W3C_*Validator', ''), + 'WiseNut [Bot]' => array('http://www.WISEnutbot.com', ''), 'YaCy [Bot]' => array('yacybot', ''), 'Yahoo MMCrawler [Bot]' => array('Yahoo-MMCrawler/', ''), 'Yahoo Slurp [Bot]' => array('Yahoo! DE Slurp', ''),