mirror of
https://github.com/phpbb/phpbb.git
synced 2025-06-08 04:18:52 +00:00
[ticket/16851] Add Amazonbot, AhrefsBot and SemrushBot
PHPBB3-16851 Signed-off-by: MichaIng <micha@dietpi.com>
This commit is contained in:
parent
75df7c202e
commit
3638b36849
4 changed files with 131 additions and 10 deletions
|
@ -2,7 +2,7 @@
|
||||||
/**
|
/**
|
||||||
* Rebuild BOTS
|
* Rebuild BOTS
|
||||||
*
|
*
|
||||||
* You should make a backup from your whole database. Things can and will go wrong.
|
* You should make a backup from your whole database. Things can and will go wrong.
|
||||||
* This will only work if no BOTs were added.
|
* This will only work if no BOTs were added.
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
@ -24,10 +24,14 @@ $user->setup();
|
||||||
|
|
||||||
$bots = array(
|
$bots = array(
|
||||||
'AdsBot [Google]' => array('AdsBot-Google', ''),
|
'AdsBot [Google]' => array('AdsBot-Google', ''),
|
||||||
|
'Ahrefs [Bot]' => array('AhrefsBot/', ''),
|
||||||
'Alexa [Bot]' => array('ia_archiver', ''),
|
'Alexa [Bot]' => array('ia_archiver', ''),
|
||||||
'Alta Vista [Bot]' => array('Scooter/', ''),
|
'Alta Vista [Bot]' => array('Scooter/', ''),
|
||||||
|
'Amazon [Bot]' => array('Amazonbot/', ''),
|
||||||
'Ask Jeeves [Bot]' => array('Ask Jeeves', ''),
|
'Ask Jeeves [Bot]' => array('Ask Jeeves', ''),
|
||||||
'Baidu [Spider]' => array('Baiduspider+(', ''),
|
'Baidu [Spider]' => array('Baiduspider', ''),
|
||||||
|
'Bing [Bot]' => array('bingbot/', ''),
|
||||||
|
'DuckDuckGo [Bot]' => array('DuckDuckBot/', ''),
|
||||||
'Exabot [Bot]' => array('Exabot/', ''),
|
'Exabot [Bot]' => array('Exabot/', ''),
|
||||||
'FAST Enterprise [Crawler]' => array('FAST Enterprise Crawler', ''),
|
'FAST Enterprise [Crawler]' => array('FAST Enterprise Crawler', ''),
|
||||||
'FAST WebCrawler [Crawler]' => array('FAST-WebCrawler/', ''),
|
'FAST WebCrawler [Crawler]' => array('FAST-WebCrawler/', ''),
|
||||||
|
@ -41,7 +45,7 @@ $bots = array(
|
||||||
'Heritrix [Crawler]' => array('heritrix/1.', ''),
|
'Heritrix [Crawler]' => array('heritrix/1.', ''),
|
||||||
'IBM Research [Bot]' => array('ibm.com/cs/crawler', ''),
|
'IBM Research [Bot]' => array('ibm.com/cs/crawler', ''),
|
||||||
'ICCrawler - ICjobs' => array('ICCrawler - ICjobs', ''),
|
'ICCrawler - ICjobs' => array('ICCrawler - ICjobs', ''),
|
||||||
'ichiro [Crawler]' => array('ichiro/2', ''),
|
'ichiro [Crawler]' => array('ichiro/', ''),
|
||||||
'Majestic-12 [Bot]' => array('MJ12bot/', ''),
|
'Majestic-12 [Bot]' => array('MJ12bot/', ''),
|
||||||
'Metager [Bot]' => array('MetagerBot/', ''),
|
'Metager [Bot]' => array('MetagerBot/', ''),
|
||||||
'MSN NewsBlogs' => array('msnbot-NewsBlogs/', ''),
|
'MSN NewsBlogs' => array('msnbot-NewsBlogs/', ''),
|
||||||
|
@ -54,6 +58,7 @@ $bots = array(
|
||||||
'Online link [Validator]' => array('online link validator', ''),
|
'Online link [Validator]' => array('online link validator', ''),
|
||||||
'psbot [Picsearch]' => array('psbot/0', ''),
|
'psbot [Picsearch]' => array('psbot/0', ''),
|
||||||
'Seekport [Bot]' => array('Seekbot/', ''),
|
'Seekport [Bot]' => array('Seekbot/', ''),
|
||||||
|
'Semrush [Bot]' => array('SemrushBot/', ''),
|
||||||
'Sensis [Crawler]' => array('Sensis Web Crawler', ''),
|
'Sensis [Crawler]' => array('Sensis Web Crawler', ''),
|
||||||
'SEO Crawler' => array('SEO search Crawler/', ''),
|
'SEO Crawler' => array('SEO search Crawler/', ''),
|
||||||
'Seoma [Crawler]' => array('Seoma [SEO Crawler]', ''),
|
'Seoma [Crawler]' => array('Seoma [SEO Crawler]', ''),
|
||||||
|
@ -63,7 +68,7 @@ $bots = array(
|
||||||
'Synoo [Bot]' => array('SynooBot/', ''),
|
'Synoo [Bot]' => array('SynooBot/', ''),
|
||||||
'Telekom [Bot]' => array('crawleradmin.t-info@telekom.de', ''),
|
'Telekom [Bot]' => array('crawleradmin.t-info@telekom.de', ''),
|
||||||
'TurnitinBot [Bot]' => array('TurnitinBot/', ''),
|
'TurnitinBot [Bot]' => array('TurnitinBot/', ''),
|
||||||
'Voyager [Bot]' => array('voyager/1.0', ''),
|
'Voyager [Bot]' => array('voyager/', ''),
|
||||||
'W3 [Sitesearch]' => array('W3 SiteSearch Crawler', ''),
|
'W3 [Sitesearch]' => array('W3 SiteSearch Crawler', ''),
|
||||||
'W3C [Linkcheck]' => array('W3C-checklink/', ''),
|
'W3C [Linkcheck]' => array('W3C-checklink/', ''),
|
||||||
'W3C [Validator]' => array('W3C_*Validator', ''),
|
'W3C [Validator]' => array('W3C_*Validator', ''),
|
||||||
|
@ -74,7 +79,7 @@ $bots = array(
|
||||||
'Yahoo [Bot]' => array('Yahoo! Slurp', ''),
|
'Yahoo [Bot]' => array('Yahoo! Slurp', ''),
|
||||||
'YahooSeeker [Bot]' => array('YahooSeeker/', ''),
|
'YahooSeeker [Bot]' => array('YahooSeeker/', ''),
|
||||||
);
|
);
|
||||||
|
|
||||||
$bot_ids = array();
|
$bot_ids = array();
|
||||||
user_get_id_name($bot_ids, array_keys($bots), USER_IGNORE);
|
user_get_id_name($bot_ids, array_keys($bots), USER_IGNORE);
|
||||||
foreach($bot_ids as $bot)
|
foreach($bot_ids as $bot)
|
||||||
|
|
|
@ -1836,10 +1836,12 @@ function add_bots()
|
||||||
|
|
||||||
$bots = array(
|
$bots = array(
|
||||||
'AdsBot [Google]' => array('AdsBot-Google', ''),
|
'AdsBot [Google]' => array('AdsBot-Google', ''),
|
||||||
|
'Ahrefs [Bot]' => array('AhrefsBot/', ''),
|
||||||
'Alexa [Bot]' => array('ia_archiver', ''),
|
'Alexa [Bot]' => array('ia_archiver', ''),
|
||||||
'Alta Vista [Bot]' => array('Scooter/', ''),
|
'Alta Vista [Bot]' => array('Scooter/', ''),
|
||||||
|
'Amazon [Bot]' => array('Amazonbot/', ''),
|
||||||
'Ask Jeeves [Bot]' => array('Ask Jeeves', ''),
|
'Ask Jeeves [Bot]' => array('Ask Jeeves', ''),
|
||||||
'Baidu [Spider]' => array('Baiduspider+(', ''),
|
'Baidu [Spider]' => array('Baiduspider', ''),
|
||||||
'Bing [Bot]' => array('bingbot/', ''),
|
'Bing [Bot]' => array('bingbot/', ''),
|
||||||
'DuckDuckGo [Bot]' => array('DuckDuckBot/', ''),
|
'DuckDuckGo [Bot]' => array('DuckDuckBot/', ''),
|
||||||
'Exabot [Bot]' => array('Exabot/', ''),
|
'Exabot [Bot]' => array('Exabot/', ''),
|
||||||
|
@ -1855,7 +1857,7 @@ function add_bots()
|
||||||
'Heritrix [Crawler]' => array('heritrix/1.', ''),
|
'Heritrix [Crawler]' => array('heritrix/1.', ''),
|
||||||
'IBM Research [Bot]' => array('ibm.com/cs/crawler', ''),
|
'IBM Research [Bot]' => array('ibm.com/cs/crawler', ''),
|
||||||
'ICCrawler - ICjobs' => array('ICCrawler - ICjobs', ''),
|
'ICCrawler - ICjobs' => array('ICCrawler - ICjobs', ''),
|
||||||
'ichiro [Crawler]' => array('ichiro/2', ''),
|
'ichiro [Crawler]' => array('ichiro/', ''),
|
||||||
'Majestic-12 [Bot]' => array('MJ12bot/', ''),
|
'Majestic-12 [Bot]' => array('MJ12bot/', ''),
|
||||||
'Metager [Bot]' => array('MetagerBot/', ''),
|
'Metager [Bot]' => array('MetagerBot/', ''),
|
||||||
'MSN NewsBlogs' => array('msnbot-NewsBlogs/', ''),
|
'MSN NewsBlogs' => array('msnbot-NewsBlogs/', ''),
|
||||||
|
@ -1868,6 +1870,7 @@ function add_bots()
|
||||||
'Online link [Validator]' => array('online link validator', ''),
|
'Online link [Validator]' => array('online link validator', ''),
|
||||||
'psbot [Picsearch]' => array('psbot/0', ''),
|
'psbot [Picsearch]' => array('psbot/0', ''),
|
||||||
'Seekport [Bot]' => array('Seekbot/', ''),
|
'Seekport [Bot]' => array('Seekbot/', ''),
|
||||||
|
'Semrush [Bot]' => array('SemrushBot/', ''),
|
||||||
'Sensis [Crawler]' => array('Sensis Web Crawler', ''),
|
'Sensis [Crawler]' => array('Sensis Web Crawler', ''),
|
||||||
'SEO Crawler' => array('SEO search Crawler/', ''),
|
'SEO Crawler' => array('SEO search Crawler/', ''),
|
||||||
'Seoma [Crawler]' => array('Seoma [SEO Crawler]', ''),
|
'Seoma [Crawler]' => array('Seoma [SEO Crawler]', ''),
|
||||||
|
@ -1877,7 +1880,7 @@ function add_bots()
|
||||||
'Synoo [Bot]' => array('SynooBot/', ''),
|
'Synoo [Bot]' => array('SynooBot/', ''),
|
||||||
'Telekom [Bot]' => array('crawleradmin.t-info@telekom.de', ''),
|
'Telekom [Bot]' => array('crawleradmin.t-info@telekom.de', ''),
|
||||||
'TurnitinBot [Bot]' => array('TurnitinBot/', ''),
|
'TurnitinBot [Bot]' => array('TurnitinBot/', ''),
|
||||||
'Voyager [Bot]' => array('voyager/1.0', ''),
|
'Voyager [Bot]' => array('voyager/', ''),
|
||||||
'W3 [Sitesearch]' => array('W3 SiteSearch Crawler', ''),
|
'W3 [Sitesearch]' => array('W3 SiteSearch Crawler', ''),
|
||||||
'W3C [Linkcheck]' => array('W3C-checklink/', ''),
|
'W3C [Linkcheck]' => array('W3C-checklink/', ''),
|
||||||
'W3C [Validator]' => array('W3C_*Validator', ''),
|
'W3C [Validator]' => array('W3C_*Validator', ''),
|
||||||
|
|
104
phpBB/phpbb/db/migration/data/v33x/bot_update_v2.php
Normal file
104
phpBB/phpbb/db/migration/data/v33x/bot_update_v2.php
Normal file
|
@ -0,0 +1,104 @@
|
||||||
|
<?php
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* This file is part of the phpBB Forum Software package.
|
||||||
|
*
|
||||||
|
* @copyright (c) phpBB Limited <https://www.phpbb.com>
|
||||||
|
* @license GNU General Public License, version 2 (GPL-2.0)
|
||||||
|
*
|
||||||
|
* For full copyright and license information, please see
|
||||||
|
* the docs/CREDITS.txt file.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
namespace phpbb\db\migration\data\v33x;
|
||||||
|
|
||||||
|
/**
 * Migration that registers the Ahrefs, Amazon and Semrush crawlers as bots.
 *
 * For every bot in the list that does not yet have a matching user row, a
 * user is created in the BOTS group and a corresponding row is inserted
 * into the bots table.
 */
class bot_update_v2 extends \phpbb\db\migration\migration
{
	/**
	 * Migrations that must have run before this one.
	 *
	 * @return array List of migration class names
	 */
	public static function depends_on()
	{
		return ['\phpbb\db\migration\data\v33x\v334'];
	}

	/**
	 * Data steps of this migration: a single custom callback to add_bots().
	 *
	 * @return array List of migration data steps
	 */
	public function update_data()
	{
		return [
			['custom', [[$this, 'add_bots']]],
		];
	}

	/**
	 * Add the new bots, skipping any whose username already exists.
	 *
	 * The BOTS group row is looked up lazily (only once a bot actually has
	 * to be created) and reused for the remaining bots.
	 *
	 * @return void
	 */
	public function add_bots()
	{
		// Bot display name => user agent fragment stored as bot_agent
		$bots = [
			'Ahrefs [Bot]'	=> 'AhrefsBot/',
			'Amazon [Bot]'	=> 'Amazonbot/',
			'Semrush [Bot]'	=> 'SemrushBot/',
		];

		$group_row = [];

		foreach ($bots as $bot_name => $bot_agent)
		{
			$bot_name_clean = utf8_clean_string($bot_name);

			$sql = 'SELECT user_id
				FROM ' . $this->table_prefix . 'users
				WHERE ' . $this->db->sql_build_array('SELECT', ['username_clean' => $bot_name_clean]);
			$result = $this->db->sql_query($sql);
			$bot_exists = (bool) $this->db->sql_fetchfield('user_id');
			$this->db->sql_freeresult($result);

			// A user with this clean name is already present; nothing to add
			if ($bot_exists)
			{
				continue;
			}

			if (empty($group_row))
			{
				$sql = 'SELECT group_id, group_colour
					FROM ' . $this->table_prefix . 'groups
					WHERE ' . $this->db->sql_build_array('SELECT', ['group_name' => 'BOTS']);
				$result = $this->db->sql_query($sql);
				$group_row = $this->db->sql_fetchrow($result);
				$this->db->sql_freeresult($result);

				// Default fallback, should never get here.
				// sql_fetchrow() yields false when no row matched, so the
				// check must not use count(): count(false) warns on
				// PHP 7.2+ and throws a TypeError on PHP 8.
				if (empty($group_row))
				{
					$group_row = [
						'group_id'		=> 6,
						'group_colour'	=> '9E8DA7',
					];
				}
			}

			// user_add() comes from includes/functions_user, load it on demand
			if (!function_exists('user_add'))
			{
				include($this->phpbb_root_path . 'includes/functions_user.' . $this->php_ext);
			}

			$user_row = [
				'user_type'				=> USER_IGNORE,
				'group_id'				=> $group_row['group_id'],
				'username'				=> $bot_name,
				'user_regdate'			=> time(),
				'user_password'			=> '',
				'user_colour'			=> $group_row['group_colour'],
				'user_email'			=> '',
				'user_lang'				=> $this->config['default_lang'],
				'user_style'			=> $this->config['default_style'],
				'user_timezone'			=> 0,
				'user_dateformat'		=> $this->config['default_dateformat'],
				'user_allow_massemail'	=> 0,
			];

			$user_id = user_add($user_row);

			$sql = 'INSERT INTO ' . $this->table_prefix . 'bots ' . $this->db->sql_build_array('INSERT', [
				'bot_active'	=> 1,
				'bot_name'		=> $bot_name,
				'user_id'		=> (int) $user_id,
				'bot_agent'		=> $bot_agent,
				'bot_ip'		=> '',
			]);
			$this->db->sql_query($sql);
		}
	}
}
|
|
@ -58,13 +58,15 @@ class add_bots extends \phpbb\install\task_base
|
||||||
*/
|
*/
|
||||||
protected $bot_list = array(
|
protected $bot_list = array(
|
||||||
'AdsBot [Google]' => array('AdsBot-Google', ''),
|
'AdsBot [Google]' => array('AdsBot-Google', ''),
|
||||||
|
'Ahrefs [Bot]' => array('AhrefsBot/', ''),
|
||||||
'Alexa [Bot]' => array('ia_archiver', ''),
|
'Alexa [Bot]' => array('ia_archiver', ''),
|
||||||
'Alta Vista [Bot]' => array('Scooter/', ''),
|
'Alta Vista [Bot]' => array('Scooter/', ''),
|
||||||
|
'Amazon [Bot]' => array('Amazonbot/', ''),
|
||||||
'Ask Jeeves [Bot]' => array('Ask Jeeves', ''),
|
'Ask Jeeves [Bot]' => array('Ask Jeeves', ''),
|
||||||
'Baidu [Spider]' => array('Baiduspider', ''),
|
'Baidu [Spider]' => array('Baiduspider', ''),
|
||||||
'Bing [Bot]' => array('bingbot/', ''),
|
'Bing [Bot]' => array('bingbot/', ''),
|
||||||
'DuckDuckGo [Bot]' => array('DuckDuckBot/', ''),
|
'DuckDuckGo [Bot]' => array('DuckDuckBot/', ''),
|
||||||
'Exabot [Bot]' => array('Exabot', ''),
|
'Exabot [Bot]' => array('Exabot/', ''),
|
||||||
'FAST Enterprise [Crawler]' => array('FAST Enterprise Crawler', ''),
|
'FAST Enterprise [Crawler]' => array('FAST Enterprise Crawler', ''),
|
||||||
'FAST WebCrawler [Crawler]' => array('FAST-WebCrawler/', ''),
|
'FAST WebCrawler [Crawler]' => array('FAST-WebCrawler/', ''),
|
||||||
'Francis [Bot]' => array('http://www.neomo.de/', ''),
|
'Francis [Bot]' => array('http://www.neomo.de/', ''),
|
||||||
|
@ -83,21 +85,28 @@ class add_bots extends \phpbb\install\task_base
|
||||||
'MSN NewsBlogs' => array('msnbot-NewsBlogs/', ''),
|
'MSN NewsBlogs' => array('msnbot-NewsBlogs/', ''),
|
||||||
'MSN [Bot]' => array('msnbot/', ''),
|
'MSN [Bot]' => array('msnbot/', ''),
|
||||||
'MSNbot Media' => array('msnbot-media/', ''),
|
'MSNbot Media' => array('msnbot-media/', ''),
|
||||||
|
'NG-Search [Bot]' => array('NG-Search/', ''),
|
||||||
'Nutch [Bot]' => array('http://lucene.apache.org/nutch/', ''),
|
'Nutch [Bot]' => array('http://lucene.apache.org/nutch/', ''),
|
||||||
|
'Nutch/CVS [Bot]' => array('NutchCVS/', ''),
|
||||||
|
'OmniExplorer [Bot]' => array('OmniExplorer_Bot/', ''),
|
||||||
'Online link [Validator]' => array('online link validator', ''),
|
'Online link [Validator]' => array('online link validator', ''),
|
||||||
'psbot [Picsearch]' => array('psbot/0', ''),
|
'psbot [Picsearch]' => array('psbot/0', ''),
|
||||||
|
'Seekport [Bot]' => array('Seekbot/', ''),
|
||||||
|
'Semrush [Bot]' => array('SemrushBot/', ''),
|
||||||
'Sensis [Crawler]' => array('Sensis Web Crawler', ''),
|
'Sensis [Crawler]' => array('Sensis Web Crawler', ''),
|
||||||
'SEO Crawler' => array('SEO search Crawler/', ''),
|
'SEO Crawler' => array('SEO search Crawler/', ''),
|
||||||
'Seoma [Crawler]' => array('Seoma [SEO Crawler]', ''),
|
'Seoma [Crawler]' => array('Seoma [SEO Crawler]', ''),
|
||||||
'SEOSearch [Crawler]' => array('SEOsearch/', ''),
|
'SEOSearch [Crawler]' => array('SEOsearch/', ''),
|
||||||
'Snappy [Bot]' => array('Snappy/1.1 ( http://www.urltrends.com/ )', ''),
|
'Snappy [Bot]' => array('Snappy/1.1 ( http://www.urltrends.com/ )', ''),
|
||||||
'Steeler [Crawler]' => array('http://www.tkl.iis.u-tokyo.ac.jp/~crawler/', ''),
|
'Steeler [Crawler]' => array('http://www.tkl.iis.u-tokyo.ac.jp/~crawler/', ''),
|
||||||
|
'Synoo [Bot]' => array('SynooBot/', ''),
|
||||||
'Telekom [Bot]' => array('crawleradmin.t-info@telekom.de', ''),
|
'Telekom [Bot]' => array('crawleradmin.t-info@telekom.de', ''),
|
||||||
'TurnitinBot [Bot]' => array('TurnitinBot/', ''),
|
'TurnitinBot [Bot]' => array('TurnitinBot/', ''),
|
||||||
'Voyager [Bot]' => array('voyager/', ''),
|
'Voyager [Bot]' => array('voyager/', ''),
|
||||||
'W3 [Sitesearch]' => array('W3 SiteSearch Crawler', ''),
|
'W3 [Sitesearch]' => array('W3 SiteSearch Crawler', ''),
|
||||||
'W3C [Linkcheck]' => array('W3C-checklink/', ''),
|
'W3C [Linkcheck]' => array('W3C-checklink/', ''),
|
||||||
'W3C [Validator]' => array('W3C_Validator', ''),
|
'W3C [Validator]' => array('W3C_*Validator', ''),
|
||||||
|
'WiseNut [Bot]' => array('http://www.WISEnutbot.com', ''),
|
||||||
'YaCy [Bot]' => array('yacybot', ''),
|
'YaCy [Bot]' => array('yacybot', ''),
|
||||||
'Yahoo MMCrawler [Bot]' => array('Yahoo-MMCrawler/', ''),
|
'Yahoo MMCrawler [Bot]' => array('Yahoo-MMCrawler/', ''),
|
||||||
'Yahoo Slurp [Bot]' => array('Yahoo! DE Slurp', ''),
|
'Yahoo Slurp [Bot]' => array('Yahoo! DE Slurp', ''),
|
||||||
|
|
Loading…
Add table
Reference in a new issue