This commit is contained in:
JoshyPHP 2024-10-15 11:29:10 +02:00 committed by GitHub
commit 4246c8d165
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 401 additions and 43 deletions

View file

@ -75,6 +75,9 @@ $lang = array_merge($lang, array(
'CLI_DESCRIPTION_REPARSER_REPARSE' => 'Reparses stored text with the current text_formatter services.',
'CLI_DESCRIPTION_REPARSER_REPARSE_ARG_1' => 'Type of text to reparse. Leave blank to reparse everything.',
'CLI_DESCRIPTION_REPARSER_REPARSE_OPT_DRY_RUN' => 'Do not save any changes; just print what would happen',
'CLI_DESCRIPTION_REPARSER_REPARSE_OPT_FILTER_CALLBACK' => 'PHP callback that accepts a single array argument for the record and returns a boolean to indicate whether the record must be reparsed, e.g.: "my\\ext\\reparser::filter"',
'CLI_DESCRIPTION_REPARSER_REPARSE_OPT_FILTER_TEXT_LIKE' => 'SQL LIKE predicate applied on the text, e.g.: "<r%"',
'CLI_DESCRIPTION_REPARSER_REPARSE_OPT_FILTER_TEXT_REGEXP' => 'PCRE regexp that matches against the text, e.g.: "/youtube/i"',
'CLI_DESCRIPTION_REPARSER_REPARSE_OPT_RANGE_MIN' => 'Lowest record ID to process',
'CLI_DESCRIPTION_REPARSER_REPARSE_OPT_RANGE_MAX' => 'Highest record ID to process',
'CLI_DESCRIPTION_REPARSER_REPARSE_OPT_RANGE_SIZE' => 'Approximate number of records to process at a time',

View file

@ -93,6 +93,24 @@ class reparse extends \phpbb\console\command\command
InputOption::VALUE_NONE,
$this->user->lang('CLI_DESCRIPTION_REPARSER_REPARSE_OPT_DRY_RUN')
)
->addOption(
'filter-callback',
null,
InputOption::VALUE_OPTIONAL,
$this->user->lang('CLI_DESCRIPTION_REPARSER_REPARSE_OPT_FILTER_CALLBACK')
)
->addOption(
'filter-text-like',
null,
InputOption::VALUE_OPTIONAL,
$this->user->lang('CLI_DESCRIPTION_REPARSER_REPARSE_OPT_FILTER_TEXT_LIKE')
)
->addOption(
'filter-text-regexp',
null,
InputOption::VALUE_OPTIONAL,
$this->user->lang('CLI_DESCRIPTION_REPARSER_REPARSE_OPT_FILTER_TEXT_REGEXP')
)
->addOption(
'resume',
null,
@ -161,6 +179,29 @@ class reparse extends \phpbb\console\command\command
return symfony_command::SUCCESS;
}
/**
* Collect the record filters requested for this command
*
* Each supported filter option is looked up in turn; only those that resolve
* to a non-null value are returned, keyed by their option name.
*
* @see \phpbb\textreparser\reparser_interface::reparse()
*
* @return array Filter criteria keyed by option name, possibly empty
*/
protected function get_filter(): array
{
	$filter = [];
	foreach (['filter-callback', 'filter-text-like', 'filter-text-regexp'] as $option_name)
	{
		$option_value = $this->get_option($option_name);
		if ($option_value === null)
		{
			// Option was not provided, leave it out of the filter set
			continue;
		}

		$filter[$option_name] = $option_value;
	}

	return $filter;
}
/**
* Get an option value, adjusted for given reparser
*
@ -221,6 +262,9 @@ class reparse extends \phpbb\console\command\command
$progress->setMessage($this->user->lang('CLI_REPARSER_REPARSE_REPARSING_START', $reparser->get_name()));
$progress->start();
// Initialize the record filter
$filter = $this->get_filter();
// Start from $max and decrement $current by $size until we reach $min
$current = $max;
while ($current >= $min)
@ -229,12 +273,22 @@ class reparse extends \phpbb\console\command\command
$end = max($min, $current);
$progress->setMessage($this->user->lang('CLI_REPARSER_REPARSE_REPARSING', $reparser->get_name(), $start, $end));
$reparser->reparse_range($start, $end);
$range = ['range-min' => $start, 'range-max' => $end];
$reparser->reparse($filter + $range);
$current = $start - 1;
$progress->setProgress($max + 1 - $start);
$this->reparser_manager->update_resume_data($name, $min, $current, $size, !$this->input->getOption('dry-run'));
$this->reparser_manager->update_resume_data(
$name,
$filter + [
'range-min' => $min,
'range-max' => $current,
'range-size' => $size
],
!$this->input->getOption('dry-run')
);
}
$progress->finish();

View file

@ -151,7 +151,11 @@ class reparser extends \phpbb\cron\task\base
$reparser->reparse_range($start, $end);
$this->reparser_manager->update_resume_data($this->reparser_name, $min, $start - 1, $size);
$this->resume_data['range-min'] = $min;
$this->resume_data['range-max'] = $start - 1;
$this->resume_data['range-size'] = $size;
$this->reparser_manager->update_resume_data($this->reparser_name, $this->resume_data);
}
$this->config->set($this->reparser_name . '_last_cron', time());

View file

@ -31,13 +31,17 @@ abstract class base implements reparser_interface
abstract public function get_max_id();
/**
* Return all records in given range
* Return all records that match given criteria
*
* @param integer $min_id Lower bound
* @param integer $max_id Upper bound
* @return array Array of records
* The concrete implementation does not have to handle filter-callback or filter-text-regexp
* which are already handled in reparse() via record_matches_filter()
*
* @see reparser_interface::reparse()
*
* @param array $config Criteria used to select records
* @return array Array of records
*/
abstract protected function get_records_by_range($min_id, $max_id);
abstract protected function get_records(array $config): array;
/**
* Save record
@ -219,12 +223,46 @@ abstract class base implements reparser_interface
/**
* {@inheritdoc}
*/
public function reparse(array $config = []): void
{
	foreach ($this->get_records($config) as $candidate)
	{
		// Records rejected by record_matches_filter() are left untouched
		if (!$this->record_matches_filter($candidate, $config))
		{
			continue;
		}

		$this->reparse_record($candidate);
	}
}
/**
* {@inheritdoc}
*
* Kept for backward compatibility; delegates to reparse() with the bounds
* expressed as range-min / range-max criteria.
*
* @deprecated 4.0.0
*
* @param integer $min_id Lower bound
* @param integer $max_id Upper bound
*/
public function reparse_range($min_id, $max_id)
{
	// Delegate exactly once: reparse() iterates over the matching records itself
	$this->reparse(['range-min' => $min_id, 'range-max' => $max_id]);
}
/**
* Test whether a record matches given filter
*
* This is a pure predicate: it never reparses or modifies the record. Only
* the filter-text-regexp and filter-callback criteria are evaluated here.
*
* @param array $record Record being considered; its 'text' element is matched against the regexp
* @param array $config Criteria used to select records
* @return bool False if a configured filter rejects the record, true otherwise
*/
protected function record_matches_filter(array $record, array $config): bool
{
	// preg_match() yields 0 on no match and false on error; either way the record is rejected
	if (isset($config['filter-text-regexp']) && !preg_match($config['filter-text-regexp'], $record['text']))
	{
		return false;
	}

	// The callback receives the whole record and decides whether it must be reparsed
	if (isset($config['filter-callback']) && !$config['filter-callback']($record))
	{
		return false;
	}

	return true;
}
/**

View file

@ -70,13 +70,23 @@ class manager
/**
* Updates the resume data in the database
*
* Resume data must contain the following elements:
* - range-min: lowest record ID
* - range-max: current record ID
* - range-size: number of records to process at a time
*
* Resume data may contain the following elements:
* - filter-callback: a callback that accepts a record as argument and returns a boolean
* - filter-text-like: a SQL LIKE predicate applied on the text, if applicable, e.g. '<r%'
* - filter-text-regexp: a PCRE regexp that matches against the text
*
* @see reparser_interface::reparse()
*
* @param string $name Name of the reparser to which the resume data belongs
* @param int $min Lowest record ID
* @param int $current Current record ID
* @param int $size Number of records to process at a time
* @param array $data Resume data
* @param bool $update_db True if the resume data should be written to the database, false if not. (default: true)
*/
public function update_resume_data($name, $min, $current, $size, $update_db = true)
public function update_resume_data(string $name, array $data, bool $update_db = true)
{
// Prevent overwriting the old, stored array
if ($this->resume_data === null)
@ -84,11 +94,7 @@ class manager
$this->get_resume_data('');
}
$this->resume_data[$name] = array(
'range-min' => $min,
'range-max' => $current,
'range-size' => $size,
);
$this->resume_data[$name] = $data;
if ($update_db)
{

View file

@ -41,7 +41,7 @@ class contact_admin_info extends \phpbb\textreparser\base
/**
* {@inheritdoc}
*/
protected function get_records_by_range($min_id, $max_id)
protected function get_records(array $config): array
{
$values = $this->config_text->get_array(array(
'contact_admin_info',

View file

@ -29,14 +29,19 @@ class poll_option extends \phpbb\textreparser\row_based_plugin
/**
* {@inheritdoc}
*
* Poll options are joined to their topic and to the topic's first post, which
* supplies the BBCode/smilies/magic URL flags and the bbcode_uid. The id and
* text criteria are applied on o.topic_id and o.poll_option_text.
*/
protected function get_records_sql(array $config): string
{
	$sql = 'SELECT o.topic_id, o.poll_option_id, o.poll_option_text AS text, p.enable_bbcode, p.enable_smilies, p.enable_magic_url, p.bbcode_uid
		FROM ' . POLL_OPTIONS_TABLE . ' o, ' . TOPICS_TABLE . ' t, ' . POSTS_TABLE . ' p
		WHERE t.topic_id = o.topic_id
			AND p.post_id = t.topic_first_post_id';

	// The join conditions already opened the WHERE clause, so criteria are appended with AND
	$where = $this->get_where_clauses($config, 'o.topic_id', 'o.poll_option_text');
	if (!empty($where))
	{
		$sql .= "\nAND " . implode("\nAND ", $where);
	}

	return $sql;
}

View file

@ -29,14 +29,19 @@ class poll_title extends \phpbb\textreparser\row_based_plugin
/**
* {@inheritdoc}
*
* Only topics with a poll (poll_start > 0) are selected, joined to their first
* post which supplies the BBCode/smilies/magic URL flags and the bbcode_uid.
* The id and text criteria are applied on t.topic_id and t.poll_title.
*/
protected function get_records_sql(array $config): string
{
	$sql = 'SELECT t.topic_id AS id, t.poll_title AS text, p.enable_bbcode, p.enable_smilies, p.enable_magic_url, p.bbcode_uid
		FROM ' . TOPICS_TABLE . ' t, ' . POSTS_TABLE . ' p
		WHERE t.poll_start > 0
			AND p.post_id = t.topic_first_post_id';

	// The base conditions already opened the WHERE clause, so criteria are appended with AND
	$where = $this->get_where_clauses($config, 't.topic_id', 't.poll_title');
	if (!empty($where))
	{
		$sql .= "\nAND " . implode("\nAND ", $where);
	}

	return $sql;
}
}

View file

@ -36,9 +36,28 @@ interface reparser_interface
*/
public function set_name($name);
/**
* Reparse all records that match given criteria
*
* Every criterion is optional; $config defaults to an empty array.
*
* Available criteria passed as $config:
* - filter-callback: a callback that accepts a record as argument and returns a boolean
*   indicating whether the record must be reparsed
* - filter-text-like: a SQL LIKE predicate applied on the text, if applicable, e.g. '<r%'
* - filter-text-regexp: a PCRE regexp that matches against the text
* - range-min: lowest record ID
* - range-max: highest record ID
*
* If a record does not match all criteria, it will generally be skipped. However, not all
* reparsers may support all kinds of filters and some non-matching records may be reparsed.
*
* @param array $config Criteria used to select the records to reparse
* @return void
*/
public function reparse(array $config = []): void;
/**
* Reparse all records in given range
*
* @deprecated 4.0.0
*
* @param integer $min_id Lower bound
* @param integer $max_id Upper bound
*/

View file

@ -62,10 +62,10 @@ abstract class row_based_plugin extends base
/**
* {@inheritdoc}
*/
protected function get_records_by_range($min_id, $max_id)
protected function get_records(array $config): array
{
$sql = $this->get_records_by_range_query($min_id, $max_id);
$result = $this->db->sql_query($sql);
$sql = $this->get_records_sql($config);
$result = $this->db->sql_query($sql);
$records = $this->db->sql_fetchrowset($result);
$this->db->sql_freeresult($result);
@ -73,13 +73,12 @@ abstract class row_based_plugin extends base
}
/**
* Generate the query that retrieves all records for given range
* Generate the query that retrieves records that match given criteria
*
* @param integer $min_id Lower bound
* @param integer $max_id Upper bound
* @return string SQL query
* @param array $config Criteria used to select records
* @return string SQL query
*/
protected function get_records_by_range_query($min_id, $max_id)
protected function get_records_sql(array $config): string
{
$columns = $this->get_columns();
$fields = array();
@ -95,13 +94,43 @@ abstract class row_based_plugin extends base
}
}
$sql = 'SELECT ' . implode(', ', $fields) . '
FROM ' . $this->table . '
WHERE ' . $columns['id'] . ' BETWEEN ' . $min_id . ' AND ' . $max_id;
$sql = 'SELECT ' . implode(', ', $fields) . ' FROM ' . $this->table;
$where = $this->get_where_clauses($config, $columns['id'], $columns['text']);
if (!empty($where))
{
$sql .= ' WHERE ' . implode("\nAND ", $where);
}
return $sql;
}
/**
* Generate WHERE clauses for given set of criteria
*
* Only range-min, range-max and filter-text-like are turned into SQL here;
* any other element of $config is ignored by this method.
*
* @param array $config Criteria used to select records
* @param string $column_id Name for the id column, including its table alias
* @param string $column_text Name for the text column, including its table alias
* @return array Potentially empty list of SQL clauses
*/
protected function get_where_clauses(array $config, string $column_id, string $column_text): array
{
	$where = [];

	// Range bounds are record IDs. They may arrive as strings (command line options,
	// stored resume data), so force them to integers before they reach the query.
	if (isset($config['range-min']))
	{
		$where[] = $column_id . ' >= ' . (int) $config['range-min'];
	}
	if (isset($config['range-max']))
	{
		$where[] = $column_id . ' <= ' . (int) $config['range-max'];
	}

	if (isset($config['filter-text-like']))
	{
		// Translate '%' into the database layer's any-char placeholder before building the expression
		$pattern = str_replace('%', $this->db->get_any_char(), $config['filter-text-like']);
		$where[] = $column_text . ' ' . $this->db->sql_like_expression($pattern);
	}

	return $where;
}
/**
* {@inheritdoc}
*/

View file

@ -81,4 +81,83 @@ class phpbb_textreparser_base_test extends phpbb_database_test_case
$this->get_rows([2])
);
}
/**
* The LIKE filter must restrict reparsing to records whose text matches it
*/
public function test_reparse_filter_like()
{
	$config = [
		'range-min' => 3,
		'range-max' => 4,
		'filter-text-like' => '%foo123%'
	];
	$this->get_reparser()->reparse($config);

	// Record 3 matches and is reparsed; record 4 is in range but keeps its original text
	$expected = [
		['id' => '3', 'text' => '<r><B><s>[b]</s>foo123<e>[/b]</e></B></r>'],
		['id' => '4', 'text' => '[b]bar456[/b]'],
	];
	$this->assertEquals($expected, $this->get_rows([3, 4]));
}
/**
* The regexp filter must restrict reparsing to records whose text matches it
*/
public function test_reparse_filter_regexp()
{
	$config = [
		'range-min' => 3,
		'range-max' => 4,
		'filter-text-regexp' => '(bar456)'
	];
	$this->get_reparser()->reparse($config);

	// Record 4 matches and is reparsed; record 5 lies outside of the range and is unchanged
	$expected = [
		['id' => '4', 'text' => '<r><B><s>[b]</s>bar456<e>[/b]</e></B></r>'],
		['id' => '5', 'text' => '[b]baz789[/b]'],
	];
	$this->assertEquals($expected, $this->get_rows([4, 5]));
}
/**
* The callback filter must receive the record and be able to veto its reparsing
*/
public function test_reparse_filter_callback()
{
	// The full record, as it is expected to be handed to the callback
	$callback_argument = [
		'id' => '5',
		'enable_bbcode' => '1',
		'enable_smilies' => '1',
		'enable_magic_url' => '1',
		'text' => '[b]baz789[/b]',
		'bbcode_uid' => ''
	];

	// The callback is invoked exactly once and rejects the record
	$filter = $this->getMockBuilder('stdClass')->setMethods(['foo'])->getMock();
	$filter->expects($this->once())
		->method('foo')
		->with($callback_argument)
		->will($this->returnValue(false));

	$config = [
		'range-min' => 5,
		'range-max' => 5,
		'filter-callback' => [$filter, 'foo']
	];
	$this->get_reparser()->reparse($config);

	// Since the callback returned false, the stored text must be unchanged
	$expected = [
		['id' => '5', 'text' => '[b]baz789[/b]'],
	];
	$this->assertEquals($expected, $this->get_rows([5, 5]));
}
}

View file

@ -23,5 +23,29 @@
<value><![CDATA[<r><IMG src="img.png"><s>[IMG]</s>img.png<e>[/IMG]</e></IMG></r>]]></value>
<value></value>
</row>
<row>
<value>3</value>
<value>1</value>
<value>1</value>
<value>1</value>
<value>[b]foo123[/b]</value>
<value></value>
</row>
<row>
<value>4</value>
<value>1</value>
<value>1</value>
<value>1</value>
<value>[b]bar456[/b]</value>
<value></value>
</row>
<row>
<value>5</value>
<value>1</value>
<value>1</value>
<value>1</value>
<value>[b]baz789[/b]</value>
<value></value>
</row>
</table>
</dataset>

View file

@ -76,10 +76,25 @@ class phpbb_text_reparser_manager_test extends phpbb_database_test_case
);
$this->config_text->set('reparser_resume', serialize($resume_data));
$this->reparser_manager->update_resume_data('another_reparser', 5, 20, 10, false);
$this->reparser_manager->update_resume_data(
'another_reparser',
[
'range-min' => 5,
'range-max' => 20,
'range-size' => 10,
],
false
);
$this->assert_array_content_equals($resume_data, unserialize($this->config_text->get('reparser_resume')));
$this->reparser_manager->update_resume_data('test_reparser', 0, 50, 50);
$this->reparser_manager->update_resume_data(
'test_reparser',
[
'range-min' => 0,
'range-max' => 50,
'range-size' => 50,
]
);
$resume_data = array(
'test_reparser' => array(
'range-min' => 0,

View file

@ -44,6 +44,16 @@
<value>13</value>
<value><![CDATA[[b]Not bold[/b] :) <!-- m --><a class="postlink" href="http://example.org">http://example.org</a><!-- m -->]]></value>
</row>
<row>
<value>1</value>
<value>100</value>
<value>Matches LIKE foo123</value>
</row>
<row>
<value>2</value>
<value>100</value>
<value>Does not match LIKE</value>
</row>
<row>
<value>1</value>
<value>123</value>
@ -124,6 +134,11 @@
<value>13</value>
<value>Magic URLs</value>
</row>
<row>
<value>100</value>
<value>1</value>
<value>Topic #100</value>
</row>
<row>
<value>123</value>
<value>1</value>

View file

@ -99,6 +99,18 @@
<value><![CDATA[[img]http://example.org/img.png[/img]]]></value>
<value>1</value>
</row>
<row>
<value>100</value>
<value>1</value>
<value>Matches LIKE foo123</value>
<value>1</value>
</row>
<row>
<value>101</value>
<value>1</value>
<value>Does not match LIKE</value>
<value>1</value>
</row>
<row>
<value>1000</value>
<value>1</value>

View file

@ -27,10 +27,11 @@ class phpbb_textreparser_poll_option_test extends phpbb_database_test_case
return new \phpbb\textreparser\plugins\poll_option($this->db, POLL_OPTIONS_TABLE);
}
protected function get_rows()
protected function get_rows(array $ids = null)
{
$sql = 'SELECT topic_id, poll_option_id, poll_option_text
FROM ' . POLL_OPTIONS_TABLE . '
WHERE ' . $this->db->sql_in_set('topic_id', $ids) . '
ORDER BY topic_id, poll_option_id';
$result = $this->db->sql_query($sql);
$rows = $this->db->sql_fetchrowset($result);
@ -59,11 +60,11 @@ class phpbb_textreparser_poll_option_test extends phpbb_database_test_case
/**
* With saving disabled, reparsing must leave the stored rows untouched
*/
public function test_dry_run()
{
	// Snapshot the rows of topic 1 before reparsing
	$old_rows = $this->get_rows([1]);

	$reparser = $this->get_reparser();
	$reparser->disable_save();
	$reparser->reparse_range(1, 1);

	$new_rows = $this->get_rows([1]);
	$this->assertEquals($old_rows, $new_rows);
}
@ -124,6 +125,32 @@ class phpbb_textreparser_poll_option_test extends phpbb_database_test_case
'poll_option_text' => 'This row should be [b:abcd1234]ignored[/b:abcd1234]',
),
);
$this->assertEquals($expected, $this->get_rows());
$this->assertEquals($expected, $this->get_rows([1, 2, 11, 12, 13, 123]));
}
/**
* The LIKE filter must only reparse the poll options whose text matches it
*/
public function test_filter_like()
{
	$this->get_reparser()->reparse([
		'range-min' => 100,
		'range-max' => 100,
		'filter-text-like' => '%foo123%'
	]);

	// Both options belong to topic 100; only the first one matches the LIKE predicate
	$this->assertEquals(
		[
			[
				'topic_id' => 100,
				'poll_option_id' => 1,
				'poll_option_text' => '<t>Matches LIKE foo123</t>'
			],
			[
				'topic_id' => 100,
				'poll_option_id' => 2,
				'poll_option_text' => 'Does not match LIKE'
			],
		],
		$this->get_rows([100, 100])
	);
}
}

View file

@ -23,4 +23,27 @@ class phpbb_textreparser_poll_title_test extends phpbb_textreparser_test_row_bas
{
return new \phpbb\textreparser\plugins\poll_title($this->db, TOPICS_TABLE);
}
/**
* The LIKE filter must only reparse the poll titles whose text matches it
*/
public function test_filter_like()
{
	$this->get_reparser()->reparse([
		'range-min' => 100,
		'range-max' => 101,
		'filter-text-like' => '%foo123%'
	]);

	// Topic 100 matches and is reparsed; topic 101 is in range but keeps its original title
	$this->assertEquals(
		[
			['id' => '100', 'text' => '<t>Matches LIKE foo123</t>'],
			['id' => '101', 'text' => 'Does not match LIKE'],
		],
		$this->get_rows([100, 101])
	);
}
}