Skip to content

Instantly share code, notes, and snippets.

@Sogl
Last active June 26, 2023 18:30
Show Gist options
  • Select an option

  • Save Sogl/78dce78b254b39a6678f759b3ff981b7 to your computer and use it in GitHub Desktop.

Select an option

Save Sogl/78dce78b254b39a6678f759b3ff981b7 to your computer and use it in GitHub Desktop.

Revisions

  1. Sogl revised this gist Jun 26, 2023. 1 changed file with 33 additions and 0 deletions.
    33 changes: 33 additions & 0 deletions GravFlexpageSearch.php
    Original file line number Diff line number Diff line change
    @@ -10,6 +10,10 @@
    use Grav\Framework\Flex\FlexDirectory;
    use Grav\Plugin\SoglFlex\Flex\Types\Therapies\TherapiesCollection;

    use Grav\Plugin\PageToc\MarkupFixer;
    use Grav\Common\Page\Interfaces\PageInterface;
    use Grav\Common\Plugin;

    class GravFlexpageSearch extends GravPageSearch
    {

    @@ -123,6 +127,9 @@ protected function indexPages(array $options = []): array
    $callback($steps, 'Index Config: <yellow>' . $this->name . '</yellow> | Algolia Index: <yellow>' . $index->getIndexName() . '</yellow>');
    }

    //for TOC search
    $markup_fixer = new MarkupFixer();

    foreach ($collection as $page) {
    $url = $page->url();

    @@ -167,6 +174,9 @@ protected function indexPages(array $options = []): array
    }

    $content = trim($page->content());
    //for TOC search
    $content = $markup_fixer->fix($content, $this->getAnchorOptions($page));

    $skip_empty_content = $this->index_configuration->get('content.skip_empty', true);
    if ($skip_empty_content && empty($content)) {
    $status[] = [
    @@ -219,6 +229,29 @@ protected function indexPages(array $options = []): array
    return $status;
    }


    /**
     * Build the option array passed to MarkupFixer::fix() for a page.
     *
     * Explicit $start / $depth arguments take precedence; every other value is
     * resolved through configVar() with the fallback shown next to it.
     *
     * @param PageInterface|null $page  Page whose settings are consulted; defaults to $this->grav['page'].
     * @param int|null           $start Optional override for the 'start' option.
     * @param int|null           $depth Optional override for the 'depth' option.
     * @return array Options keyed by start, depth, hclass, link, position, aria, icon, class, maxlen, prefix.
     */
    protected function getAnchorOptions(?PageInterface $page = null, $start = null, $depth = null): array
    {
        // The nullable type is spelled out: relying on "= null" to make the
        // parameter nullable is deprecated as of PHP 8.4.
        $page = $page ?? $this->grav['page'];

        return [
            'start' => (int) ($start ?? $this->configVar('anchors.start', $page, 1)),
            'depth' => (int) ($depth ?? $this->configVar('anchors.depth', $page, 6)),
            'hclass' => $this->configVar('hclass', $page, null),
            'link' => $this->configVar('anchors.link', $page, true),
            'position' => $this->configVar('anchors.position', $page, 'before'),
            'aria' => $this->configVar('anchors.aria', $page, 'Anchor'),
            'icon' => $this->configVar('anchors.icon', $page, '#'),
            'class' => $this->configVar('anchors.class', $page, null),
            // A missing slug_maxlen setting resolves to null and is cast to 0.
            'maxlen' => (int) ($this->configVar('anchors.slug_maxlen', $page, null)),
            'prefix' => $this->configVar('anchors.slug_prefix', $page, null),
        ];
    }

    /**
     * Resolve a 'page-toc' plugin setting through Plugin::inheritedConfigOption().
     *
     * @param string             $var     Option name, e.g. 'anchors.start'.
     * @param PageInterface|null $page    Page forwarded to the lookup.
     * @param mixed              $default Value forwarded as the fallback.
     * @return mixed Whatever Plugin::inheritedConfigOption() returns.
     */
    public static function configVar($var, $page = null, $default = null)
    {
        $pluginName = 'page-toc';

        return Plugin::inheritedConfigOption($pluginName, $var, $page, $default);
    }

    public function modifyObject(object $object, array $options = [], bool $update = true): array
    {
    //we don't need to perform operations on each edit/delete
  2. Sogl revised this gist Mar 19, 2023. 2 changed files with 229 additions and 1 deletion.
    2 changes: 1 addition & 1 deletion CrawlFlexpageSearch.php
    Original file line number Diff line number Diff line change
    @@ -15,7 +15,6 @@ class CrawlFlexpageSearch extends CrawlPageSearch
    '/forgot_password',
    '/login',
    '/profile',
    '/search',
    '/therapies',
    '/therapies/add',
    '/',
    @@ -38,6 +37,7 @@ protected function indexPageResponses(string $lang, array $responses): array

    /** @var Pages $pages */
    $pages = $grav['pages'];
    $pages->enablePages();

    $flex = Grav::instance()->get('flex');
    $initialPage = $pages->find('/therapies/therapy');
    228 changes: 228 additions & 0 deletions GravFlexpageSearch.php
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,228 @@
    <?php declare(strict_types=1);

    namespace Grav\Plugin\AlgoliaPro;

    use Grav\Common\Grav;
    use Grav\Common\Page\Pages;
    use Grav\Common\Yaml;
    use RocketTheme\Toolbox\Event\Event;
    use Grav\Framework\Flex\Flex;
    use Grav\Framework\Flex\FlexDirectory;
    use Grav\Plugin\SoglFlex\Flex\Types\Therapies\TherapiesCollection;

    class GravFlexpageSearch extends GravPageSearch
    {

    /**
     * Routes whose pages are removed from the collection in indexPages()
     * before anything is indexed.
     *
     * @var string[]
     */
    protected static $notAllowedRoutes = [
    '/grid',
    '/forgot_password',
    '/login',
    '/profile',
    '/therapies',
    '/therapies/add',
    '/therapies/therapy',
    '/',
    '/zachem-etot-sait/o-lekciyakh',
    '/zachem-etot-sait/ob-avtorakh',
    '/zachem-etot-sait/oplata-i-dostup',
    '/zachem-etot-sait/updates',
    ];

    /**
     * Page templates kept for indexing; indexPages() passes this list to
     * ofOneOfTheseTypes() on the page collection.
     *
     * @var string[]
     */
    protected static $allowedTemplates = [
    'docs',
    'therapy'
    ];

    /**
     * Index Grav pages together with the published "therapies" Flex objects.
     *
     * Each published therapy is first registered with the Pages service as a
     * clone of the '/therapies/therapy' page carrying the therapy's slug, route,
     * title, content and tags. The page set is then selected (single route,
     * configured filter, or all pages), reduced by $notAllowedRoutes and
     * $allowedTemplates, and every remaining page is converted into records.
     * Records are pushed to the index only when production mode is on.
     *
     * @param array $options Recognised keys: 'lang' (passed to getIndexer()) and
     *                       'route' (index only the page found at this route).
     * @return array List of status entries with 'status', 'msg' and 'url' keys;
     *               a status supplied by the onAlgoliaProPageSkip event is passed through as-is.
     */
    protected function indexPages(array $options = []): array
    {
        $grav = Grav::instance();

        /** @var Pages $pages */
        $pages = $grav['pages'];
        $pages->enablePages();

        $status = [];

        /** @var Flex $flex */
        $flex = $grav->get('flex');

        // Page that serves as the template for every virtual therapy page.
        $initialPage = $pages->find('/therapies/therapy');

        /** @var FlexDirectory|null $directory */
        $directory = $flex->getDirectory('therapies');

        if ($initialPage && $directory) {
            /** @var TherapiesCollection $therapies */
            $therapies = $directory->getCollection()->filterBy(['published' => true]);

            foreach ($therapies as $therapy) {
                $slug = $therapy->getProperty('slug');
                $therapyRoute = '/therapies/' . $slug;

                // Clone the template so every therapy gets its own Page object;
                // reusing one instance makes every route show the same entry.
                $virtualPage = clone $initialPage;
                $virtualPage->id($virtualPage->modified() . md5($therapyRoute));

                $virtualPage->slug($slug);
                $virtualPage->folder($slug);
                $virtualPage->route($therapyRoute);
                $virtualPage->rawRoute($therapyRoute);

                $virtualPage->title($therapy->getProperty('title'));
                $virtualPage->content($therapy->getProperty('description'));
                $virtualPage->taxonomy(['tag' => $therapy->getProperty('tags')]);

                $pages->addPage($virtualPage, $therapyRoute);
            }
        } else {
            // Report the problem instead of failing on clone/null dereference;
            // regular pages are still indexed below.
            $status[] = [
                'status' => 'error',
                'msg' => 'Therapy template page or Flex directory not found, therapies were not registered',
                'url' => '/therapies/therapy'
            ];
        }

        // Get custom filters
        $filter = $this->index_configuration->get('filters');
        $records = [];

        $lang = $options['lang'] ?? null;
        $route = $options['route'] ?? null;
        $index = $this->getIndexer($lang);

        if ($route) {
            // Single-route mode works on a plain array, which offers neither
            // remove() nor ofOneOfTheseTypes(); apply both restrictions by hand.
            $collection = [];
            $page = $pages->find($route);
            if (
                $page && $page->exists() && $page->routable() && $page->published()
                && !in_array($page->route(), static::$notAllowedRoutes, true)
                && in_array($page->template(), static::$allowedTemplates, true)
            ) {
                $collection[] = $page;
            }
        } else {
            if (is_array($filter) && array_key_exists('items', $filter)) {
                if (is_string($filter['items'])) {
                    $filter['items'] = Yaml::parse($filter['items']);
                }

                $collection = $pages->getCollection($filter)->published()->routable();
            } else {
                $collection = $pages->all()->published()->routable();
            }

            // Drop the routes that must never be indexed.
            foreach (static::$notAllowedRoutes as $naRoute) {
                $tempPage = $pages->find($naRoute);
                if ($tempPage) {
                    $collection->remove($tempPage->path());
                }
            }

            // Keep only the whitelisted templates.
            $collection = $collection->ofOneOfTheseTypes(static::$allowedTemplates);
        }

        $steps = count($collection);

        if ($callback = $this->getProgressCallback()) {
            $callback($steps, 'Index Config: <yellow>' . $this->name . '</yellow> | Algolia Index: <yellow>' . $index->getIndexName() . '</yellow>');
        }

        // Tells the progress callback (if any) that one page was not indexed.
        $reportSkipped = function (): void {
            if ($callback = $this->getProgressCallback()) {
                $callback(-1);
            }
        };

        foreach ($collection as $page) {
            $url = $page->url();

            // update progress callback
            if ($callback = $this->getProgressCallback()) {
                $callback();
            }

            if (!$this->processPage($page)) {
                $status[] = [
                    'status' => 'info',
                    'msg' => 'Page manually skipped',
                    'url' => $url
                ];
                $reportSkipped();
                continue;
            }

            if ($page->redirect()) {
                $status[] = [
                    'status' => 'info',
                    'msg' => 'Page is a redirect',
                    'url' => $url
                ];
                $reportSkipped();
                continue;
            }

            try {
                $skip_event = Grav::instance()->fireEvent(
                    'onAlgoliaProPageSkip',
                    new Event(['name' => $this->name, 'config' => $this->index_configuration, 'object' => $page])
                );
                if (isset($skip_event['status'])) {
                    $status[] = $skip_event['status'];
                    $reportSkipped();
                    continue;
                }

                $content = trim($page->content());
                $skip_empty_content = $this->index_configuration->get('content.skip_empty', true);
                if ($skip_empty_content && empty($content)) {
                    $status[] = [
                        'status' => 'info',
                        'msg' => 'Page has no content, skipping',
                        'url' => $url
                    ];
                    $reportSkipped();
                    continue;
                }

                $page_records = $this->getPageData($content, $page);
                $updatable_records = $this->recordsNeedUpdating($page_records);
                $records = array_merge($records, $updatable_records);

                // Judge this page by its own records: the accumulated $records
                // list stays non-empty once any earlier page produced output.
                if (count($updatable_records) > 0) {
                    $status[] = [
                        'status' => 'success',
                        'msg' => 'Page indexed',
                        'url' => $url
                    ];
                } else {
                    $status[] = [
                        'status' => 'info',
                        'msg' => 'Cache entry found, no records need updating',
                        'url' => $url
                    ];
                }
            } catch (\Exception $e) {
                $status[] = [
                    'status' => 'error',
                    'msg' => $e->getMessage(),
                    'url' => $url
                ];
                $reportSkipped();
            }
        }

        if ($this->production_mode && !empty($records)) {
            $index->partialUpdateObjects($records, [
                'createIfNotExists' => true
            ]);
        }

        return $status;
    }

    /**
     * Deliberate no-op: nothing is done for individual object edits/deletes.
     *
     * @param object $object  Ignored.
     * @param array  $options Ignored.
     * @param bool   $update  Ignored.
     * @return array Fixed result with 'status' and 'message' keys.
     */
    public function modifyObject(object $object, array $options = [], bool $update = true): array
    {
        // Per-object edit/delete operations are intentionally not performed here.
        $result = [];
        $result['status'] = 'success';
        $result['message'] = 'test mode';

        return $result;
    }

    }
  3. Sogl revised this gist Mar 18, 2023. 1 changed file with 24 additions and 1 deletion.
    25 changes: 24 additions & 1 deletion CrawlFlexpageSearch.php
    Original file line number Diff line number Diff line change
    @@ -9,6 +9,23 @@

    class CrawlFlexpageSearch extends CrawlPageSearch
    {

    /**
     * Routes whose responses are dropped from the response list in
     * indexPageResponses() before any page is processed.
     *
     * @var string[]
     */
    protected static $notAllowedRoutes = [
    '/grid',
    '/forgot_password',
    '/login',
    '/profile',
    '/search',
    '/therapies',
    '/therapies/add',
    '/',
    '/zachem-etot-sait/o-lekciyakh',
    '/zachem-etot-sait/ob-avtorakh',
    '/zachem-etot-sait/oplata-i-dostup',
    '/zachem-etot-sait/updates',
    ];


    /**
    * Process and index Page based on response + Grav page
    * @param string $lang
    @@ -26,10 +43,16 @@ protected function indexPageResponses(string $lang, array $responses): array
    $initialPage = $pages->find('/therapies/therapy');

    //Flex objects loop
    foreach ($responses as $response) {
    foreach ($responses as $key => $response) {
    $fulUrl = $response->getInfo()['url'];
    $route = parse_url($fulUrl, PHP_URL_PATH);

    //we don't need some routes
    if (in_array($route, static::$notAllowedRoutes)) {
    unset($responses[$key]);
    continue;
    }

    if (Utils::startsWith($route, '/therapies/')) {
    $routeObj = basename($route);

  4. Sogl revised this gist Feb 9, 2023. 1 changed file with 7 additions and 7 deletions.
    14 changes: 7 additions & 7 deletions CrawlFlexpageSearch.php
    Original file line number Diff line number Diff line change
    @@ -6,7 +6,6 @@
    use Grav\Common\Page\Interfaces\PageInterface;
    use Grav\Common\Page\Pages;
    use Grav\Common\Utils;
    use Grav\Common\Page\Page;

    class CrawlFlexpageSearch extends CrawlPageSearch
    {
    @@ -24,7 +23,7 @@ protected function indexPageResponses(string $lang, array $responses): array
    $pages = $grav['pages'];

    $flex = Grav::instance()->get('flex');
    $page = $pages->find('/therapies/therapy');
    $initialPage = $pages->find('/therapies/therapy');

    //Flex objects loop
    foreach ($responses as $response) {
    @@ -36,9 +35,9 @@ protected function indexPageResponses(string $lang, array $responses): array

    $therapy = $flex->getObject($routeObj, 'therapies');

    //fake Page
    // $page = new Page();
    //$page = $pages->find('/therapies/therapy');
    //clone initial fake Page
    //this eliminates the error when one entry is written everywhere
    $page = clone $initialPage;
    $page->id($page->modified() . md5($route));

    $page->slug($routeObj);
    @@ -55,11 +54,12 @@ protected function indexPageResponses(string $lang, array $responses): array
    $page->taxonomy($tags);

    $pages->addPage($page, $route);

    //clear object
    $page = null;
    }

    }

    $page = null;


    $index = $this->getIndexer($lang);
  5. Sogl revised this gist Feb 9, 2023. 1 changed file with 111 additions and 0 deletions.
    111 changes: 111 additions & 0 deletions CrawlFlexpageSearch.php
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,111 @@
    <?php declare(strict_types=1);

    namespace Grav\Plugin\AlgoliaPro;

    use Grav\Common\Grav;
    use Grav\Common\Page\Interfaces\PageInterface;
    use Grav\Common\Page\Pages;
    use Grav\Common\Utils;
    use Grav\Common\Page\Page;

    class CrawlFlexpageSearch extends CrawlPageSearch
    {
    /**
     * Process and index pages based on crawl responses plus Grav pages.
     *
     * Responses whose URL path starts with '/therapies/' are first registered
     * with the Pages service as clones of the '/therapies/therapy' page filled
     * from the matching Flex object. Afterwards every response is resolved to a
     * page through its 'grav-page-route' header and handed to
     * addRecordFromResponse(); records are pushed to the index unless
     * production mode is explicitly false.
     *
     * @param string $lang      Language passed to getIndexer().
     * @param array  $responses Crawl responses providing getInfo() and getHeaders().
     * @return array List of status entries with 'status', 'msg' and 'url' keys.
     */
    protected function indexPageResponses(string $lang, array $responses): array
    {
        $grav = Grav::instance();

        /** @var Pages $pages */
        $pages = $grav['pages'];

        $flex = $grav->get('flex');

        // Page that serves as the template for every virtual therapy page.
        $templatePage = $pages->find('/therapies/therapy');

        // Flex objects loop
        foreach ($responses as $response) {
            $fullUrl = $response->getInfo()['url'];
            $route = parse_url($fullUrl, PHP_URL_PATH);

            // parse_url() yields null/false when the URL has no usable path.
            if (!is_string($route) || !Utils::startsWith($route, '/therapies/')) {
                continue;
            }

            $slug = basename($route);
            $therapy = $flex->getObject($slug, 'therapies');

            // Without a template page or a matching Flex object there is
            // nothing to register; the lookup loop below still handles the
            // response and reports it if no page can be found.
            if (!$templatePage || !$therapy) {
                continue;
            }

            // Clone the template so every therapy gets its own Page object;
            // mutating one shared instance makes every route show the last entry.
            $virtualPage = clone $templatePage;
            $virtualPage->id($virtualPage->modified() . md5($route));

            $virtualPage->slug($slug);
            $virtualPage->folder($slug);
            $virtualPage->route($route);
            $virtualPage->rawRoute($route);

            $virtualPage->title($therapy->getProperty('title'));
            $virtualPage->content($therapy->getProperty('description'));
            $virtualPage->taxonomy(['tag' => $therapy->getProperty('tags')]);

            $pages->addPage($virtualPage, $route);
        }

        $index = $this->getIndexer($lang);
        $status = [];
        $records = [];
        $steps = count($responses);

        if ($callback = $this->getProgressCallback()) {
            $callback($steps, 'Index Config: <yellow>' . $this->name . '</yellow> | Algolia Index: <yellow>' . $index->getIndexName() . '</yellow>');
        }

        foreach ($responses as $response) {
            $headers = $response->getHeaders();
            $info = $response->getInfo();
            $url = $info['url'] ?? 'unknown';
            $route = $headers['grav-page-route'][0] ?? '';
            $base = $headers['grav-base'][0] ?? '';
            $page = $pages->find($route);

            // Strip the base so the reported URL is site-relative.
            if ($base) {
                $url = str_replace($base, '', $url);
            }

            if ($page instanceof PageInterface) {
                $this->addRecordFromResponse($page, $response, $url, $records, $status);
            } else {
                $status[] = [
                    'status' => 'error',
                    'msg' => 'Page Not Found: ' . $route,
                    'url' => $url
                ];
                if ($callback = $this->getProgressCallback()) {
                    $callback(-1);
                }
            }
        }

        if ($this->production_mode !== false && !empty($records)) {
            $index->partialUpdateObjects($records, [
                'createIfNotExists' => true
            ]);
        }

        return $status;
    }

    }
  6. Sogl created this gist Feb 6, 2023.
    202 changes: 202 additions & 0 deletions FlexTherapiesSearch.php
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,202 @@
    <?php declare(strict_types=1);

    namespace Grav\Plugin\AlgoliaPro;

    use Grav\Common\Grav;
    use Grav\Framework\Flex\Flex;
    use Grav\Framework\Flex\FlexDirectory;
    use Grav\Plugin\SoglFlex\Flex\Types\Therapies\TherapiesCollection;
    use Grav\Plugin\SoglFlex\Flex\Types\Therapies\TherapyObject;
    use Grav\Framework\Flex\Interfaces\FlexCollectionInterface;
    use Grav\Framework\Flex\Interfaces\FlexObjectInterface;
    use Grav\Common\Utils;
    use Grav\Plugin\ShortcodeCore\ShortcodeManager;
    use Grav\Common\Page\Page;

    class FlexTherapiesSearch extends FlexSearch implements AlgoliaProClassInterface
    {

    /**
     * Return the index configuration with its 'name' set to the indexer's index name.
     *
     * When the language service is enabled, the indexer is requested for
     * $options['lang'] if given, otherwise for the active language, otherwise
     * for the default language. With languages disabled it is requested with null.
     *
     * @param array $options Only the 'lang' key is read.
     * @return array Configuration as returned by toArray().
     */
    public function indexConfiguration(array $options = []): array
    {
        $settings = $this->index_configuration;

        /** @var Language $language */
        $language = Grav::instance()['language'];

        $indexLang = null;
        if ($language->enabled()) {
            $indexLang = $options['lang'] ?? $language->getActive() ?? $language->getDefault();
        }

        $indexer = $this->getIndexer($indexLang);
        $settings->set('name', $indexer->getIndexName());

        return $settings->toArray();
    }

    /**
     * Collection of objects to be indexed: the 'therapies' Flex directory
     * restricted to entries with published = true.
     *
     * @return FlexCollectionInterface
     */
    protected function getFilteredCollection(): FlexCollectionInterface
    {
        /** @var Flex $flex */
        $flex = Grav::instance()['flex'];

        /** @var TherapiesCollection $published */
        $published = $flex
            ->getDirectory('therapies')
            ->getCollection()
            ->filterBy(['published' => true]);

        return $published;
    }

    /**
     * Tell whether this class can handle the given object.
     *
     * @param FlexObjectInterface $object Object to test.
     * @return bool True only for TherapyObject instances.
     */
    protected function checkObject(FlexObjectInterface $object): bool
    {
        $isTherapy = $object instanceof TherapyObject;

        return $isTherapy;
    }

    /**
     * Build the index records for one therapy object.
     *
     * The description is split into blocks by splitHTMLContent() and each block
     * into chunks by splitContentIntoChunks(); every chunk becomes one record
     * made of the shared object data plus the block/chunk specific fields.
     *
     * @param TherapyObject $object
     * @return array List of records, one per content chunk.
     */
    protected function getRecord(FlexObjectInterface $object): array
    {
        // Route the description through a Page and read it back via content();
        // per the original author's note this also enables the TOC anchors.
        $page = new Page();
        $page->content($object->description);
        $content = $page->content();

        // getUrl() is declared ?string: cast before trim(), which throws a
        // TypeError on null under strict_types.
        $object_url = $this->getUrl($object);
        $base_url = trim((string) $object_url, '/');
        $base_id = md5($base_url);

        // Data shared by every record of this object.
        $flex_data = [
            'url' => $object_url,
            'title' => $object->title,
            'summary' => $this->shortenText($content, 256),
            'access' => null,
        ];

        // taxonomy
        $tags = $object->tags;
        if (!empty($tags)) {
            $flex_data['taxonomy'] = ['tag' => $tags];
        }

        // language
        $flex_data['language'] = 'ru';

        // breadcrumbs
        $flex_data['breadcrumbs'] = [
            ['name' => 'Случаи терапии', 'url' => '/therapies'],
            ['name' => $object->title, 'url' => $object_url],
        ];

        // content processing
        $blocks = $this->splitHTMLContent($content);
        $flex_chunks = [];
        $counter = 1;

        foreach ($blocks as $block) {
            $block_data = [];
            $block_content = $block['content'] ?? '';

            // Blocks introduced by a heading carry header metadata and, when
            // the heading has an id, a URL pointing at that anchor.
            if (isset($block['tag'], $block['header'])) {
                $block_data['objectType'] = 'header';
                $block_data['headers'][$block['tag']][] = $block['header'];
                $block_data['subtitle'] = $block['header'];
                $block_data['summary'] = $this->getFirstWords($block_content, 50);

                if (!empty($block['id'])) {
                    $block_data['url'] = $flex_data['url'] . '#' . $block['id'];
                }
            }

            foreach ($this->splitContentIntoChunks($block_content) as $chunk) {
                // objectID is the md5 of the trimmed URL plus a running counter.
                $block_data['objectID'] = $base_id . '_' . $counter++;
                $block_data['baseURL'] = $base_url;
                $block_data['content'] = $chunk;
                $flex_chunks[] = array_merge($flex_data, $block_data);
            }
        }

        return $flex_chunks;
    }

    /**
     * URL of the given object, as reported by the object's own url() method.
     *
     * @param FlexObjectInterface $object
     * @return string|null
     */
    protected function getUrl(FlexObjectInterface $object): ?string
    {
        $url = $object->url();

        return $url;
    }


    /**
     * Strip HTML tags from a text and shorten it to at most $max_length characters.
     *
     * Lengths and offsets are counted in characters (mb_* functions), so
     * multibyte text such as Cyrillic is measured and cut correctly.
     *
     * @param string $text       Text, possibly containing HTML.
     * @param int    $max_length Maximum number of characters to keep.
     * @param string $cut_off    Suffix appended only when the text was actually shortened.
     * @param bool   $keep_word  When true, cut at the last space inside the limit
     *                           instead of in the middle of a word.
     * @return string Tag-free text, shortened if it exceeded $max_length.
     */
    function shortenText($text, $max_length = 140, $cut_off = '', $keep_word = false)
    {
        // Clear all tags and surrounding whitespace.
        $text = trim(strip_tags((string) $text));

        // Text that already fits is returned untouched (no suffix).
        if (mb_strlen($text) <= $max_length) {
            return $text;
        }

        if ($keep_word) {
            // Look one character past the limit so a word that ends exactly at
            // the limit is kept whole.
            $window = mb_substr($text, 0, $max_length + 1);
            $last_space = mb_strrpos($window, ' ');

            if ($last_space !== false && $last_space > 0) {
                return rtrim(mb_substr($window, 0, $last_space)) . $cut_off;
            }
            // No space inside the window: fall through to a hard cut so the
            // limit is still honoured.
        }

        return rtrim(mb_substr($text, 0, $max_length)) . $cut_off;
    }

    }