FIX tipue search

This commit is contained in:
boyska 2018-04-13 01:09:55 +02:00
parent 9bc4f0f1f8
commit 4bd0e12be2
3 changed files with 175 additions and 0 deletions

View file

@ -0,0 +1,67 @@
Tipue Search
============
A Pelican plugin to serialize generated HTML to JSON that can be used by jQuery plugin - Tipue Search.
Copyright (c) Talha Mansoor
Author | Talha Mansoor
----------------|-----
Author Email | talha131@gmail.com
Author Homepage | http://onCrashReboot.com
Github Account | https://github.com/talha131
Why do you need it?
===================
Static sites do not offer search feature out of the box. [Tipue Search](http://www.tipue.com/search/)
is a jQuery plugin that search the static site without using any third party service, like DuckDuckGo or Google.
Tipue Search offers 4 search modes. Its [JSON search mode](http://www.tipue.com/search/docs/json/) is the best search mode
especially for large sites.
Tipue's JSON search mode requires the textual content of site in JSON format.
Requirements
============
Tipue Search requires BeautifulSoup.
```bash
pip install beautifulsoup4
```
How Tipue Search works
=========================
Tipue Search serializes the generated HTML into JSON. Format of JSON is as follows
```python
{
"pages": [
{
"text": "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Integer nec odio. Praesent libero. Sed cursus ante dapibus diam. Sed nisi. Nulla quis sem at nibh elementum imperdiet. Duis sagittis ipsum. Praesent mauris. Fusce nec tellus sed augue semper porta. Mauris massa. Vestibulum lacinia arcu eget nulla. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Curabitur sodales ligula in libero.",
"tags": "Example Category",
"url" : "http://oncrashreboot.com/plugin-example.html",
"title": "Everything you want to know about Lorem Ipsum"
},
{
"text": "Sed dignissim lacinia nunc. Curabitur tortor. Pellentesque nibh. Aenean quam. In scelerisque sem at dolor. Maecenas mattis. Sed convallis tristique sem. Proin ut ligula vel nunc egestas porttitor. Morbi lectus risus, iaculis vel, suscipit quis, luctus non, massa. Fusce ac turpis quis ligula lacinia aliquet. Mauris ipsum. Nulla metus metus, ullamcorper vel, tincidunt sed, euismod in, nibh.",
"tags": "Example Category",
"url" : "http://oncrashreboot.com/plugin-example-2.html",
"title": "Review of the book Lorem Ipsum"
}
]
}
```
JSON is written to file `tipuesearch_content.json` which is created in the root of `output` directory.
How to use
==========
To utilize JSON Search mode, your theme needs to have Tipue Search properly configured in it. [Official documentation](http://www.tipue.com/search/docs/#json) has the required details.
Pelican [Elegant Theme](https://github.com/talha131/pelican-elegant) and [Plumage
theme](https://github.com/kdeldycke/plumage) have Tipue Search configured. You can view their
code to understand the configuration.

View file

@ -0,0 +1 @@
from .tipue_search import *

View file

@ -0,0 +1,107 @@
# -*- coding: utf-8 -*-
"""
Tipue Search
============
A Pelican plugin to serialize generated HTML to JSON
that can be used by jQuery plugin - Tipue Search.
Copyright (c) Talha Mansoor
"""
from __future__ import unicode_literals
import os.path
import json
from bs4 import BeautifulSoup
from codecs import open
try:
from urlparse import urljoin
except ImportError:
from urllib.parse import urljoin
from pelican import signals
class Tipue_Search_JSON_Generator(object):
def __init__(self, context, settings, path, theme, output_path, *null):
self.output_path = output_path
self.context = context
self.siteurl = settings.get('SITEURL')
self.relative_urls = settings.get('RELATIVE_URLS')
self.tpages = settings.get('TEMPLATE_PAGES')
self.output_path = output_path
self.json_nodes = []
def create_json_node(self, page):
if getattr(page, 'status', 'published') != 'published':
return
soup_title = BeautifulSoup(page.title.replace(' ', ' '), 'html.parser')
page_title = soup_title.get_text(' ', strip=True).replace('', '"').replace('', '"').replace('', "'").replace('^', '^')
soup_text = BeautifulSoup(page.content, 'html.parser')
page_text = soup_text.get_text(' ', strip=True).replace('', '"').replace('', '"').replace('', "'").replace('', ' ').replace('^', '^')
page_text = ' '.join(page_text.split())
page_category = page.category.name if getattr(page, 'category', 'None') != 'None' else ''
page_url = '.'
if page.url:
page_url = page.url if self.relative_urls else (self.siteurl + '/' + page.url)
node = {'title': page_title,
'text': page_text,
'tags': page_category,
'loc': page_url}
self.json_nodes.append(node)
def create_tpage_node(self, srclink):
with open(os.path.join(self.output_path, self.tpages[srclink]),
encoding='utf-8') as srcfile:
soup = BeautifulSoup(srcfile, 'html.parser')
page_title = soup.title.string if soup.title is not None else ''
page_text = soup.get_text()
# Should set default category?
page_category = ''
page_url = urljoin(self.siteurl, self.tpages[srclink])
node = {'title': page_title,
'text': page_text,
'tags': page_category,
'url': page_url}
self.json_nodes.append(node)
def generate_output(self, writer):
# bisognerebbe cambiare usando questo coso
# for p in self.context['PAGES']:
# print 'U', p.url
path = os.path.join(self.output_path, 'tipuesearch_content.json')
pages = self.context['pages'] + self.context['articles']
for article in self.context['articles']:
pages += article.translations
for srclink in self.tpages:
self.create_tpage_node(srclink)
for page in pages:
self.create_json_node(page)
root_node = {'pages': self.json_nodes}
with open(path, 'w', encoding='utf-8') as fd:
json.dump(root_node, fd, separators=(',', ':'), ensure_ascii=False)
def get_generators(generators):
return Tipue_Search_JSON_Generator
def register():
signals.get_generators.connect(get_generators)