Skip to content
Snippets Groups Projects
Verified Commit c0cee02a authored by jindra12's avatar jindra12
Browse files

Search articles works now, tested main

parent b5bfecb6
No related branches found
No related tags found
2 merge requests: !816 Release, !801 Prepare basic shared tags
Pipeline #13876 passed
......@@ -23,6 +23,7 @@ from wagtail.admin.panels import (
from wagtail.contrib.routable_page.models import RoutablePageMixin, route
from wagtail.fields import RichTextField, StreamField
from wagtail.models import Orderable, Page
from wagtail.search import index
from wagtailmetadata.models import MetadataPageMixin
from calendar_utils.models import CalendarMixin
......@@ -423,6 +424,11 @@ class DistrictArticlePage(
related_name="thumb_image",
)
# Search index definition: everything ArticleMixin already indexes, plus
# author_page as a searchable field and slug as a filterable field.
search_fields = [
    *ArticleMixin.search_fields,
    index.SearchField("author_page"),
    index.FilterField("slug"),
]
### PANELS
content_panels = ArticleMixin.content_panels + [
......@@ -563,9 +569,11 @@ class DistrictArticlesPage(
# (tags__slug)
context = super().get_context(request)
articles = self.append_all_shared_articles(
articles = self.materialize_articles_as_id_only(
self.append_all_shared_articles_query(
DistrictArticlePage.objects.child_of(self)
)
)
page_ids = list(map(lambda article: article.page_ptr.id, articles))
......
......@@ -234,8 +234,10 @@ class MainHomePage(
.order_by("-date")[:3]
)
articles_for_article_section = self.append_all_shared_articles(
articles_for_article_section = self.materialize_shared_articles_query(
self.append_all_shared_articles_query(
MainArticlePage.objects.filter(article_type=ARTICLE_TYPES.PRESS_RELEASE)
)[:8]
)
context["article_main"] = (
articles_for_article_section[0] if articles_for_article_section else None
......@@ -252,14 +254,10 @@ class MainHomePage(
).order_by("-date")
context = {"article_data_list": sorted_article_qs[:3]}
else:
sorted_article_qs = self.append_all_shared_articles_query(
MainArticlePage.objects.filter(region=request.GET.get("region", None))
)
context = {
"article_data_list": self.materialize_shared_articles_query(
sorted_article_qs[:3]
)
}
sorted_article_qs = MainArticlePage.objects.filter(
region=request.GET.get("region", None)
)[:3]
context = {"article_data_list": sorted_article_qs[:3]}
data = {
"html": render(
request, "main/includes/small_article_preview.html", context
......@@ -521,15 +519,11 @@ class MainArticlesPage(
return JsonResponse(data=data, safe=False)
def get_all_articles_search_response(self, request):
article_page = self.get_page_with_shared_articles(
self.append_all_shared_articles_query(
MainArticlePage.objects.search(
article_page = self.search_articles(
request.GET["q"],
),
custom_article_query=lambda query: query.search(request.GET["q"]),
),
10,
request.GET.get("page", 1),
MainArticlePage.objects.descendant_of(self),
)
context = {"article_data_list": article_page.object_list}
html_content = render(
......@@ -554,11 +548,8 @@ class MainArticlesPage(
if request.method == "GET" and "q" in request.GET:
query = request.GET["q"]
article_results = self.materialize_shared_articles_query(
self.append_all_shared_articles_query(
MainArticlePage.objects.search(query),
custom_article_query=lambda query: query.search(query),
)[:11]
article_results = self.search_articles(
query, 11, 1, MainArticlePage.objects.descendant_of(self)
)
return render(
......@@ -648,12 +639,9 @@ class MainArticlePage(
blank=True,
)
search_fields = Page.search_fields + [
index.SearchField("title"),
index.SearchField("author"),
search_fields = ArticleMixin.search_fields + [
index.SearchField("author_page"),
index.SearchField("perex"),
index.SearchField("content"),
index.FilterField("slug"),
]
### PANELS
......
......@@ -14,6 +14,7 @@ from taggit.models import ItemBase, Tag, TagBase
from wagtail.admin.panels import FieldPanel, MultiFieldPanel, PublishingPanel
from wagtail.fields import StreamField
from wagtail.models import Page
from wagtail.search import index
from shared.blocks import (
DEFAULT_CONTENT_BLOCKS,
......@@ -78,6 +79,13 @@ class ArticleMixin(models.Model):
on_delete=models.PROTECT,
) # hidden field to indicate the article is from another page
# Search index definition for article pages: the base Page fields extended
# with the article's title, author, perex and content as searchable fields.
# NOTE(review): Page.search_fields presumably already covers "title" -
# confirm whether the explicit entry is still needed.
search_fields = [
    *Page.search_fields,
    index.SearchField("title"),
    index.SearchField("author"),
    index.SearchField("perex"),
    index.SearchField("content"),
]
### PANELS
content_panels = Page.content_panels + [
......@@ -305,7 +313,17 @@ class ArticlesMixin(models.Model):
content_panels = Page.content_panels + [FieldPanel("shared_tags")]
def merge_dict(self, aDict: dict, bDict: dict):
    """
    Fold the entries of ``bDict`` into ``aDict`` in place and return ``aDict``.

    Handing the mutated dict back makes the call usable as an expression,
    for example as the accumulator step inside a ``reduce`` lambda.
    """
    aDict |= bDict
    return aDict
def determine_page_type(self):
"""
Determines which article type to use based on the module from which this method is run from
"""
if self._meta.app_label == "district":
return SharedArticlesPageType.DISTRICT
elif self._meta.app_label == "uniweb":
......@@ -314,37 +332,68 @@ class ArticlesMixin(models.Model):
return SharedArticlesPageType.MAIN
def evaluate_page_query(self, results):
    """
    Materialize a unioned article query into a list with one row per page.

    ``results`` is iterated once. Every row is read as a mapping that has the
    keys ``union_page_ptr_id`` and ``union_shared_from_id``.

    Original articles are preferred over shared ones (this matters when an
    article is shared to the same web it originates from): a row whose
    ``union_shared_from_id`` is not None is dropped when its page id has
    already been seen, while any other row replaces the stored one.

    Returns the surviving rows as a list, ordered by the first appearance of
    their page id.
    """
    unique_rows = {}
    for row in results:
        page_id = row["union_page_ptr_id"]
        # A shared copy never displaces a row that is already stored.
        if page_id in unique_rows and row["union_shared_from_id"] is not None:
            continue
        unique_rows[page_id] = row
    return list(unique_rows.values())
def unique_articles_by_id(self, results):
    """
    Collapse ``results`` to one article per ``page_ptr.id``.

    Original articles are preferred over shared ones (if we share an article
    to the same web that it originates from): an article whose
    ``shared_from`` is not None is skipped once its id is already present,
    any other article overwrites the stored entry.

    Returns a list ordered by the first appearance of each id.
    """
    kept = {}
    for article in results:
        is_shared_duplicate = (
            article.page_ptr.id in kept and article.shared_from is not None
        )
        if not is_shared_duplicate:
            kept[article.page_ptr.id] = article
    return list(kept.values())
def create_base_shared_query(self, query):
    """
    Restrict ``query`` to pages tagged with one of this page's shared tags.

    The slugs of ``self.shared_tags`` are matched against
    ``shared_tags__slug``; the filtered query is then passed through
    ``.live()`` and ``.specific()`` before being returned.
    """
    own_tag_slugs = self.shared_tags.values_list("slug", flat=True)
    tagged = query.filter(shared_tags__slug__in=own_tag_slugs)
    return tagged.live().specific()
def append_all_shared_articles_query(
self,
previous_query: models.QuerySet | None = None,
custom_article_query=None,
):
"""
To prevent circular deps, we get class models during runtime
Creates articles query with shared articles as well as articles pre-selected by previous_query parameter
Returns an unionized query with .values() being applied on it. Unionized queries cannot be annotated or filtered.
If you wish to run annotation or additional filters, use custom_article_query param. This parameter accepts lambdas with
two parameters: shared article query (before unionizing) and shared articles enum, denoting the origin of shared articles
"""
# To prevent circular deps, we get class models during runtime
DistrictArticlePage = apps.get_model(app_label="district.DistrictArticlePage")
UniwebArticlePage = apps.get_model(app_label="uniweb.UniwebArticlePage")
MainArticlePage = apps.get_model(app_label="main.MainArticlePage")
page_type = self.determine_page_type()
"""
In order to balance union() requirements for tables with same-fields only, we are adding null fields using values().
These values must be in correct order
"""
# In order to balance union() requirements for tables with same-fields only, we are adding null fields using values(). These values must be in correct order
main_meta_fields = MainArticlePage._meta.fields
district_meta_fields = DistrictArticlePage._meta.fields
uniweb_meta_fields = UniwebArticlePage._meta.fields
......@@ -355,7 +404,7 @@ class ArticlesMixin(models.Model):
else assigned | {f"union_{field.column}": F(field.column)}
)
setup_fields_order = lambda orderBy, orderFrom: reduce(
lambda orderTo, field: orderTo | {f"{field}": orderFrom[field]},
lambda orderTo, field: self.merge_dict(orderTo, {field: orderFrom[field]}),
orderBy.keys(),
{},
)
......@@ -401,28 +450,12 @@ class ArticlesMixin(models.Model):
)
create_query_by_slug = lambda query: apply_additional_filter(
(
query.filter(
shared_tags__slug__in=self.shared_tags.values_list(
"slug", flat=True
)
)
)
.live()
.specific()
self.create_base_shared_query(query)
)
district_by_slug = create_query_by_slug(district_article_query)
if custom_article_query is not None:
district_by_slug = custom_article_query(district_by_slug)
uniweb_by_slug = create_query_by_slug(uniweb_article_query)
if custom_article_query is not None:
uniweb_by_slug = custom_article_query(uniweb_by_slug)
main_by_slug = create_query_by_slug(main_article_query)
if custom_article_query is not None:
main_by_slug = custom_article_query(main_by_slug)
shared_field = Value(
self.page_ptr.id,
......@@ -498,8 +531,11 @@ class ArticlesMixin(models.Model):
def materialize_shared_articles_query(self, results):
"""
To prevent circular deps, we get class models during runtime
Corresponding method to append_all_shared_articles_query.
Materializes article query as article type corresponding to the module from which
this function is run. Put query from append_all_shared_articles_query as results parameter.
"""
# To prevent circular deps, we get class models during runtime
page_type = self.determine_page_type()
DistrictArticlePage = apps.get_model(app_label="district.DistrictArticlePage")
......@@ -510,9 +546,9 @@ class ArticlesMixin(models.Model):
district_meta_fields = DistrictArticlePage._meta.fields
uniweb_meta_fields = UniwebArticlePage._meta.fields
assign_to_model = lambda unioned: lambda assignment, field: assignment | {
field.column: unioned[f"union_{field.column}"]
}
assign_to_model = lambda unioned: lambda assignment, field: self.merge_dict(
assignment, {field.column: unioned[f"union_{field.column}"]}
)
evaluated = self.evaluate_page_query(
results
......@@ -551,6 +587,9 @@ class ArticlesMixin(models.Model):
def get_page_with_shared_articles(
self, query: models.QuerySet, page_size: int, page: int
):
"""
Returns a list based on articles query using Paginator internally.
"""
return self.materialize_shared_articles_query(
Paginator(
query,
......@@ -558,44 +597,54 @@ class ArticlesMixin(models.Model):
).get_page(page)
)
def append_all_shared_articles(
    self, previous_query: models.QuerySet | None = None, custom_article_query=None
):
    """
    Build the unioned own + shared articles query and materialize it.

    Both arguments are forwarded positionally to
    append_all_shared_articles_query; its result is handed straight to
    materialize_shared_articles_query, whose return value is returned.
    """
    unioned = self.append_all_shared_articles_query(
        previous_query, custom_article_query
    )
    return self.materialize_shared_articles_query(unioned)
def get_article_page_by_slug(self, slug: str):
    """
    Look up a single article (own or shared) by its page slug.

    The slug filter is passed to append_all_shared_articles_query as
    ``custom_article_query``, so it is applied before the queries are
    unioned. The unioned query is sliced to its first row before it is
    materialized, so only one article is ever built.

    Returns the first matching article in whatever order the unioned query
    yields.

    Raises IndexError when no article matches ``slug``.
    """
    first_match = self.append_all_shared_articles_query(
        custom_article_query=lambda article_query: article_query.filter(slug=slug)
    )[:1]
    return self.materialize_shared_articles_query(first_match)[0]
def setup_article_page_context(self, request):
    """
    Serve the shared article selected by the ``sdilene`` query parameter.

    The slug is read from ``request.GET`` (empty string when the parameter
    is absent), resolved through get_article_page_by_slug, and the resulting
    page's ``serve(request)`` response is returned.
    """
    shared_slug = request.GET.get("sdilene", "")
    shared_article = self.get_article_page_by_slug(shared_slug)
    return shared_article.serve(request)
def search_tags_by_unioned_id_query(
self,
articles: list,
tags_model_query=None,
):
if isinstance(articles, models.QuerySet):
def materialize_articles_as_id_only(self, articles):
    """
    Convert a unioned article query into lightweight, id-only records.

    ``articles`` must offer ``.values(...)`` yielding mappings with the keys
    ``union_page_ptr_id`` and ``union_shared_type``. Each returned record
    exposes only ``page_ptr.id`` and ``shared_type``, which is useful when
    optimizing large article queries.

    ``union_date`` is still requested from the query but is not copied onto
    the record.
    NOTE(review): presumably the column has to stay selected for the
    query's ordering - confirm before removing it.

    Returns a list of records in query order.
    """
    TmpPrimaryKey = namedtuple("TemporaryPk", field_names=["id"])
    TmpArticle = namedtuple(
        "TemporaryArticle", field_names=["page_ptr", "shared_type"]
    )
    rows = articles.values("union_page_ptr_id", "union_shared_type", "union_date")
    return [
        TmpArticle(
            page_ptr=TmpPrimaryKey(id=row["union_page_ptr_id"]),
            shared_type=row["union_shared_type"],
        )
        for row in rows
    ]
def search_tags_by_unioned_id_query(
self,
articles: list,
tags_model_query=None,
):
"""
Search tags based on article query or list of articles.
Returns a list of Tag objects
"""
if isinstance(articles, models.QuerySet):
articles = self.materialize_articles_as_id_only(articles)
own_page_type = self.determine_page_type()
get_ids_by_page_type = lambda page_type: list(
map(
......@@ -632,7 +681,73 @@ class ArticlesMixin(models.Model):
return tag_query
def search_articles(
    self,
    query: str,
    page_size: int,
    page: int,
    previous_query: models.QuerySet | None = None,
):
    """
    Search own and shared articles for a phrase and return one page of hits.

    Parameters:
        query: the phrase handed to the search backend.
        page_size: number of articles per page.
        page: page number, passed to ``Paginator.get_page``.
        previous_query: articles query to search in addition to the shared
            articles. When None, only shared articles are searched.

    Returns a list of articles materialized by
    materialize_shared_articles_query, ordered from the highest search score
    to the lowest. Hits that can no longer be materialized are left out.
    """
    # To prevent circular deps, we get class models during runtime
    DistrictArticlePage = apps.get_model(app_label="district.DistrictArticlePage")
    UniwebArticlePage = apps.get_model(app_label="uniweb.UniwebArticlePage")
    MainArticlePage = apps.get_model(app_label="main.MainArticlePage")

    def run_search(article_query):
        # .search() runs annotate, so it is impossible to search after
        # .union(), and its result cannot be broken down by .values().
        # Every source is therefore searched on its own and evaluated here.
        return list(article_query.search(query).annotate_score("score"))

    def relevance(article):
        # Treat a missing score as the lowest relevance instead of failing
        # the sort on a None comparison.
        score = getattr(article, "score", None)
        return 0 if score is None else score

    hits = run_search(previous_query) if previous_query is not None else []
    for shared_model in (DistrictArticlePage, UniwebArticlePage, MainArticlePage):
        hits.extend(run_search(self.create_base_shared_query(shared_model.objects)))

    ranked = self.unique_articles_by_id(hits)
    # A higher score means a better match, so the best hits go first.
    ranked.sort(key=relevance, reverse=True)

    page_ids = [hit.pk for hit in Paginator(ranked, page_size).get_page(page)]
    if not page_ids:
        # Nothing matched: skip building the union query altogether.
        return []

    own_page_query = (
        None
        if previous_query is None
        else previous_query.filter(page_ptr_id__in=page_ids)
    )
    materialized = self.materialize_shared_articles_query(
        self.append_all_shared_articles_query(
            own_page_query,
            custom_article_query=lambda article_query: article_query.filter(
                page_ptr_id__in=page_ids
            ),
        )
    )
    by_pk = {article.pk: article for article in materialized}
    # Re-apply the relevance order, which the union query does not keep.
    return [by_pk[pk] for pk in page_ids if pk in by_pk]
def filter_by_tag_name(self, tag: str):
    """
    Build the filter arguments that select articles by tag name.

    Returns a dict with the single key ``tags__name`` mapped to ``tag``.
    """
    return dict(tags__name=tag)
......
......@@ -23,6 +23,7 @@ from wagtail.contrib.table_block.blocks import TableBlock
from wagtail.fields import RichTextField, StreamField
from wagtail.images.blocks import ImageChooserBlock
from wagtail.models import Page
from wagtail.search import index
from wagtailmetadata.models import MetadataPageMixin
from calendar_utils.models import CalendarMixin
......@@ -580,6 +581,10 @@ class UniwebArticlePage(
blank=True,
)
# Search index definition: everything ArticleMixin already indexes, plus
# slug as a filterable field.
search_fields = [
    *ArticleMixin.search_fields,
    index.FilterField("slug"),
]
### PANELS
content_panels = ArticleMixin.content_panels + [
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment