diff --git a/district/models.py b/district/models.py
index ebe030164be9a03e60be8e6ad91df68b890e1805..ccfd062d491fad0e7a69da49cb24b29fcd84fbfa 100644
--- a/district/models.py
+++ b/district/models.py
@@ -23,6 +23,7 @@ from wagtail.admin.panels import (
 from wagtail.contrib.routable_page.models import RoutablePageMixin, route
 from wagtail.fields import RichTextField, StreamField
 from wagtail.models import Orderable, Page
+from wagtail.search import index
 from wagtailmetadata.models import MetadataPageMixin
 
 from calendar_utils.models import CalendarMixin
@@ -423,6 +424,11 @@ class DistrictArticlePage(
         related_name="thumb_image",
     )
 
+    search_fields = ArticleMixin.search_fields + [
+        index.SearchField("author_page"),
+        index.FilterField("slug"),
+    ]
+
     ### PANELS
 
     content_panels = ArticleMixin.content_panels + [
@@ -563,8 +569,10 @@ class DistrictArticlesPage(
         # (tags__slug)
         context = super().get_context(request)
 
-        articles = self.append_all_shared_articles(
-            DistrictArticlePage.objects.child_of(self)
+        articles = self.materialize_articles_as_id_only(
+            self.append_all_shared_articles_query(
+                DistrictArticlePage.objects.child_of(self)
+            )
         )
         page_ids = list(map(lambda article: article.page_ptr.id, articles))
 
diff --git a/main/models.py b/main/models.py
index 46c03417feda4fdf2071989779e2bfed1ed8506d..c0ccee1472980859643d71539450b466d121a943 100644
--- a/main/models.py
+++ b/main/models.py
@@ -234,8 +234,10 @@ class MainHomePage(
             .order_by("-date")[:3]
         )
 
-        articles_for_article_section = self.append_all_shared_articles(
-            MainArticlePage.objects.filter(article_type=ARTICLE_TYPES.PRESS_RELEASE)
+        articles_for_article_section = self.materialize_shared_articles_query(
+            self.append_all_shared_articles_query(
+                MainArticlePage.objects.filter(article_type=ARTICLE_TYPES.PRESS_RELEASE)
+            )[:8]
         )
         context["article_main"] = (
             articles_for_article_section[0] if articles_for_article_section else None
@@ -252,14 +254,10 @@ class MainHomePage(
             ).order_by("-date")
             context = {"article_data_list": sorted_article_qs[:3]}
         else:
-            sorted_article_qs = self.append_all_shared_articles_query(
-                MainArticlePage.objects.filter(region=request.GET.get("region", None))
-            )
-            context = {
-                "article_data_list": self.materialize_shared_articles_query(
-                    sorted_article_qs[:3]
-                )
-            }
+            sorted_article_qs = MainArticlePage.objects.filter(
+                region=request.GET.get("region", None)
+            ).order_by("-date")
+            context = {"article_data_list": sorted_article_qs[:3]}
             data = {
                 "html": render(
                     request, "main/includes/small_article_preview.html", context
@@ -521,15 +519,11 @@ class MainArticlesPage(
         return JsonResponse(data=data, safe=False)
 
     def get_all_articles_search_response(self, request):
-        article_page = self.get_page_with_shared_articles(
-            self.append_all_shared_articles_query(
-                MainArticlePage.objects.search(
-                    request.GET["q"],
-                ),
-                custom_article_query=lambda query: query.search(request.GET["q"]),
-            ),
+        article_page = self.search_articles(
+            request.GET["q"],
             10,
             request.GET.get("page", 1),
+            MainArticlePage.objects.descendant_of(self),
         )
         context = {"article_data_list": article_page.object_list}
         html_content = render(
@@ -554,11 +548,8 @@ class MainArticlesPage(
         if request.method == "GET" and "q" in request.GET:
             query = request.GET["q"]
 
-            article_results = self.materialize_shared_articles_query(
-                self.append_all_shared_articles_query(
-                    MainArticlePage.objects.search(query),
-                    custom_article_query=lambda query: query.search(query),
-                )[:11]
+            article_results = self.search_articles(
+                query, 11, 1, MainArticlePage.objects.descendant_of(self)
             )
 
             return render(
@@ -648,12 +639,9 @@ class MainArticlePage(
         blank=True,
     )
 
-    search_fields = Page.search_fields + [
-        index.SearchField("title"),
-        index.SearchField("author"),
+    search_fields = ArticleMixin.search_fields + [
         index.SearchField("author_page"),
-        index.SearchField("perex"),
-        index.SearchField("content"),
+        index.FilterField("slug"),
     ]
 
     ### PANELS
diff --git a/shared/models.py b/shared/models.py
index
1116749c8a01a9297ea0673486cc7f212c451272..3528d5b76e384ab863a4ba5ac71cc8adcfc1e399 100644
--- a/shared/models.py
+++ b/shared/models.py
@@ -14,6 +14,7 @@ from taggit.models import ItemBase, Tag, TagBase
 from wagtail.admin.panels import FieldPanel, MultiFieldPanel, PublishingPanel
 from wagtail.fields import StreamField
 from wagtail.models import Page
+from wagtail.search import index
 
 from shared.blocks import (
     DEFAULT_CONTENT_BLOCKS,
@@ -78,6 +79,13 @@ class ArticleMixin(models.Model):
         on_delete=models.PROTECT,
     )  # hidden field to indicate the article is from another page
 
+    search_fields = Page.search_fields + [
+        index.SearchField("title"),
+        index.SearchField("author"),
+        index.SearchField("perex"),
+        index.SearchField("content"),
+    ]
+
     ### PANELS
 
     content_panels = Page.content_panels + [
@@ -305,7 +313,17 @@ class ArticlesMixin(models.Model):
 
     content_panels = Page.content_panels + [FieldPanel("shared_tags")]
 
+    def merge_dict(self, aDict: dict, bDict: dict):
+        """
+        Updates aDict with bDict in place and returns aDict, so it can be used as an expression in lambdas
+        """
+        aDict.update(bDict)
+        return aDict
+
     def determine_page_type(self):
+        """
+        Determines which shared articles page type to use based on the app label of this model
+        """
         if self._meta.app_label == "district":
             return SharedArticlesPageType.DISTRICT
         elif self._meta.app_label == "uniweb":
@@ -314,37 +332,68 @@ class ArticlesMixin(models.Model):
             return SharedArticlesPageType.MAIN
 
     def evaluate_page_query(self, results):
+        """
+        Utility for merging and materializing articles query (rows keyed by union_page_ptr_id) to prevent duplicates.
+        Prefers original articles as opposed to shared ones (if we share an article to the same web that it originates from)
+        """
         return list(
             reduce(
                 lambda unique, item: unique
                 if item["union_page_ptr_id"] in unique
                 and "union_shared_from_id" in item
-                else unique | {f"{item['union_page_ptr_id']}": item},
+                and item["union_shared_from_id"] is not None
+                else self.merge_dict(unique, {item["union_page_ptr_id"]: item}),
                 list(results),
                 {},
             ).values()
         )
 
+    def unique_articles_by_id(self, results):
+        """
+        Utility creating a unique results list (keyed by page_ptr.id) with preference for non-shared articles
+        Prefers original articles as opposed to shared ones (if we share an article to the same web that it originates from)
+        """
+        return list(
+            reduce(
+                lambda unique, item: unique
+                if item.page_ptr.id in unique and item.shared_from is not None
+                else self.merge_dict(unique, {item.page_ptr.id: item}),
+                results,
+                {},
+            ).values()
+        )
+
+    def create_base_shared_query(self, query):
+        """
+        Returns a live, specific query filtered by slugs of shared tags of this page
+        """
+        return (
+            query.filter(
+                shared_tags__slug__in=self.shared_tags.values_list("slug", flat=True)
+            )
+            .live()
+            .specific()
+        )
+
     def append_all_shared_articles_query(
         self,
         previous_query: models.QuerySet | None = None,
         custom_article_query=None,
     ):
         """
-        To prevent circular deps, we get class models during runtime
+        Creates articles query with shared articles as well as articles pre-selected by previous_query parameter
+        Returns a unioned query with .values() being applied on it. Unioned queries cannot be annotated or filtered.
+        If you wish to run annotation or additional filters, use custom_article_query param.
This parameter accepts lambdas with + two parameters: shared article query (before unionizing) and shared articles enum, denoting the origin of shared articles """ - + # To prevent circular deps, we get class models during runtime DistrictArticlePage = apps.get_model(app_label="district.DistrictArticlePage") UniwebArticlePage = apps.get_model(app_label="uniweb.UniwebArticlePage") MainArticlePage = apps.get_model(app_label="main.MainArticlePage") page_type = self.determine_page_type() - """ - In order to balance union() requirements for tables with same-fields only, we are adding null fields using values(). - These values must be in correct order - """ - + # In order to balance union() requirements for tables with same-fields only, we are adding null fields using values(). These values must be in correct order main_meta_fields = MainArticlePage._meta.fields district_meta_fields = DistrictArticlePage._meta.fields uniweb_meta_fields = UniwebArticlePage._meta.fields @@ -355,7 +404,7 @@ class ArticlesMixin(models.Model): else assigned | {f"union_{field.column}": F(field.column)} ) setup_fields_order = lambda orderBy, orderFrom: reduce( - lambda orderTo, field: orderTo | {f"{field}": orderFrom[field]}, + lambda orderTo, field: self.merge_dict(orderTo, {field: orderFrom[field]}), orderBy.keys(), {}, ) @@ -401,28 +450,12 @@ class ArticlesMixin(models.Model): ) create_query_by_slug = lambda query: apply_additional_filter( - ( - query.filter( - shared_tags__slug__in=self.shared_tags.values_list( - "slug", flat=True - ) - ) - ) - .live() - .specific() + self.create_base_shared_query(query) ) district_by_slug = create_query_by_slug(district_article_query) - if custom_article_query is not None: - district_by_slug = custom_article_query(district_by_slug) - uniweb_by_slug = create_query_by_slug(uniweb_article_query) - if custom_article_query is not None: - uniweb_by_slug = custom_article_query(uniweb_by_slug) - main_by_slug = create_query_by_slug(main_article_query) - if 
custom_article_query is not None: - main_by_slug = custom_article_query(main_by_slug) shared_field = Value( self.page_ptr.id, @@ -498,8 +531,11 @@ class ArticlesMixin(models.Model): def materialize_shared_articles_query(self, results): """ - To prevent circular deps, we get class models during runtime + Corresponding method to append_all_shared_articles_query. + Materializes article query as article type corresponding to the module from which + this function is run. Put query from append_all_shared_articles_query as results parameter. """ + # To prevent circular deps, we get class models during runtime page_type = self.determine_page_type() DistrictArticlePage = apps.get_model(app_label="district.DistrictArticlePage") @@ -510,9 +546,9 @@ class ArticlesMixin(models.Model): district_meta_fields = DistrictArticlePage._meta.fields uniweb_meta_fields = UniwebArticlePage._meta.fields - assign_to_model = lambda unioned: lambda assignment, field: assignment | { - field.column: unioned[f"union_{field.column}"] - } + assign_to_model = lambda unioned: lambda assignment, field: self.merge_dict( + assignment, {field.column: unioned[f"union_{field.column}"]} + ) evaluated = self.evaluate_page_query( results @@ -551,6 +587,9 @@ class ArticlesMixin(models.Model): def get_page_with_shared_articles( self, query: models.QuerySet, page_size: int, page: int ): + """ + Returns a list based on articles query using Paginator internally. 
+        """
         return self.materialize_shared_articles_query(
             Paginator(
                 query,
@@ -558,44 +597,65 @@ class ArticlesMixin(models.Model):
             ).get_page(page)
         )
 
-    def append_all_shared_articles(
-        self, previous_query: models.QuerySet | None = None, custom_article_query=None
-    ):
-        return self.materialize_shared_articles_query(
-            self.append_all_shared_articles_query(previous_query, custom_article_query)
-        )
-
     def get_article_page_by_slug(self, slug: str):
-        articles = self.append_all_shared_articles(
+        """
+        Looks up a single article among own + shared articles by its slug.
+        Raises Http404 when no article has the given slug, so that an unknown slug
+        ends up as a "not found" page instead of an unhandled IndexError.
+        """
+        # Imported locally to keep this change self-contained
+        from django.http import Http404
+
+        articles = self.append_all_shared_articles_query(
             custom_article_query=lambda query: query.filter(slug=slug)
-        )
-        return articles[0]
+        )[:1]
+        materialized = self.materialize_shared_articles_query(articles)
+        try:
+            return materialized[0]
+        except IndexError:
+            raise Http404(f"Article with slug '{slug}' was not found") from None
 
     def setup_article_page_context(self, request):
+        """
+        Serves the (possibly shared) article whose slug comes in the "sdilene" GET param
+        """
         slug = request.GET.get("sdilene", "")
         return self.get_article_page_by_slug(slug).serve(request)
 
+    def materialize_articles_as_id_only(self, articles):
+        """
+        Materializes an unioned articles query into lightweight stand-ins that
+        expose only page_ptr.id and shared_type.
+        Useful when optimizing large article queries
+        """
+        TmpArticle = namedtuple(
+            "TemporaryArticle", field_names=["page_ptr", "shared_type"]
+        )
+        TmpPrimaryKey = namedtuple("TemporaryPk", field_names=["id"])
+        # NOTE(review): union_date is selected but never copied over - presumably
+        # it has to stay among the selected columns for the query ordering; confirm
+        return [
+            TmpArticle(
+                page_ptr=TmpPrimaryKey(id=unioned["union_page_ptr_id"]),
+                shared_type=unioned["union_shared_type"],
+            )
+            for unioned in articles.values(
+                "union_page_ptr_id", "union_shared_type", "union_date"
+            )
+        ]
+
     def search_tags_by_unioned_id_query(
         self,
         articles: list,
         tags_model_query=None,
     ):
+        """
+        Search tags based on article query or list of articles.
+        Returns a list of Tag objects
+        """
         if isinstance(articles, models.QuerySet):
-            TmpArticle = namedtuple(
-                "TemporaryArticle", field_names=["page_ptr", "shared_type"]
-            )
-            TmpPrimaryKey = namedtuple("TemporaryPk", field_names=["id"])
-            articles = list(
-                map(
-                    lambda unioned: TmpArticle(
-                        page_ptr=TmpPrimaryKey(id=unioned["union_page_ptr_id"]),
-                        shared_type=unioned["union_shared_type"],
-                    ),
-                    articles.values(
-                        "union_page_ptr_id", "union_shared_type", "union_date"
-                    ),
-                )
-            )
+            articles = self.materialize_articles_as_id_only(articles)
+
         own_page_type = self.determine_page_type()
         get_ids_by_page_type = lambda page_type: list(
             map(
@@ -632,7 +692,71 @@ class ArticlesMixin(models.Model):
 
         return tag_query
 
+    def search_articles(
+        self,
+        query: str,
+        page_size: int,
+        page: int,
+        previous_query: models.QuerySet | None = None,
+    ):
+        """
+        Uses wagtail search to lookup own + shared articles based on a phrase.
+        Accepts a string phrase query + an optional previous_query param, which can be
+        any articles query narrowing down own articles (shared ones are picked by tags).
+        Returns a paginator page (usable as a list) of articles, best matches first,
+        with models based on from which module is this method run.
+        """
+        DistrictArticlePage = apps.get_model(app_label="district.DistrictArticlePage")
+        UniwebArticlePage = apps.get_model(app_label="uniweb.UniwebArticlePage")
+        MainArticlePage = apps.get_model(app_label="main.MainArticlePage")
+
+        # .search() runs annotate, so its impossible to search after .union()
+        # .search() also returns an object that cannot be broken down by .values()
+        # therefore, shared search has to happen here
+        def run_search(articles_query):
+            return list(articles_query.search(query).annotate_score("score"))
+
+        # previous_query is optional, without it only shared articles are searched
+        found = [] if previous_query is None else run_search(previous_query)
+        for article_model in (DistrictArticlePage, UniwebArticlePage, MainArticlePage):
+            found += run_search(self.create_base_shared_query(article_model.objects))
+
+        # .search is not lazy either, making this the best optimized query possible AFAIK
+        ranked = self.unique_articles_by_id(found)
+        # Higher score means better match, so the results are sorted in descending order.
+        # NOTE(review): missing score is treated as 0 in case a backend leaves it empty
+        ranked.sort(key=lambda item: item.score or 0, reverse=True)
+        ranked_page = Paginator(ranked, page_size).get_page(page)
+        ranked_ids = [article.pk for article in ranked_page]
+
+        own_articles_query = None
+        if previous_query is not None:
+            own_articles_query = previous_query.filter(page_ptr_id__in=ranked_ids)
+        converted_query = self.materialize_shared_articles_query(
+            self.append_all_shared_articles_query(
+                own_articles_query,
+                custom_article_query=lambda shared_query: shared_query.filter(
+                    page_ptr_id__in=ranked_ids
+                ),
+            )
+        )
+        converted_by_pk = {article.pk: article for article in converted_query}
+
+        # Keep the relevance order; an article missing from the converted query is
+        # skipped instead of failing the whole search with a KeyError
+        ranked_page.object_list = [
+            converted_by_pk[article.pk]
+            for article in ranked_page
+            if article.pk in converted_by_pk
+        ]
+        # The paginator page is returned (it is a sequence, so it still works as a list)
+        # because callers also read .object_list from the result
+        return ranked_page
+
     def filter_by_tag_name(self, tag: str):
+        """
+        Returns a dict which can be used to filter articles based on tag name
+        """
         return {
             "tags__name": tag,
         }
diff --git a/uniweb/models.py b/uniweb/models.py
index 668be1e754755c06386c3d088c3fbeb71a330498..b623a6c265d903b94b1c122925b36d66efeff674 100644
--- a/uniweb/models.py
+++ b/uniweb/models.py
@@ -23,6 +23,7 @@ from wagtail.contrib.table_block.blocks import TableBlock
 from wagtail.fields import RichTextField, StreamField
 from wagtail.images.blocks import ImageChooserBlock
 from wagtail.models import Page
+from wagtail.search import index
 from wagtailmetadata.models import MetadataPageMixin
 
 from calendar_utils.models import CalendarMixin
@@ -580,6 +581,10 @@ class UniwebArticlePage(
         blank=True,
     )
 
+    search_fields = ArticleMixin.search_fields + [
+        index.FilterField("slug"),
+    ]
+
     ### PANELS
 
     content_panels = ArticleMixin.content_panels + [