From 5de6ddfdf7b3cd164195e5925f455f318335434d Mon Sep 17 00:00:00 2001
From: "jindra12.underdark" <jindra12.underdark@gmail.com>
Date: Tue, 18 Jul 2023 00:15:55 +0200
Subject: [PATCH] Stop duplicates

#210
---
 district/models.py | 35 +++++++++++-------
 main/models.py     | 13 +++++++
 shared/models.py   | 90 ++++++++++++++++++++++++++++++++--------------
 uniweb/models.py   | 36 +++++++++++++++----
 4 files changed, 128 insertions(+), 46 deletions(-)

diff --git a/district/models.py b/district/models.py
index 44226467..dc205b42 100644
--- a/district/models.py
+++ b/district/models.py
@@ -458,7 +458,14 @@ class DistrictArticlePage(
     @property
     def get_tags(self):
         if self.shared_from is not None:
-            return self.articles_page.search_tags_by_article_id([self.pk])
+            return self.articles_page.search_tags_by_unioned_id_query(
+                [
+                    {
+                        "union_page_ptr_id": self.pk,
+                        "union_page_kind": "district",
+                    }
+                ]
+            )
         return self.tags.all
 
     def get_context(self, request):
@@ -565,19 +572,23 @@ class DistrictArticlesPage(
         # (tags__slug)
         context = super().get_context(request)
 
-        site_article_ids = self.append_all_shared_articles_ids(
+        article_ids_with_type = self.append_all_shared_articles_ids_with_type(
             DistrictArticlePage.objects.child_of(self)
         )
 
+        page_ids = list(
+            map(lambda unioned: unioned["union_page_ptr_id"], article_ids_with_type)
+        )
+
         # Naplním "tag" a "article_page_list" parametry
-        context.update(**self.get_tag_and_articles(request, site_article_ids))
-        context["tag_list"] = self.get_tag_qs(site_article_ids)
+        context.update(**self.get_tag_and_articles(request, page_ids))
+        context["tag_list"] = self.get_tag_qs(article_ids_with_type)
 
         # Pro obecnou paginaci posílám "extra_query", abych si podržel tag pro další GET
         context["extra_query"] = "&tag={}".format(request.GET.get("tag", ""))
         return context
 
-    def get_tag_and_articles(self, request, site_article_ids: list) -> dict:
+    def get_tag_and_articles(self, request, page_ids: list) -> dict:
         """
         Vrátí vyfiltrované články podle tagu a page query pro z daného "výběru"
         pro danou stránku (site_article_ids). Lepší by bylo články a tag řešit
@@ -591,17 +602,17 @@ class DistrictArticlesPage(
             tag = Tag.objects.filter(slug=request.GET["tag"])[0]
             article_page_qs = self.append_all_shared_articles(
                 DistrictArticlePage.objects.filter(
-                    id__in=site_article_ids, tags__slug=tag.slug
+                    page_ptr_id__in=page_ids, tags__slug=tag.slug
                 ),
                 filter=lambda shared: shared.filter(
-                    id__in=site_article_ids, tags__slug=tag.slug
+                    page_ptr_id__in=page_ids, tags__slug=tag.slug
                 ),
             )
         except (KeyError, IndexError):
             tag = None
             article_page_qs = self.append_all_shared_articles(
-                DistrictArticlePage.objects.filter(id__in=site_article_ids),
-                filter=lambda shared: shared.filter(id__in=site_article_ids),
+                DistrictArticlePage.objects.filter(page_ptr_id__in=page_ids),
+                filter=lambda shared: shared.filter(page_ptr_id__in=page_ids),
             )
 
         return {
@@ -612,13 +623,13 @@ class DistrictArticlesPage(
             "tag": tag,
         }
 
-    def get_tag_qs(self, site_article_ids: list) -> models.QuerySet:
+    def get_tag_qs(self, article_ids_with_type: list) -> models.QuerySet:
         """
         Getuje Tagy pouze pro DistrictArticlePage omezeno IDčky getnutých přes
         root_page. Počítá, kolik článků je s daným tagem.
         """
-        return self.search_tags_by_article_id(
-            site_article_ids,
+        return self.search_tags_by_unioned_id_query(
+            article_ids_with_type,
             tags_model_query=lambda query: query.annotate(count=models.Count("slug"))
             .order_by("slug")
             .values("name", "slug", "count"),
diff --git a/main/models.py b/main/models.py
index 5d6f9e8b..4fd9eccb 100644
--- a/main/models.py
+++ b/main/models.py
@@ -666,6 +666,19 @@ class MainArticlePage(
     class Meta:
         verbose_name = "Aktualita"
 
+    @property
+    def get_tags(self):
+        """Return the tags to display for this article.
+
+        Not shared: the bound `self.tags.all`, left uncalled as before.
+        Shared: tags found through `articles_page` for a "main" article.
+        """
+        if self.shared_from is None:
+            return self.tags.all
+        return self.articles_page.search_tags_by_unioned_id_query(
+            [{"union_page_ptr_id": self.pk, "union_page_kind": "main"}]
+        )
+
     # def get_context(self, request): chceme/nechceme?
     #     context = super().get_context(request)
     #     context["related_articles"] = (
diff --git a/shared/models.py b/shared/models.py
index 2171001b..94479001 100644
--- a/shared/models.py
+++ b/shared/models.py
@@ -282,8 +282,8 @@ class SharedTaggedMainArticle(ItemBase):
 
 
 class SharedArticlesPageType(Enum):
-    DISTRICT = ("district",)
-    UNIWEB = ("uniweb",)
+    DISTRICT = "district"
+    UNIWEB = "uniweb"
     MAIN = "main"
 
 
@@ -305,6 +305,15 @@ class ArticlesMixin(models.Model):
         elif self._meta.app_label == "main":
             return SharedArticlesPageType.MAIN
 
+    def unique_page_query_materialized(self, results):
+        """Evaluate `results` into a list with one row per `union_page_ptr_id`.
+
+        A repeated id keeps its first position and its last-seen row.
+        """
+        # One dict pass replaces rebuilding the dict for every row (reduce).
+        unique = {f"{row['union_page_ptr_id']}": row for row in results}
+        return list(unique.values())
+
     def append_all_shared_articles_query(
         self, previous_query: models.QuerySet | None = None, filter=None
     ):
@@ -340,6 +349,9 @@ class ArticlesMixin(models.Model):
 
         main_fields = reduce(fields_reducer, main_meta_fields, {}) | {
             "union_thumb_image_id": F("search_image_id"),
+            "union_page_kind": Value(
+                SharedArticlesPageType.MAIN.value, output_field=models.CharField()
+            ),
         }
 
         district_fields = setup_fields_order(
@@ -348,6 +360,10 @@ class ArticlesMixin(models.Model):
             | {
                 "union_region": Value("", models.CharField()),
                 "union_article_type": Value(0, models.PositiveSmallIntegerField()),
+                "union_page_kind": Value(
+                    SharedArticlesPageType.DISTRICT.value,
+                    output_field=models.CharField(),
+                ),
             },
         )
 
@@ -365,6 +381,9 @@ class ArticlesMixin(models.Model):
                 "union_article_type": Value(0, models.PositiveSmallIntegerField()),
                 "union_is_black": Value(False, models.BooleanField()),
                 "union_thumb_image_id": F("search_image_id"),
+                "union_page_kind": Value(
+                    SharedArticlesPageType.UNIWEB.value, output_field=models.CharField()
+                ),
             },
         )
 
@@ -407,24 +426,20 @@ class ArticlesMixin(models.Model):
             ),
         )
 
-        results = (
-            main_by_slug.values(
-                **main_fields,
-                union_shared_from_id=shared_field,
-            )
-            .union(
-                uniweb_by_slug.values(
-                    **uniweb_fields,
-                    union_shared_from_id=shared_field,
-                )
-            )
-            .union(
-                district_by_slug.values(
-                    **district_fields,
-                    union_shared_from_id=shared_field,
-                )
-            )
+        main_by_values = main_by_slug.values(
+            **main_fields,
+            union_shared_from_id=shared_field,
         )
+        uniweb_by_values = uniweb_by_slug.values(
+            **uniweb_fields,
+            union_shared_from_id=shared_field,
+        )
+        district_by_values = district_by_slug.values(
+            **district_fields,
+            union_shared_from_id=shared_field,
+        )
+
+        results = main_by_values.union(uniweb_by_values).union(district_by_values)
 
         empty_shared_field = Value(
             None,
@@ -456,11 +471,13 @@ class ArticlesMixin(models.Model):
 
         return results.order_by("union_date")
 
-    def append_all_shared_articles_ids(
+    def append_all_shared_articles_ids_with_type(
         self, previous_query: models.QuerySet | None = None, filter=None
     ):
         results = self.append_all_shared_articles_query(previous_query, filter)
-        return results.values_list("id", flat=True)
+        return self.unique_page_query_materialized(
+            results.values("union_page_ptr_id", "union_page_kind")
+        )
 
     def append_all_shared_articles(
         self, previous_query: models.QuerySet | None = None, filter=None
@@ -480,7 +497,7 @@ class ArticlesMixin(models.Model):
 
         results = self.append_all_shared_articles_query(previous_query, filter)
 
-        evaluated = list(
+        evaluated = self.unique_page_query_materialized(
             results
         )  # We MUST eval here since we can't turn values() into concrete class instances in QuerySet after union
 
@@ -528,21 +545,40 @@ class ArticlesMixin(models.Model):
         slug = request.GET.get("sdilene", "")
         return self.get_article_page_by_slug(slug).serve(request)
 
-    def search_tags_by_article_id(
-        self, site_article_ids: list, additional_query=None, tags_model_query=None
+    def search_tags_by_unioned_id_query(
+        self,
+        site_article_id_page_list: list,
+        additional_query=None,
+        tags_model_query=None,
     ):
         DistrictArticleTag = apps.get_model(app_label="district.DistrictArticleTag")
         UniwebArticleTag = apps.get_model(app_label="uniweb.UniwebArticleTag")
         MainArticleTag = apps.get_model(app_label="main.MainArticleTag")
 
+        get_ids_by_page_type = lambda page_type: list(
+            map(
+                lambda unioned: unioned["union_page_ptr_id"],
+                filter(
+                    lambda unioned: unioned["union_page_kind"] == page_type,
+                    site_article_id_page_list,
+                ),
+            )
+        )
+
         district_tags = DistrictArticleTag.objects.filter(
-            content_object_id__in=site_article_ids
+            content_object_id__in=get_ids_by_page_type(
+                SharedArticlesPageType.DISTRICT.value
+            )
         )
         uniweb_tags = UniwebArticleTag.objects.filter(
-            content_object_id__in=site_article_ids
+            content_object_id__in=get_ids_by_page_type(
+                SharedArticlesPageType.UNIWEB.value
+            )
         )
         main_tags = MainArticleTag.objects.filter(
-            content_object_id__in=site_article_ids
+            content_object_id__in=get_ids_by_page_type(
+                SharedArticlesPageType.MAIN.value
+            )
         )
 
         if additional_query is not None:
diff --git a/uniweb/models.py b/uniweb/models.py
index e28e03c2..1fbfd211 100644
--- a/uniweb/models.py
+++ b/uniweb/models.py
@@ -536,13 +536,13 @@ class UniwebArticlesIndexPage(
 
     ### OTHERS
 
+    class Meta:
+        verbose_name = "Sekce článků"
+
     @route(r"^sdilene/$", name="shared")
     def shared(self, request):
         return self.setup_article_page_context(request)
 
-    class Meta:
-        verbose_name = "Sekce článků"
-
     def get_context(self, request):
         context = super().get_context(request)
         num = request.GET.get("page")
@@ -550,15 +550,24 @@ class UniwebArticlesIndexPage(
 
         tag_params = self.search_tags_by_tag_name(tag)
 
+        own_children = UniwebArticlePage.objects.child_of(self)
+
         articles = self.append_all_shared_articles(
-            UniwebArticlePage.objects.child_of(self),
-            filter=lambda articles: articles.filter(**tag_params),
+            own_children,
+            filter=lambda articles: articles.filter(**tag_params)
+            if tag is not None
+            else articles,
         )
 
-        articles_ids = list(map(lambda article: article.pk, articles))
+        articles_ids = self.append_all_shared_articles_ids_with_type(
+            own_children,
+            filter=lambda articles: articles.filter(**tag_params)
+            if tag is not None
+            else articles,
+        )
 
         context["articles"] = Paginator(articles, ARTICLES_PER_PAGE).get_page(num)
-        context["tags"] = self.search_tags_by_article_id(articles_ids)
+        context["tags"] = self.search_tags_by_unioned_id_query(articles_ids)
         context["active_tag"] = tag
         return context
 
@@ -597,6 +606,19 @@ class UniwebArticlePage(
     class Meta:
         verbose_name = "Článek"
 
+    @property
+    def get_tags(self):
+        """Return the tags to display for this article.
+
+        Not shared: the bound `self.tags.all`, left uncalled.
+        Shared: tags found through `articles_page` for a "uniweb" article.
+        """
+        if self.shared_from is None:
+            return self.tags.all
+        return self.articles_page.search_tags_by_unioned_id_query(
+            [{"union_page_ptr_id": self.pk, "union_page_kind": "uniweb"}]
+        )
+
     def get_context(self, request):
         context = super().get_context(request)
         context["related_articles"] = (
-- 
GitLab