From 6ee477c90a65e07132a320ab2bd248a25231f841 Mon Sep 17 00:00:00 2001
From: OndraRehounek <ondra.rehounek@seznam.cz>
Date: Fri, 19 Aug 2022 11:48:28 +0200
Subject: [PATCH] twitter_utils

---
 .isort.cfg                                    |  2 +-
 README.md                                     |  1 +
 main/twitter.py                               |  3 +
 majak/settings/base.py                        |  3 +
 requirements/base.in                          |  1 +
 requirements/base.txt                         | 14 +++-
 twitter_utils/__init__.py                     |  0
 twitter_utils/apps.py                         |  5 ++
 twitter_utils/management/__init__.py          |  0
 twitter_utils/management/commands/__init__.py |  0
 .../management/commands/update_tweets.py      | 24 ++++++
 twitter_utils/migrations/0001_initial.py      | 40 ++++++++++
 twitter_utils/migrations/__init__.py          |  0
 twitter_utils/models.py                       | 17 ++++
 twitter_utils/services.py                     | 77 +++++++++++++++++++
 15 files changed, 184 insertions(+), 3 deletions(-)
 create mode 100644 main/twitter.py
 create mode 100644 twitter_utils/__init__.py
 create mode 100644 twitter_utils/apps.py
 create mode 100644 twitter_utils/management/__init__.py
 create mode 100644 twitter_utils/management/commands/__init__.py
 create mode 100644 twitter_utils/management/commands/update_tweets.py
 create mode 100644 twitter_utils/migrations/0001_initial.py
 create mode 100644 twitter_utils/migrations/__init__.py
 create mode 100644 twitter_utils/models.py
 create mode 100644 twitter_utils/services.py

diff --git a/.isort.cfg b/.isort.cfg
index d782d6a4..642718cf 100644
--- a/.isort.cfg
+++ b/.isort.cfg
@@ -3,4 +3,4 @@
 line_length = 88
 multi_line_output = 3
 include_trailing_comma = true
-known_third_party = PyPDF2,arrow,bleach,bs4,captcha,celery,django,environ,faker,fastjsonschema,icalevnt,markdown,modelcluster,pirates,pytest,pytz,requests,sentry_sdk,taggit,wagtail,wagtailmetadata,weasyprint,yaml,zoneinfo
+known_third_party = PyPDF2,arrow,bleach,bs4,captcha,celery,django,environ,faker,fastjsonschema,icalevnt,markdown,modelcluster,pirates,pytest,pytz,requests,sentry_sdk,taggit,tweepy,wagtail,wagtailmetadata,weasyprint,yaml,zoneinfo
diff --git a/README.md b/README.md
index f19d903a..7440ea63 100644
--- a/README.md
+++ b/README.md
@@ -174,6 +174,7 @@ Přes CRON je třeba na pozadí spouštět Django `manage.py` commandy:
 * `publish_scheduled_pages` - publikuje naplánované stránky (každou hodinu)
 * `update_callendars` - stáhne a aktualizuje kalendáře (několikrát denně)
 * `update_redmine_issues` - aktualizuje programované body MS a KS stránek napojených na Redmine (několikrát denně)
+* `update_tweets` - aktualizuje tweety z účtu PiratskaStrana (každou hodinu) - vyžaduje mít v .env TWITTER_BEARER_TOKEN
 
 ### Fulltextové vyhledávání v češtině
 
diff --git a/main/twitter.py b/main/twitter.py
new file mode 100644
index 00000000..7074b009
--- /dev/null
+++ b/main/twitter.py
@@ -0,0 +1,3 @@
+import tweepy
+
+# NOTE(review): dropped print(public_tweets); the undefined name raised NameError.
diff --git a/majak/settings/base.py b/majak/settings/base.py
index e1e42cb7..7e52665f 100644
--- a/majak/settings/base.py
+++ b/majak/settings/base.py
@@ -46,6 +46,7 @@ INSTALLED_APPS = [
     "calendar_utils",
     "maps_utils",
     "redmine_utils",
+    "twitter_utils",
     "users",
     "pirates",
     "tuning",
@@ -303,3 +304,5 @@ ELECTIONS2021_NEWSLETTER_SOURCE = env.str(
 MAPS_UTILS_MAPPROXY_URL = env.str(
     "MAPPROXY_URL", default="https://mapproxy.pir-test.eu"
 )
+
+TWITTER_BEARER_TOKEN = env.str("TWITTER_BEARER_TOKEN", default="")  # optional
diff --git a/requirements/base.in b/requirements/base.in
index 88cf58c9..a5efaec9 100644
--- a/requirements/base.in
+++ b/requirements/base.in
@@ -24,3 +24,4 @@ weasyprint
 pypdf2
 pyyaml
 fastjsonschema
+tweepy
diff --git a/requirements/base.txt b/requirements/base.txt
index 88989f37..ca34b60d 100644
--- a/requirements/base.txt
+++ b/requirements/base.txt
@@ -4,12 +4,12 @@
 #
 #    pip-compile base.in
 #
+# Private index: pass it via PIP_EXTRA_INDEX_URL; never commit access tokens here.
+
 amqp==5.1.1
     # via kombu
 anyascii==0.3.1
     # via wagtail
-appnope==0.1.3
-    # via ipython
 arrow==1.2.2
     # via
     #   -r base.in
@@ -154,6 +154,10 @@ mozilla-django-oidc==2.0.0
     # via pirates
 numpy==1.23.1
     # via opencv-python
+oauthlib==3.2.0
+    # via
+    #   requests-oauthlib
+    #   tweepy
 opencv-python==4.6.0.66
     # via -r base.in
 openpyxl==3.0.10
@@ -222,7 +226,11 @@ requests==2.28.1
     # via
     #   -r base.in
     #   mozilla-django-oidc
+    #   requests-oauthlib
+    #   tweepy
     #   wagtail
+requests-oauthlib==1.3.1
+    # via tweepy
 sentry-sdk==1.9.2
     # via -r base.in
 six==1.16.0
@@ -254,6 +262,8 @@ traitlets==5.3.0
     # via
     #   ipython
     #   matplotlib-inline
+tweepy==4.10.0
+    # via -r base.in
 urllib3==1.26.11
     # via
     #   requests
diff --git a/twitter_utils/__init__.py b/twitter_utils/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/twitter_utils/apps.py b/twitter_utils/apps.py
new file mode 100644
index 00000000..abd73ac3
--- /dev/null
+++ b/twitter_utils/apps.py
@@ -0,0 +1,5 @@
+from django.apps import AppConfig
+
+
+class TwitterUtilsConfig(AppConfig):  # AppConfig for the twitter_utils app
+    name = "twitter_utils"
diff --git a/twitter_utils/management/__init__.py b/twitter_utils/management/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/twitter_utils/management/commands/__init__.py b/twitter_utils/management/commands/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/twitter_utils/management/commands/update_tweets.py b/twitter_utils/management/commands/update_tweets.py
new file mode 100644
index 00000000..cdefb21b
--- /dev/null
+++ b/twitter_utils/management/commands/update_tweets.py
@@ -0,0 +1,24 @@
+from django.conf import settings
+from django.core.management.base import BaseCommand
+
+from ...services import TweetDownloadService
+
+
+class Command(BaseCommand):
+    help = "Aktualizuje tweety z účtu PiratskaStrana (vyžaduje TWITTER_BEARER_TOKEN)"
+
+    def add_arguments(self, parser):
+        parser.add_argument(
+            "--days_back",
+            default=1,
+            type=int,  # argparse options are optional by default, no ``required`` needed
+            help="Stáří tweetů ve dnech",
+        )
+
+    def handle(self, *args, **options):
+        """Download tweets at most ``--days_back`` days old and store the new ones."""
+        tds = TweetDownloadService(
+            bearer_token=settings.TWITTER_BEARER_TOKEN, days_back=options["days_back"]
+        )
+        tds.perform_update()
+        self.stdout.write("\nUpdating tweets finished!")
diff --git a/twitter_utils/migrations/0001_initial.py b/twitter_utils/migrations/0001_initial.py
new file mode 100644
index 00000000..a5e2a391
--- /dev/null
+++ b/twitter_utils/migrations/0001_initial.py
@@ -0,0 +1,40 @@
+# Generated by Django 4.0.7 on 2022-08-19 08:35
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    # Initial schema of the twitter_utils app: creates the Tweet model.
+    initial = True
+
+    dependencies = []
+
+    operations = [
+        migrations.CreateModel(
+            name="Tweet",
+            fields=[
+                (
+                    "id",
+                    models.AutoField(
+                        auto_created=True,
+                        primary_key=True,
+                        serialize=False,
+                        verbose_name="ID",
+                    ),
+                ),
+                (
+                    "author_img_url",
+                    models.URLField(
+                        default="https://pbs.twimg.com/profile_images/1556544269443387394/jSO2A2Fr_200x200.jpg"
+                    ),
+                ),
+                ("author_name", models.CharField(default="Piráti", max_length=128)),
+                (
+                    "author_username",
+                    models.CharField(default="PiratskaStrana", max_length=128),
+                ),
+                ("text", models.TextField()),
+                ("twitter_id", models.CharField(max_length=32, unique=True)),
+            ],
+        ),
+    ]
diff --git a/twitter_utils/migrations/__init__.py b/twitter_utils/migrations/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/twitter_utils/models.py b/twitter_utils/models.py
new file mode 100644
index 00000000..8453dfb3
--- /dev/null
+++ b/twitter_utils/models.py
@@ -0,0 +1,17 @@
+from django.db import models
+
+
+class Tweet(models.Model):
+    """
+    Stores Tweets fetched from the Twitter API by the update_tweets management command.
+    The Tweet ID is kept as a string because even the BigInt limit is of the same
+    order of magnitude as the IDs of the newest Tweets (2022).
+    """
+
+    author_img_url = models.URLField(
+        default="https://pbs.twimg.com/profile_images/1556544269443387394/jSO2A2Fr_200x200.jpg"
+    )  # TODO consider another default, maybe from static
+    author_name = models.CharField(max_length=128, default="Piráti")
+    author_username = models.CharField(max_length=128, default="PiratskaStrana")
+    text = models.TextField()
+    twitter_id = models.CharField(max_length=32, unique=True)
diff --git a/twitter_utils/services.py b/twitter_utils/services.py
new file mode 100644
index 00000000..61cf6b72
--- /dev/null
+++ b/twitter_utils/services.py
@@ -0,0 +1,77 @@
+from datetime import timedelta
+
+from django.utils import timezone
+from tweepy import Client
+
+from .models import Tweet
+
+
+class TweetDownloadService:
+    """Fetches recent @PiratskaStrana tweets via tweepy and stores the new ones."""
+
+    client: Client
+    days_back: int
+
+    def __init__(self, bearer_token, days_back=1):
+        if not bearer_token:
+            raise RuntimeError("Twitter bearer token not set, cannot update tweets")
+
+        self.client = Client(bearer_token=bearer_token)
+        self.days_back = days_back  # maximum age (in days) of the tweets to fetch
+
+    @staticmethod
+    def get_latest_saved_tweet_id() -> list[str]:
+        """
+        Return Twitter IDs (strings) of all stored tweets; maybe restrict by date.
+        """
+        return list(Tweet.objects.values_list("twitter_id", flat=True))
+
+    def get_tweets_response(self, user_id) -> list:
+        """
+        Return the given user's tweets newer than ``days_back`` days ([] when none).
+        """
+        tweets_response = self.client.get_users_tweets(
+            user_id,
+            expansions=["author_id", "entities.mentions.username"],
+            max_results=100,
+            start_time=timezone.now() - timedelta(days=self.days_back),
+            tweet_fields=["author_id", "created_at"],
+            user_fields=["name", "username"],
+        )
+
+        return tweets_response.data or []  # tweepy gives None when nothing matched
+
+    def get_user_response(self):
+        """
+        Return the "PiratskaStrana" user object (its fields are read by key below).
+        The call could be skipped if we hard-coded the account ID 49022430.
+        """
+        user_response = self.client.get_user(
+            username="PiratskaStrana",
+            user_fields=["profile_image_url"],  # id, name, username enabled by default
+        )
+
+        return user_response.data
+
+    def perform_update(self) -> list[Tweet]:
+        """
+        Run the whole sync: fetch the account and its recent tweets and bulk-create
+        the unseen ones (author fields always come from the account). Returns them.
+        """
+        user_data_dict = self.get_user_response()
+        downloaded_tweets_list = self.get_tweets_response(user_id=user_data_dict["id"])
+        existing_tweet_ids = set(self.get_latest_saved_tweet_id())  # O(1) lookups
+
+        tweets_to_save = [
+            Tweet(
+                author_img_url=user_data_dict["profile_image_url"],
+                author_name=user_data_dict["name"],
+                author_username=user_data_dict["username"],
+                text=tweet.text,
+                twitter_id=tweet.id,
+            )
+            for tweet in downloaded_tweets_list
+            if str(tweet.id) not in existing_tweet_ids
+        ]
+
+        return Tweet.objects.bulk_create(tweets_to_save)
-- 
GitLab