From 6ee477c90a65e07132a320ab2bd248a25231f841 Mon Sep 17 00:00:00 2001 From: OndraRehounek <ondra.rehounek@seznam.cz> Date: Fri, 19 Aug 2022 11:48:28 +0200 Subject: [PATCH] twitter_utils --- .isort.cfg | 2 +- README.md | 1 + main/twitter.py | 3 + majak/settings/base.py | 3 + requirements/base.in | 1 + requirements/base.txt | 14 +++- twitter_utils/__init__.py | 0 twitter_utils/apps.py | 5 ++ twitter_utils/management/__init__.py | 0 twitter_utils/management/commands/__init__.py | 0 .../management/commands/update_tweets.py | 24 ++++++ twitter_utils/migrations/0001_initial.py | 40 ++++++++++ twitter_utils/migrations/__init__.py | 0 twitter_utils/models.py | 17 ++++ twitter_utils/services.py | 77 +++++++++++++++++++ 15 files changed, 184 insertions(+), 3 deletions(-) create mode 100644 main/twitter.py create mode 100644 twitter_utils/__init__.py create mode 100644 twitter_utils/apps.py create mode 100644 twitter_utils/management/__init__.py create mode 100644 twitter_utils/management/commands/__init__.py create mode 100644 twitter_utils/management/commands/update_tweets.py create mode 100644 twitter_utils/migrations/0001_initial.py create mode 100644 twitter_utils/migrations/__init__.py create mode 100644 twitter_utils/models.py create mode 100644 twitter_utils/services.py diff --git a/.isort.cfg b/.isort.cfg index d782d6a4..642718cf 100644 --- a/.isort.cfg +++ b/.isort.cfg @@ -3,4 +3,4 @@ line_length = 88 multi_line_output = 3 include_trailing_comma = true -known_third_party = PyPDF2,arrow,bleach,bs4,captcha,celery,django,environ,faker,fastjsonschema,icalevnt,markdown,modelcluster,pirates,pytest,pytz,requests,sentry_sdk,taggit,wagtail,wagtailmetadata,weasyprint,yaml,zoneinfo +known_third_party = PyPDF2,arrow,bleach,bs4,captcha,celery,django,environ,faker,fastjsonschema,icalevnt,markdown,modelcluster,pirates,pytest,pytz,requests,sentry_sdk,taggit,tweepy,wagtail,wagtailmetadata,weasyprint,yaml,zoneinfo diff --git a/README.md b/README.md index f19d903a..7440ea63 100644 
--- a/README.md
+++ b/README.md
@@ -174,6 +174,7 @@ Přes CRON je třeba na pozadí spouštět Django `manage.py` commandy:
 * `publish_scheduled_pages` - publikuje naplánované stránky (každou hodinu)
 * `update_callendars` - stáhne a aktualizuje kalendáře (několikrát denně)
 * `update_redmine_issues` - aktualizuje programované body MS a KS stránek napojených na Redmine (několikrát denně)
+* `update_tweets` - aktualizuje tweety z účtu PiratskaStrana (každou hodinu) - vyžaduje mít v .env TWITTER_BEARER_TOKEN
 
 ### Fulltextové vyhledávání v češtině
 
diff --git a/majak/settings/base.py b/majak/settings/base.py
index e1e42cb7..7e52665f 100644
--- a/majak/settings/base.py
+++ b/majak/settings/base.py
@@ -46,6 +46,7 @@ INSTALLED_APPS = [
     "calendar_utils",
     "maps_utils",
     "redmine_utils",
+    "twitter_utils",
     "users",
     "pirates",
     "tuning",
@@ -303,3 +304,7 @@ ELECTIONS2021_NEWSLETTER_SOURCE = env.str(
 MAPS_UTILS_MAPPROXY_URL = env.str(
     "MAPPROXY_URL", default="https://mapproxy.pir-test.eu"
 )
+
+# Optional, only the update_tweets command needs it. Without a default a missing
+# variable would break the settings import, the command reports an empty token.
+TWITTER_BEARER_TOKEN = env.str("TWITTER_BEARER_TOKEN", default="")
diff --git a/requirements/base.in b/requirements/base.in
index 88cf58c9..a5efaec9 100644
--- a/requirements/base.in
+++ b/requirements/base.in
@@ -24,3 +24,4 @@ weasyprint
 pypdf2
 pyyaml
 fastjsonschema
+tweepy
diff --git a/requirements/base.txt b/requirements/base.txt
index 88989f37..ca34b60d 100644
--- a/requirements/base.txt
+++ b/requirements/base.txt
@@ -4,12 +4,10 @@
 #
 #    pip-compile base.in
 #
 amqp==5.1.1
     # via kombu
 anyascii==0.3.1
     # via wagtail
-appnope==0.1.3
-    # via ipython
 arrow==1.2.2
     # via
     #   -r base.in
@@ -154,6 +152,10 @@ mozilla-django-oidc==2.0.0
     # via pirates
 numpy==1.23.1
     # via opencv-python
+oauthlib==3.2.0
+    # via
+    #   requests-oauthlib
+    #   tweepy
 opencv-python==4.6.0.66
     # via -r base.in
 openpyxl==3.0.10
@@ -222,7 +224,11 @@ requests==2.28.1
     # via
     #   -r base.in
     #   mozilla-django-oidc
+    #   requests-oauthlib
+    #   tweepy
     #   wagtail
+requests-oauthlib==1.3.1
+    # via tweepy
 sentry-sdk==1.9.2
     # via -r base.in
 six==1.16.0
@@ -254,6 +260,8 @@ traitlets==5.3.0
     # via
     #   ipython
     #   matplotlib-inline
+tweepy==4.10.0
+    # via -r base.in
 urllib3==1.26.11
     # via
     #   requests
diff --git a/twitter_utils/__init__.py b/twitter_utils/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/twitter_utils/apps.py b/twitter_utils/apps.py
new file mode 100644
index 00000000..abd73ac3
--- /dev/null
+++ b/twitter_utils/apps.py
@@ -0,0 +1,7 @@
+from django.apps import AppConfig
+
+
+class TwitterUtilsConfig(AppConfig):
+    """App config of the twitter_utils app."""
+
+    name = "twitter_utils"
diff --git a/twitter_utils/management/__init__.py b/twitter_utils/management/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/twitter_utils/management/commands/__init__.py b/twitter_utils/management/commands/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/twitter_utils/management/commands/update_tweets.py b/twitter_utils/management/commands/update_tweets.py
new file mode 100644
index 00000000..cdefb21b
--- /dev/null
+++ b/twitter_utils/management/commands/update_tweets.py
@@ -0,0 +1,28 @@
+from django.conf import settings
+from django.core.management.base import BaseCommand
+
+from ...services import TweetDownloadService
+
+
+class Command(BaseCommand):
+    """
+    Manage command which downloads new Tweets via TweetDownloadService.
+    """
+
+    def add_arguments(self, parser):
+        # optional age limit (in days) of the downloaded Tweets
+        parser.add_argument(
+            "--days_back",
+            default=1,
+            required=False,
+            type=int,
+            help="Stáří tweetů ve dnech",
+        )
+
+    def handle(self, *args, **options):
+        tds = TweetDownloadService(
+            bearer_token=settings.TWITTER_BEARER_TOKEN, days_back=options["days_back"]
+        )
+        tds.perform_update()
+
+        self.stdout.write("\nUpdating tweets finished!")
diff --git a/twitter_utils/migrations/0001_initial.py b/twitter_utils/migrations/0001_initial.py
new file mode 100644
index 00000000..a5e2a391
--- /dev/null
+++ b/twitter_utils/migrations/0001_initial.py
@@ -0,0 +1,40 @@
+# Generated by Django 4.0.7 on 2022-08-19 08:35
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    initial = True
+
+    dependencies = []
+
+    operations = [
+        migrations.CreateModel(
+            name="Tweet",
+            fields=[
+                (
+                    "id",
+                    models.AutoField(
+                        auto_created=True,
+                        primary_key=True,
+                        serialize=False,
+                        verbose_name="ID",
+                    ),
+                ),
+                (
+                    "author_img_url",
+                    models.URLField(
+                        default="https://pbs.twimg.com/profile_images/1556544269443387394/jSO2A2Fr_200x200.jpg"
+                    ),
+                ),
+                ("author_name", models.CharField(default="Piráti", max_length=128)),
+                (
+                    "author_username",
+                    models.CharField(default="PiratskaStrana", max_length=128),
+                ),
+                ("text", models.TextField()),
+                ("twitter_id", models.CharField(max_length=32, unique=True)),
+            ],
+        ),
+    ]
diff --git a/twitter_utils/migrations/__init__.py b/twitter_utils/migrations/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/twitter_utils/models.py b/twitter_utils/models.py
new file mode 100644
index 00000000..8453dfb3
--- /dev/null
+++ b/twitter_utils/models.py
@@ -0,0 +1,17 @@
+from django.db import models
+
+
+class Tweet(models.Model):
+    """
+    Stores Tweets fetched from the Twitter API by the update_tweets manage command.
+    The Tweet ID is kept as a string because even the BigInt limit is of the same
+    order of magnitude as the IDs of the newest Tweets (2022).
+    """
+
+    author_img_url = models.URLField(
+        default="https://pbs.twimg.com/profile_images/1556544269443387394/jSO2A2Fr_200x200.jpg"
+    )  # TODO consider another default, maybe from static
+    author_name = models.CharField(max_length=128, default="Piráti")
+    author_username = models.CharField(max_length=128, default="PiratskaStrana")
+    text = models.TextField()
+    twitter_id = models.CharField(max_length=32, unique=True)
diff --git a/twitter_utils/services.py b/twitter_utils/services.py
new file mode 100644
index 00000000..61cf6b72
--- /dev/null
+++ b/twitter_utils/services.py
@@ -0,0 +1,94 @@
+from datetime import timedelta
+
+from django.db.models import QuerySet
+from django.utils import timezone
+from tweepy import Client
+
+from .models import Tweet
+
+
+class TweetDownloadService:
+    """
+    Downloads Tweets of the "PiratskaStrana" account from the Twitter API and
+    saves those that are not in the DB yet as Tweet model instances.
+    """
+
+    client: Client
+    days_back: int
+
+    def __init__(self, bearer_token, days_back=1):
+        """
+        bearer_token is the Twitter API token, days_back says how many days old
+        the requested Tweets can be. Raises RuntimeError for an empty token.
+        """
+        if not bearer_token:
+            raise RuntimeError("Twitter bearer token not set, cannot update tweets")
+
+        self.client = Client(bearer_token=bearer_token)
+        self.days_back = days_back
+
+    @staticmethod
+    def get_latest_saved_tweet_id() -> QuerySet:
+        """
+        Returns the Twitter IDs (strings) of all already saved Tweets - it might
+        be worth taking them only from a certain time period...
+        """
+        return Tweet.objects.values_list("twitter_id", flat=True)
+
+    def get_tweets_response(self, user_id) -> list:
+        """
+        Returns a list of Tweets (objects) for the given Twitter user, which is
+        empty when the API sends no Tweets for the requested time window.
+        """
+        tweets_response = self.client.get_users_tweets(
+            user_id,
+            expansions=["author_id", "entities.mentions.username"],
+            max_results=100,
+            start_time=timezone.now() - timedelta(days=self.days_back),
+            tweet_fields=["author_id", "created_at"],
+            user_fields=["name", "username"],
+        )  # 49022430
+
+        # tweepy sets data to None when nothing matched and callers iterate it
+        return tweets_response.data or []
+
+    def get_user_response(self) -> dict:
+        """
+        Returns information about the "PiratskaStrana" user.
+        This call would not be needed if we hardcoded the ID 49022430.
+        Raises RuntimeError when the API returns no data for the user.
+        """
+        user_response = self.client.get_user(
+            username="PiratskaStrana",
+            user_fields=["profile_image_url"],  # id, name, username enabled by default
+        )
+
+        if user_response.data is None:
+            raise RuntimeError("Twitter user PiratskaStrana not found")
+
+        return user_response.data
+
+    def perform_update(self) -> list[Tweet]:
+        """
+        Wraps the whole process of downloading Tweets from the API into the DB.
+        Returns what bulk_create returns for the newly created Tweets.
+        """
+        user_data = self.get_user_response()
+        downloaded_tweets = self.get_tweets_response(user_id=user_data["id"])
+        # built once, so every membership test below is a cheap set lookup
+        saved_tweet_ids = set(self.get_latest_saved_tweet_id())
+
+        # author fields always come from the @PiratskaStrana profile for now
+        tweets_to_save = [
+            Tweet(
+                author_img_url=user_data["profile_image_url"],
+                author_name=user_data["name"],
+                author_username=user_data["username"],
+                text=tweet.text,
+                twitter_id=str(tweet.id),
+            )
+            for tweet in downloaded_tweets
+            if str(tweet.id) not in saved_tweet_ids
+        ]
+
+        return Tweet.objects.bulk_create(tweets_to_save)
-- 
GitLab