summaryrefslogtreecommitdiff
path: root/britney2/policies/email.py
blob: 5d10ed32b4cb608a34c3d25ec57acbedf582d1fa (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
import os
import re
import json
import math
import socket
import smtplib

from urllib.error import HTTPError
from urllib.parse import unquote
from collections import defaultdict

from britney2.policies.rest import Rest
from britney2.policies.policy import BasePolicy, PolicyVerdict


# Upper bound, in days, on the gap between two recurring emails
MAX_INTERVAL = 30

API_PREFIX = 'https://api.launchpad.net/1.0/'
USER = '%s~' % API_PREFIX

# Launchpad API links of bot accounts; these never receive emails
BOTS = {
    USER + bot_name
    for bot_name in (
        'ci-train-bot',
        'bileto-bot',
        'ubuntu-archive-robot',
        'katie',
    )
}

# Template of the complete notification mail: the header lines come first
# and are separated from the body text by the first blank line.
# EmailPolicy.apply_policy_impl renders it with str.format(); the
# placeholders are {recipients}, {source_name}, {version}, {series}, {age}
# and {plural} ('' or 's', so that "day{plural}" reads correctly).
MESSAGE = """From: Ubuntu Release Team <noreply@canonical.com>
To: {recipients}
X-Proposed-Migration: notice
Subject: [proposed-migration] {source_name} {version} stuck in {series}-proposed for {age} day{plural}.

Hi,

{source_name} {version} needs attention.

It has been stuck in {series}-proposed for {age} day{plural}.

You either sponsored or uploaded this package, please investigate why it hasn't been approved for migration.

http://people.canonical.com/~ubuntu-archive/proposed-migration/{series}/update_excuses.html#{source_name}

https://wiki.ubuntu.com/ProposedMigration

If you have any questions about this email, please ask them in #ubuntu-release channel on Freenode IRC.

Regards, Ubuntu Release Team.
"""


def person_chooser(source):
    """Work out who should be held responsible for a source upload.

    *source* is a mapping carrying the ``package_signer_link``,
    ``sponsor_link``, ``creator_link`` and ``package_creator_link`` keys.
    Returns the set of person links to notify.
    """
    signer = source['package_signer_link']
    sponsor = source['sponsor_link']
    creator = source['creator_link']

    # Everybody involved in the upload, minus missing values and bots.
    blamed = {
        link
        for link in (signer, sponsor, creator)
        if link is not None and link not in BOTS
    }

    # Uploads signed by a bot (e.g. bileto) are made on somebody's behalf,
    # and direct uploads have neither creator nor sponsor: in both cases
    # the package creator is the person to contact.
    signed_by_bot = signer in BOTS
    direct_upload = not (creator or sponsor)
    if signed_by_bot or direct_upload:
        blamed.add(source['package_creator_link'])
    return blamed


def address_chooser(addresses):
    """Pick the most suitable address out of *addresses*.

    The first @ubuntu.com address wins outright.  Failing that, the last
    @canonical.com address is used, then the first non-empty address, and
    finally the empty string when nothing is available.
    """
    pool = tuple(addresses)
    for candidate in pool:
        if candidate.endswith('@ubuntu.com'):
            return candidate
    company = [item for item in pool if item.endswith('@canonical.com')]
    if company:
        return company[-1]
    return next((item for item in pool if item), '')


class EmailPolicy(BasePolicy, Rest):
    """Send an email when a package has been rejected.

    The people behind an upload that is stuck in the Proposed pocket are
    looked up through the Launchpad API and mailed at growing intervals.
    What has already been sent is kept in a JSON cache file so that later
    runs do not repeat themselves.
    """

    # Extracts the address part of a "Name <user@host>" key uid.
    _UID_EMAIL = re.compile(r'^.*<(.+@.+)>$')

    def __init__(self, options, suite_info, dry_run=False):
        super().__init__('email', options, suite_info, {'unstable'})
        self.filename = os.path.join(options.unstable, 'EmailCache')
        # Maps lp username -> email address
        self.addresses = {}
        # Dict of dicts; maps pkg name -> pkg version -> (emails, last_sent)
        self.emails_by_pkg = defaultdict(dict)
        # self.cache contains self.emails_by_pkg from previous run
        self.cache = {}
        self.dry_run = dry_run
        self.email_host = getattr(self.options, 'email_host', 'localhost')

    def initialise(self, britney):
        """Load the email cache written by previous runs.

        A leftover ``<cache>.new`` file is merged into the loaded cache,
        written back and moved into place via save_state().
        """
        super().initialise(britney)

        if os.path.exists(self.filename):
            with open(self.filename, encoding='utf-8') as data:
                self.cache = json.load(data)
            self.log("Loaded cached email data from %s" % self.filename)
        tmp = self.filename + '.new'
        if os.path.exists(tmp):
            # if we find a record on disk of emails sent from an incomplete
            # britney run, merge them in now.
            with open(tmp, encoding='utf-8') as data:
                self.cache.update(json.load(data))
            self._save_progress(self.cache)
            self.save_state()

    def _scrape_gpg_emails(self, person):
        """Find email addresses from one person's GPG keys.

        :param person: link of the person, used as prefix for the
            ``/gpg_keys`` query.
        :return: the address picked by address_chooser() ('' when no
            address was found), or None when the lookup fails with
            HTTP 410.
        :raises HTTPError: for any HTTP failure other than 410.
        """
        if person in self.addresses:
            return self.addresses[person]
        addresses = []
        try:
            gpg = self.query_lp_rest_api(person + '/gpg_keys', {})
            for key in gpg['entries']:
                details = self.query_rest_api('http://keyserver.ubuntu.com/pks/lookup', {
                    'op': 'index',
                    'search': '0x' + key['fingerprint'],
                    'exact': 'on',
                    'options': 'mr',
                })
                for line in details.splitlines():
                    parts = line.split(':')
                    if parts[0] == 'info':
                        # give up on this key unless the reply is a
                        # version 1 listing holding at most one key
                        if int(parts[1]) != 1 or int(parts[2]) > 1:
                            break
                    if parts[0] == 'uid':
                        if len(parts) < 2:
                            # malformed line; skip it rather than crash
                            continue
                        # the flags are read from the fifth field; a line
                        # cut short is treated as having no flags set
                        flags = parts[4] if len(parts) > 4 else ''
                        if 'e' in flags or 'r' in flags:
                            continue
                        uid = unquote(parts[1])
                        match = self._UID_EMAIL.match(uid)
                        if match:
                            addresses.append(match.group(1))
            address = self.addresses[person] = address_chooser(addresses)
            return address
        except HTTPError as e:
            if e.code != 410:  # suspended user
                raise
            self.log('Ignoring person %s as suspended in Launchpad' % person)
            return None

    def scrape_gpg_emails(self, people):
        """Find email addresses from GPG keys.

        :param people: iterable of person links (None is accepted and
            treated as empty).
        :return: list of usable addresses; people for whom no address
            could be determined (None or '') are left out, so that no
            empty recipient ever reaches the mail server.
        """
        found = (self._scrape_gpg_emails(person) for person in (people or []))
        return [email for email in found if email]

    def lp_get_emails(self, pkg, version):
        """Ask LP who uploaded this package.

        :return: list of email addresses, empty when Launchpad knows no
            matching publication in the Proposed pocket.
        """
        data = self.query_lp_rest_api('%s/+archive/primary' % self.options.distribution, {
            'ws.op': 'getPublishedSources',
            'distro_series': '/%s/%s' % (self.options.distribution, self.options.series),
            'exact_match': 'true',
            'order_by_date': 'true',
            'pocket': 'Proposed',
            'source_name': pkg,
            'version': version,
        })
        try:
            source = data['entries'][-1]
        # IndexError means no packages in -proposed matched this name/version,
        # which is expected to happen when bileto runs britney.
        except IndexError:
            self.log('Email getPublishedSources IndexError (%s %s)' % (pkg, version))
            return []
        return self.scrape_gpg_emails(person_chooser(source))

    @staticmethod
    def _last_due(age, max_age):
        """Return the most recent age (in days) at which a mail was due.

        Mails are due at ``max_age + 2**k - 2`` days for k = 1, 2, ...
        (offsets 0, 2, 6, 14, ...).  Once that offset reaches
        MAX_INTERVAL the schedule switches to whole multiples of
        MAX_INTERVAL days past max_age.  The result is never below
        max_age.
        """
        if age < max_age:
            return max_age
        # floor(log2(n)) computed exactly on integers, instead of relying
        # on the rounding of a floating point logarithm
        exponent = (age + 2 - max_age).bit_length() - 1
        last_due = (1 << exponent) + max_age - 2
        # Don't let the interval double without bounds.
        if last_due - max_age >= MAX_INTERVAL:
            last_due = (age - max_age) // MAX_INTERVAL * MAX_INTERVAL + max_age
        # And don't send emails before we've reached the minimum age
        # threshold.
        return max(last_due, max_age)

    def apply_policy_impl(self, email_info, suite, source_name, source_data_tdist, source_data_srcdist, excuse):
        """Send email if package is rejected.

        Always returns PolicyVerdict.PASS; the only effects are the mail
        itself and the bookkeeping in self.emails_by_pkg and the
        '.new' progress file.  In dry-run mode nothing is sent or saved,
        the would-be recipients are only logged.  The email_info argument
        is not used here.
        """
        max_age = 5 if excuse.is_valid else 1
        series = self.options.series
        version = source_data_srcdist.version
        # NOTE(review): the original "int(excuse.daysold) or 0" presumably
        # meant to cope with an unset (None) daysold, which int() rejects;
        # apply the fallback before converting.
        age = int(excuse.daysold or 0)
        plural = '' if age == 1 else 's'
        # an item is stuck if it's
        # - old enough
        # - not blocked
        # - not temporarily rejected (e.g. by the autopkgtest policy when tests
        #   are still running)
        stuck = (
            age >= max_age
            and 'block' not in excuse.reason
            and excuse.current_policy_verdict != PolicyVerdict.REJECTED_TEMPORARILY
        )
        if self.dry_run:
            self.log("[email dry run] Considering: %s/%s: %s" %
                     (source_name, version, "stuck" if stuck else "not stuck"))
        if not stuck:
            return PolicyVerdict.PASS

        cached = self.cache.get(source_name, {}).get(version)
        try:
            emails, last_sent = cached
            # migration of older data
            last_sent = int(last_sent)
        except (TypeError, ValueError):
            # source_name/version never seen before (or the cached entry is
            # not in a shape we understand): start from scratch
            emails = []
            last_sent = 0
            last_due = max_age
        else:
            # Find out whether we are due to send another email by
            # calculating the most recent age at which we should have sent
            # one.
            last_due = self._last_due(age, max_age)

        if self.dry_run:
            self.log("[email dry run] Age %d >= threshold %d: would email: %s" %
                     (age, max_age, self.lp_get_emails(source_name, version)))
            # don't update the cache file in dry run mode; we'll see all output each time
            return PolicyVerdict.PASS

        if last_sent < last_due:
            if not emails:
                emails = self.lp_get_emails(source_name, version)
            if emails:
                recipients = ', '.join(emails)
                # name every placeholder explicitly so that renaming a local
                # variable cannot silently break the template
                msg = MESSAGE.format(
                    recipients=recipients,
                    source_name=source_name,
                    version=version,
                    series=series,
                    age=age,
                    plural=plural,
                )
                self.log("%s/%s stuck for %d days, emailing %s" %
                         (source_name, version, age, recipients))
                try:
                    server = smtplib.SMTP(self.email_host)
                    try:
                        server.sendmail('noreply@canonical.com', emails, msg)
                    finally:
                        # always end the session, even if sending failed
                        server.quit()
                # socket.error is OSError, which smtplib's own exceptions
                # derive from as well
                except socket.error as err:
                    self.log("Failed to send mail! Is SMTP server running?")
                    self.log(err)
                else:
                    # record the age at which the mail should have been sent
                    last_sent = last_due
        self.emails_by_pkg[source_name][version] = (emails, last_sent)
        self._save_progress(self.emails_by_pkg)
        return PolicyVerdict.PASS

    def _save_progress(self, my_data):
        """Dump my_data as JSON into the '.new' file and return its path.

        Called after every processed stuck package, so that an interrupted
        run leaves a record that initialise() can pick up.
        """
        tmp = self.filename + '.new'
        with open(tmp, 'w', encoding='utf-8') as data:
            json.dump(my_data, data)
        return tmp

    def save_state(self, britney=None):
        """Save email notification status of all pending packages.

        Moves the '.new' progress file over the cache file.  Nothing is
        done in dry-run mode, and the success message is only logged when
        a file was actually moved and *britney* is given.
        """
        if self.dry_run:
            return
        try:
            os.rename(self.filename + '.new', self.filename)
        # if we haven't written any cache, don't clobber the old one
        except FileNotFoundError:
            return
        if britney:
            self.log("Wrote email data to %s" % self.filename)