# Source code for gravityspy.table.events

# -*- coding: utf-8 -*-
# Copyright (C) Scott Coughlin (2017-)
#
# This file is part of gravityspy.
#
# gravityspy is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# gravityspy is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with gravityspy.  If not, see <http://www.gnu.org/licenses/>.

from gwtrigfind import find_trigger_files
from gwpy.segments import DataQualityFlag
from gwpy.table import GravitySpyTable
from gwpy.utils import mp as mp_utils
from sklearn.cluster import KMeans

from ..utils import log
from ..utils import utils
from ..api.project import GravitySpyProject
from ..ml.train_classifier import make_model

import panoptes_client
import numpy
import pandas
import subprocess
import string
import random
import os

class Events(GravitySpyTable):
    """A table of event triggers for the Gravity Spy project.

    Extends `gwpy.table.GravitySpyTable` with methods for reading and
    filtering triggers, classifying them with a CNN, and uploading them
    to the Zooniverse citizen-science platform.
    """
[docs] @classmethod def read(cls, *args, **kwargs): """Classify triggers in this table Parameters: `gwpy.table.GravitySpyTable` Returns: `Events` table """ etg = kwargs.pop('etg', 'OMICRON') tab = super(Events, cls).read(*args, **kwargs) tab = tab.to_pandas() if 'gravityspy_id' not in tab.columns: tab['gravityspy_id'] = tab.apply(id_generator, 1) tab['image_status'] = 'testing' tab['data_quality'] = 'no_flag' tab['upload_flag'] = 0 tab['citizen_score'] = 0.0 tab['links_subjects'] = 0 tab['url1'] = '' tab['url2'] = '' tab['url3'] = '' tab['url4'] = '' if etg == 'OMICRON': tab['event_id'] = tab['event_id'].apply(int) tab['process_id'] = tab['process_id'].apply(int) tab = cls.from_pandas(tab) if etg == 'OMICRON': tab['event_time'] = (tab['peak_time'] + (0.000000001)*tab['peak_time_ns']) tab['event_time'].format = '%.9f' else: raise ValueError("No trigger reading has " "been defined for this ETG") return tab
[docs] @classmethod def fetch(cls, *args, **kwargs): tab = super(Events, cls).fetch(*args, **kwargs) return cls(tab)
[docs] def classify(self, path_to_cnn, **kwargs): """Classify triggers in this table Parameters: path_to_cnn: file name of the CNN you would like to use **kwargs: nproc : number of parallel event times to be processing at once Returns: `Events` table """ if 'event_time' not in self.keys(): raise ValueError("This method only works if you have defined " "a column event_time for your " "Event Trigger Generator.") # Parse key word arguments config = kwargs.pop('config', utils.GravitySpyConfigFile()) plot_directory = kwargs.pop('plot_directory', 'plots') timeseries = kwargs.pop('timeseries', None) source = kwargs.pop('source', None) channel_name = kwargs.pop('channel_name', None) frametype = kwargs.pop('frametype', None) # make a list of event times inputs = zip(self['event_time'], self['ifo'], self['gravityspy_id']) inputs = ((etime, ifo, gid, config, plot_directory, timeseries, source, channel_name, frametype) for etime, ifo, gid in inputs) # calculate maximum number of processes nproc = kwargs.pop('nproc', 1) # make q_scans output = mp_utils.multiprocess_with_queues(nproc, _make_single_qscan, inputs) qvalues = [] # raise exceptions (from multiprocessing, single process raises inline) for f, x in output: if isinstance(x, Exception): x.args = ('Failed to make q scan at time %s: %s' % (f, str(x)),) raise x else: qvalues.append(x) self['q_value'] = qvalues results = utils.label_q_scans(plot_directory=plot_directory, path_to_cnn=path_to_cnn, **kwargs) results = results.to_pandas() results['Filename1'] = results['Filename1'].apply(lambda x, y : os.path.join(y, x), args=(plot_directory,)) results['Filename2'] = results['Filename2'].apply(lambda x, y : os.path.join(y, x), args=(plot_directory,)) results['Filename3'] = results['Filename3'].apply(lambda x, y : os.path.join(y, x), args=(plot_directory,)) results['Filename4'] = results['Filename4'].apply(lambda x, y : os.path.join(y, x), args=(plot_directory,)) results = Events.from_pandas(results.merge(self.to_pandas(), 
on=['gravityspy_id'])) return results
[docs] def to_sql(self, table='glitches_v2d0', engine=None, **kwargs): """Obtain omicron triggers to run gravityspy on Parameters: table (str): name of SQL table """ from sqlalchemy.engine import create_engine # connect if needed if engine is None: conn_kw = {} for key in ('db', 'host', 'user', 'passwd'): try: conn_kw[key] = kwargs.pop(key) except KeyError: pass engine = create_engine(get_connection_str(**conn_kw)) self.to_pandas().to_sql(table, engine, index=False, if_exists='append') return
[docs] def update_sql(self, table='glitches_v2d0', engine=None): """Obtain omicron triggers to run gravityspy on Parameters: table (str): name of SQL tabl """ from sqlalchemy.engine import create_engine # connect if needed if engine is None: conn_kw = {} for key in ('db', 'host', 'user', 'passwd'): try: conn_kw[key] = kwargs.pop(key) except KeyError: pass engine = create_engine(get_connection_str(**conn_kw)) column_dict = self.to_pandas().to_dict(orient='records')[0] sql_command = 'UPDATE {0} SET '.format(table) for column_name in column_dict: if isinstance(column_dict[column_name], str): sql_command = sql_command + '''\"{0}\" = \'{1}\', '''.format(column_name, column_dict[column_name]) else: sql_command = sql_command + '''\"{0}\" = {1}, '''.format(column_name, column_dict[column_name]) sql_command = sql_command[:-2] + ' WHERE \"gravityspy_id\" = \'' + self['gravityspy_id'].iloc[0] + "'" engine.execute(sql_command) return
[docs] def upload_to_zooniverse(self, subject_set_id=None): """Obtain omicron triggers to run gravityspy on Parameters: subject_set_id (optional, int) : subject set id to upload to Returns: `Events` table """ # First filter out images that have already been uploaded tab = self[self['upload_flag'] == 1] if subject_set_id is None: subset_ids = numpy.unique(tab['subjectset']) else: subset_ids = numpy.atleast_1d(numpy.array(subject_set_id)) panoptes_client.Panoptes.connect() project = panoptes_client.Project.find(slug='zooniverse/gravity-spy') for subset_id in subset_ids: subjectset = panoptes_client.SubjectSet.find(subset_id) subjects = [] if subject_set_id is None: tab1 = tab[tab['subjectset'] == subset_id] for fn1, fn2, fn3, fn4, gid in tab1['Filename1', 'Filename2', 'Filename3', 'Filename4', 'gravityspy_id']: subject = panoptes_client.Subject() subject.links.project = project subject.add_location(str(fn1)) subject.add_location(str(fn2)) subject.add_location(str(fn3)) subject.add_location(str(fn4)) subject.metadata['date'] = '20180825' subject.metadata['subject_id'] = str(gid) subject.metadata['Filename1'] = fn1.split('/')[-1] subject.metadata['Filename2'] = fn2.split('/')[-1] subject.metadata['Filename3'] = fn3.split('/')[-1] subject.metadata['Filename4'] = fn4.split('/')[-1] subject.save() subjects.append(subject) self[self['gravityspy_id'] == gid]['links_subject'] = int(subject.id) self[self['gravityspy_id'] == gid]['url1'] = subject.raw['locations'][0]['image/png'].split('?')[0] self[self['gravityspy_id'] == gid]['url2'] = subject.raw['locations'][1]['image/png'].split('?')[0] self[self['gravityspy_id'] == gid]['url3'] = subject.raw['locations'][2]['image/png'].split('?')[0] self[self['gravityspy_id'] == gid]['url4'] = subject.raw['locations'][3]['image/png'].split('?')[0] self['upload_flag'][self['gravityspy_id'] == gid] = 1 subjectset.add(subjects) return self
[docs] def update_scores(self, path_to_cnn, nproc=1, **kwargs): """Obtain omicron triggers to run gravityspy on Parameters: path_to_cnn (str): filename of model Returns: `Events` table with columns containing new scores """ if not all(elem in self.keys() for elem in ['Filename1', 'Filename2', 'Filename3', 'Filename4']): raise ValueError("This method only works if the file paths " "of the images of the images are known.") results = utils.label_select_images(filename1=self['Filename1'], filename2=self['Filename2'], filename3=self['Filename3'], filename4=self['Filename4'], path_to_cnn=path_to_cnn, **kwargs) return Events(results)
[docs] def determine_workflow_and_subjectset(self, project_info_pickle): """Obtain omicron triggers to run gravityspy on Parameters: path_to_cnn (str): filename of file with Gravity Spy project info Returns: `Events` table with columns workflow and subjectset """ if 'ml_confidence' not in self.keys() or 'ml_label' not in self.keys(): raise ValueError("This method only works if the confidence and label " "of the image in known.") gspyproject = GravitySpyProject.load_project_from_cache( project_info_pickle ) workflows_for_each_class = gspyproject.get_level_structure(IDfilter='O2') # Determine subject set and workflow this should go to. level_of_images = [] subjectset_of_images = [] for label, confidence in zip(self['ml_label'], self['ml_confidence']): for iworkflow in ['1610', '1934', '1935', '7765', '7766', '7767']: if label in workflows_for_each_class[iworkflow].keys(): if workflows_for_each_class[iworkflow][label][2][1] <= \ confidence <= \ workflows_for_each_class[iworkflow][label][2][0]: level_of_images.append(int(workflows_for_each_class[iworkflow][label][0])) subjectset_of_images.append(workflows_for_each_class[iworkflow][label][1]) break self["workflow"] = level_of_images self["subjectset"] = subjectset_of_images return self
[docs] def create_collection(self, name=None, private=True, default_subject=None): """Obtain omicron triggers to run gravityspy on Parameters: name (str, optional): name of collection private (bool, optional): would you like this collection to be private or public default_subject (int, optional): subject id to be the cover image of collection Returns: `str` url link to the created collection """ if name is None: # will name it after the label of event table name = self['Label'][0] if default_subject is None: default_subject = self['links_subjects'][0] collection_url = ('https://www.zooniverse.org/' 'projects/zooniverse/gravity-spy/collections/') with panoptes_client.Panoptes() as client: client.connect() collection = panoptes_client.Collection() collection.links.project = '1104' collection.display_name = '{0}'.format(name) collection.private = private urltmp = collection.save() collection_url = collection_url + urltmp['collections'][0]['slug'] collection.add(list(self['links_subjects'])) collection.set_default_subject(default_subject) return collection_url
[docs] def cluster(self, nclusters, random_state=30): """Create new clusters from feature space vectors Parameters: nclusters (int): how many clusters to try to group these triggers into Returns: `Events` table """ if '0' not in self.columns: raise ValueError("You are trying to cluster but you do not have " "the feature space information in this table.") features = self.to_pandas().values[:, 0:200] kmeans_1 = KMeans(nclusters, random_state=random_state).fit(features) clusters = kmeans_1.labels_ self['clusters'] = clusters return self
[docs] @classmethod def get_triggers(cls, start, end, channel, dqflag, verbose=True, **kwargs): """Obtain omicron triggers to run gravityspy on Parameters: start (int): start of time to look for triggers end (int): end time to look for triggers channel (str): channel to look for triggers dqflag (str): name of segment during which to keep triggers Returns: `Events` table """ duration_max = kwargs.pop('duration_max', None) duration_min = kwargs.pop('duration_min', None) frequency_max = kwargs.pop('frequency_max', 2048) frequency_min = kwargs.pop('frequency_min', 10) snr_max = kwargs.pop('snr_max', None) snr_min = kwargs.pop('snr_min', 7.5) detector = channel.split(':')[0] logger = log.Logger('Gravity Spy: Fetching Omicron Triggers') # Obtain segments that are analysis ready analysis_ready = DataQualityFlag.query('{0}:{1}'.format(detector, dqflag), float(start), float(end)) # Display segments for which this flag is true logger.info("Segments for which the {0} Flag " "is active: {1}".format(dqflag, analysis_ready.active)) # get Omicron triggers files = find_trigger_files(channel,'Omicron', float(start),float(end)) triggers = cls.read(files, tablename='sngl_burst', format='ligolw') logger.info("Number of triggers " "before any filtering: {0}".format(len(triggers))) masks = numpy.ones(len(triggers), dtype=bool) logger.info("duration filter " "[{0}, {1}]".format(duration_min, duration_max)) logger.info("frequency filter " "[{0}, {1}]".format(frequency_min, frequency_max)) logger.info("snr filter " "[{0}, {1}]".format(snr_min, snr_max)) if not duration_max is None: masks &= (triggers['duration'] <= duration_max) if not duration_min is None: masks &= (triggers['duration'] >= duration_min) if not frequency_max is None: masks &= (triggers['peak_frequency'] <= frequency_max) if not frequency_min is None: masks &= (triggers['peak_frequency'] >= frequency_min) if not snr_max is None: masks &= (triggers['snr'] <= snr_max) if not snr_min is None: masks &= (triggers['snr'] >= 
snr_min) triggers = triggers[masks] # Set peakGPS logger.info("Number of triggers after " "snr, frequency, and duration filters " "cuts but before {0} flag filtering: " "{1}".format(dqflag, len(triggers))) # Filter the raw omicron triggers against the ANALYSIS READY flag. vetoed = triggers['event_time'].in_segmentlist(analysis_ready.active) triggers = triggers[vetoed] logger.info("Final trigger length: {0}".format(len(triggers))) return triggers
def id_generator(x, size=10,
                 chars=(string.ascii_uppercase + string.digits
                        + string.ascii_lowercase)):
    """Generate a random alphanumeric identifier.

    Parameters:
        x: ignored row argument, present so this function can be passed
            directly to `pandas.DataFrame.apply`
        size (int, optional): number of characters in the identifier
        chars (str, optional): alphabet to draw characters from

    Returns:
        str: a ``size``-character cryptographically random string
    """
    rng = random.SystemRandom()
    return ''.join(rng.choice(chars) for _ in range(size))
def get_connection_str(db='gravityspy',
                       host='gravityspy.ciera.northwestern.edu',
                       user=None, passwd=None):
    """Create string to pass to create_engine

    Parameters:
        db (str, optional): database name
        host (str, optional): database host
        user (str, optional): database user; falls back to the
            ``GRAVITYSPY_DATABASE_USER`` environment variable
        passwd (str, optional): database password; falls back to the
            ``GRAVITYSPY_DATABASE_PASSWD`` environment variable

    Returns:
        str: a postgresql connection URL

    Raises:
        ValueError: if either credential is still missing after
            consulting the environment
    """
    # BUG FIX: fill only the *missing* credential from the environment.
    # Previously, if either one was absent, BOTH were overwritten, so an
    # explicitly passed user (or password) was clobbered by the
    # environment (possibly with None).
    if not user:
        user = os.getenv('GRAVITYSPY_DATABASE_USER', None)
    if not passwd:
        passwd = os.getenv('GRAVITYSPY_DATABASE_PASSWD', None)

    if (not user) or (not passwd):
        raise ValueError('Remember to either pass '
                         'or export GRAVITYSPY_DATABASE_USER '
                         'and export GRAVITYSPY_DATABASE_PASSWD in order '
                         'to access the Gravity Spy Data: '
                         'https://secrets.ligo.org/secrets/144/'
                         ' description is username and secret is password.')

    return 'postgresql://{0}:{1}@{2}:5432/{3}'.format(user, passwd,
                                                      host, db)
# define multiprocessing method
def _make_single_qscan(inputs):
    """Make and save the q-scan images for a single event time.

    Parameters:
        inputs (tuple): ``(event_time, ifo, gravityspy_id, config,
            plot_directory, timeseries, source, channel_name,
            frametype)`` -- exactly the tuple built by
            `Events.classify`.

    Returns:
        tuple: ``(event_time, q_value)`` on success, or
        ``(event_time, exception)`` on failure; the caller decides
        whether to re-raise.
    """
    (event_time, ifo, gid, config, plot_directory,
     timeseries, source, channel_name, frametype) = inputs

    # Parse Ini File
    plot_time_ranges = config.plot_time_ranges
    plot_normalized_energy_range = config.plot_normalized_energy_range

    try:
        # exactly one of timeseries/source/channel_name is expected to
        # be set; if several are supplied, the last one wins
        if timeseries is not None:
            specsgrams, q_value = utils.make_q_scans(event_time=event_time,
                                                     config=config,
                                                     timeseries=timeseries)
        if source is not None:
            specsgrams, q_value = utils.make_q_scans(event_time=event_time,
                                                     config=config,
                                                     source=source)
        if channel_name is not None:
            specsgrams, q_value = utils.make_q_scans(
                event_time=event_time, config=config,
                channel_name=channel_name, frametype=frametype)
        utils.save_q_scans(plot_directory, specsgrams,
                           plot_normalized_energy_range, plot_time_ranges,
                           ifo, event_time, id_string=gid)
        return event_time, q_value
    except Exception as exc:  # pylint: disable=broad-except
        # BUG FIX: the previous code read `if nproc == 1: raise`, but
        # `nproc` is a local variable of Events.classify and is
        # undefined in this scope, so every failure raised NameError
        # instead of the real error.  Always hand the exception back;
        # Events.classify re-raises it with context after collecting
        # results.
        return event_time, exc