How to parse the data from Google Alerts?

Firstly, how would you get Google Alerts information into a database other than by parsing the text of the email message that Google sends you?

It seems that there is no Google Alerts API.

If you must parse text, how would you go about parsing out the relevant pieces of the email message?

Answers


When you create the alert, set the "Deliver To" to "Feed" and then you can consume the feed XML as you would any other feed. This is much easier to parse and digest into a database.


/**
 * Creates a Google Alert by driving the Google web pages with cURL.
 *
 * The class logs in through the accounts.google.com login form, submits the
 * alert creation form and then looks up the feed URL of the new alert on the
 * "manage" page. Everything is done by scraping HTML, so it depends on the
 * exact markup those pages return.
 */
class googleAlerts{
    /** @var string Account e-mail posted in the login form's "Email" field. */
    private $username;

    /** @var string Account password posted in the login form's "Passwd" field. */
    private $password;

    /** @var string Path of the file cURL uses to load and store cookies. */
    private $cookieFile;

    /**
     * All arguments are optional so that `new googleAlerts` keeps working; the
     * defaults are the placeholder values the class was originally written with.
     *
     * @param string $username   Google account e-mail.
     * @param string $password   Google account password.
     * @param string $cookieFile Cookie jar path handed to cURL.
     */
    public function __construct($username = 'XXXXXX@gmail.com', $password = 'YYYYYY', $cookieFile = 'cookies.txt')
    {
        $this->username   = $username;
        $this->password   = $password;
        $this->cookieFile = $cookieFile;
    }

    /**
     * Logs in, creates an alert for the given query and returns its feed URL.
     *
     * @param string $alert Search query of the alert to create.
     *
     * @return string|false Feed URL of the alert, or false when any step fails
     *                      (request error, form not found, login rejected, or
     *                      the alert not being listed on the manage page).
     */
    public function createAlert($alert){
        $ch = curl_init();
        if ($ch === false) {
            return false;
        }

        // TLS peer verification is deliberately left at cURL's default (on):
        // the account password is sent over this connection.
        curl_setopt_array($ch, array(
            CURLOPT_USERAGENT      => "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)",
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_COOKIEJAR      => $this->cookieFile,
            CURLOPT_COOKIEFILE     => $this->cookieFile,
            CURLOPT_HEADER         => false,
            CURLOPT_CONNECTTIMEOUT => 120,
            CURLOPT_TIMEOUT        => 120,
        ));

        // Step 1: fetch the login page and collect the fields of its form
        // (hidden inputs included) so they can be posted back.
        $loginPage = $this->httpGet(
            $ch,
            'https://accounts.google.com/ServiceLogin?hl=en&service=alerts&continue=http://www.google.com/alerts/manage'
        );
        if ($loginPage === false) {
            return false;
        }

        $formFields = $this->getFormFields($loginPage);
        if ($formFields === null) {
            return false;
        }

        $formFields['Email']  = $this->username;
        $formFields['Passwd'] = $this->password;
        unset($formFields['PersistentCookie']);

        // Step 2: submit the credentials. A response without a <title> tag is
        // treated as a failed login.
        $loginResult = $this->httpPost($ch, 'https://accounts.google.com/ServiceLoginAuth', $formFields);
        if ($loginResult === false || strpos($loginResult, '<title>') === false) {
            return false;
        }

        // Step 3: visit the alerts landing page; the response body is not used.
        // NOTE(review): presumably needed to pick up session cookies - confirm.
        $this->httpGet($ch, 'http://www.google.com/alerts');

        // Step 4: load the creation form and fill it in.
        $createPage = $this->httpGet($ch, 'http://www.google.com/alerts/create');
        if ($createPage === false) {
            return false;
        }

        $createFields = $this->getFormFieldsCreate($createPage);
        if ($createFields === null) {
            return false;
        }

        $createFields['q'] = $alert;
        // NOTE(review): the meaning of the t/f/l codes is not visible here;
        // 'e' => 'feed' presumably selects feed delivery - confirm.
        $createFields['t'] = '7';
        $createFields['f'] = '1';
        $createFields['l'] = '0';
        $createFields['e'] = 'feed';
        unset($createFields['PersistentCookie']);

        if ($this->httpPost($ch, 'http://www.google.com/alerts/create', $createFields) === false) {
            return false;
        }

        // Step 5: find the feed link that follows the alert's name on the
        // manage page. The handle is switched back to GET so the creation
        // form body is not sent again.
        $managePage = $this->httpGet($ch, 'http://www.google.com/alerts/manage');
        if ($managePage === false) {
            return false;
        }

        // preg_quote() keeps characters in the alert text (including the '%'
        // delimiter) from being interpreted as regex syntax.
        $pattern = '%' . preg_quote($alert, '%')
            . '(?=</a>).*?<a href=[\'"]http://www.google.com/alerts/feeds/([^\'"]+)%i';

        if (preg_match($pattern, $managePage, $matches)) {
            return 'http://www.google.com/alerts/feeds/' . $matches[1];
        }

        return false;
    }

    /**
     * Performs a GET request on the shared cURL handle.
     *
     * @param resource|object $ch  cURL handle.
     * @param string          $url URL to request.
     *
     * @return string|false Response body, or false if the request failed.
     */
    private function httpGet($ch, $url)
    {
        curl_setopt($ch, CURLOPT_HTTPGET, true);
        curl_setopt($ch, CURLOPT_URL, $url);

        return curl_exec($ch);
    }

    /**
     * Performs a form-encoded POST request on the shared cURL handle.
     *
     * @param resource|object $ch     cURL handle.
     * @param string          $url    URL to post to.
     * @param array           $fields Field name => value pairs.
     *
     * @return string|false Response body, or false if the request failed.
     */
    private function httpPost($ch, $url, $fields)
    {
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_POST, true);
        // '&' is passed explicitly so the arg_separator.output ini setting
        // cannot change the encoding of the body.
        curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($fields, '', '&'));

        return curl_exec($ch);
    }

    /**
     * Extracts the inputs of the form whose markup contains "id=gaia_loginform".
     *
     * @param string $data HTML of the login page.
     *
     * @return array|null Input name => value pairs, or null if no such form exists.
     */
    private function getFormFields($data)
    {
        if (preg_match('/(<form.*?id=.?gaia_loginform.*?<\/form>)/is', $data, $matches)) {
            return $this->getInputs($matches[1]);
        }

        return null;
    }

    /**
     * Extracts the inputs of the first form whose markup contains "name=".
     *
     * @param string $data HTML of the alert creation page.
     *
     * @return array|null Input name => value pairs, or null if no such form exists.
     */
    private function getFormFieldsCreate($data)
    {
        if (preg_match('/(<form.*?name=.?.*?<\/form>)/is', $data, $matches)) {
            return $this->getInputs($matches[1]);
        }

        return null;
    }

    /**
     * Collects every named <input> tag of an HTML fragment.
     *
     * Inputs without a name are skipped; inputs without a value attribute get
     * an empty string. When a name occurs more than once the last one wins.
     *
     * @param string $form HTML fragment to scan.
     *
     * @return array Input name => value pairs (both strings).
     */
    private function getInputs($form)
    {
        $inputs = array();

        if (!preg_match_all('/<input[^>]+>/is', $form, $tags)) {
            return $inputs;
        }

        foreach ($tags[0] as $tag) {
            $name = $this->getAttribute($tag, 'name');
            if ($name === null) {
                continue;
            }

            $value = $this->getAttribute($tag, 'value');
            $inputs[$name] = ($value === null) ? '' : $value;
        }

        return $inputs;
    }

    /**
     * Reads one attribute from a single HTML tag.
     *
     * Handles double-quoted, single-quoted and unquoted values and decodes
     * HTML entities, so the caller gets the value a browser would submit.
     *
     * @param string $tag       Markup of one tag, e.g. '<input name="q">'.
     * @param string $attribute Attribute name to look up (case-insensitive).
     *
     * @return string|null Attribute value, or null if the attribute is absent.
     */
    private function getAttribute($tag, $attribute)
    {
        // (?<![\w-]) keeps e.g. "data-name=" from matching a lookup of "name";
        // (?|...) is a branch reset, so the value is always capture group 1.
        $pattern = '/(?<![\w-])' . preg_quote($attribute, '/')
            . '\s*=\s*(?|"([^"]*)"|\'([^\']*)\'|([^\s"\'>]*))/i';

        if (!preg_match($pattern, $tag, $match)) {
            return null;
        }

        return html_entity_decode($match[1], ENT_QUOTES, 'UTF-8');
    }
}
// Example usage: create an alert and print what createAlert() returns.
// A returned false is echoed as an empty string, so nothing is printed then.
$alert = new googleAlerts();
$feedUrl = $alert->createAlert('YOUR ALERT');
echo $feedUrl;

It will return a link to the RSS feed of your newly created alert.


I found a Google Alerts API here. It's pretty minimal and I haven't tested it.


Need Your Help

Applying a Gradient to CAShapeLayer

iphone core-graphics quartz-graphics gradient cashapelayer

Does anyone have any experience in applying a Gradient to a CAShapeLayer? CAShapeLayer is a fantastic layer class, but it appears to only support solid fill coloring, whereas I'd like it to have a