diff --git a/ProjectHome.md b/ProjectHome.md new file mode 100644 index 0000000..fa188d7 --- /dev/null +++ b/ProjectHome.md @@ -0,0 +1 @@ +This project provides an easy way to automate downloading of data tables from Google Webmaster Tools. \ No newline at end of file diff --git a/example-create-spreadsheet.py b/Running.md similarity index 50% rename from example-create-spreadsheet.py rename to Running.md index 518fe36..14d2247 100644 --- a/example-create-spreadsheet.py +++ b/Running.md @@ -1,54 +1,70 @@ -#!/usr/bin/python -# -# Copyright 2011 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Example script for how to download a CSV file and upload it to Google docs. - -Download a Top Search Queries CSV file and then upload the file to Google docs -as a spreadheet. The email, password, and website values used here must be -replaced with real values. See wiki document on its use. -""" - -# Import the downloader -from downloader import Downloader -# Import gdata objects for uploading files to Google Docs -import gdata.docs -import gdata.docs.service - -# Email address and password used to sign-in to Webmaster Tools and Google Docs -email = 'user@example.com' -password = '********' -# Specify the website and the type of data to download -website = 'http://www.example.com/' -selected_downloads = ['TOP_QUERIES'] - -# Instantiate the downloader object -downloader = Downloader() -# Authenticate with your Webmaster Tools sign-in info -downloader.LogIn(email, password) -# Initiate the download -downloader.DoDownload(website, selected_downloads) - -# Create a client class which will make HTTP requests with Google Docs server -gd_client = gdata.docs.service.DocsService() -# Authenticate using your Google Docs sign-in info -gd_client.ClientLogin(email, password) - -# Get the csv file name that was generated by the downloader script and use it to upload the file to Google Docs -for line in downloader.GetDownloadedFiles(): - ms = gdata.MediaSource(file_path=line.strip(), content_type=gdata.docs.service.SUPPORTED_FILETYPES['CSV']) - entry = gd_client.Upload(ms, line.strip()) - # Display the URL of the uploaded file to the command line's standard-output - print 'Spreadsheet now accessible online at:', entry.GetAlternateLink().href \ No newline at end of file +How to download a file from Google Webmaster Tools. + +# Introduction # + +This documentation explains how to automate the file download process from Google Webmaster Tools by using the python script downloader.py. + +# Details # +The steps for running this example script are as follows: + + 1. Download and install the Google Data APIs Python Client Library. + 1. Create a folder and add the downloader.py script to it. + 1. In the same folder where you added downloader.py, create and run the following Python script. You’ll need to replace the example values for “website”, “email” and “password” with valid values for your site. + +``` +#!/usr/bin/python + +# Import the downloader +from downloader import Downloader + +# Email address and password used to sign-in to Webmaster Tools +email = 'user@example.com' +password = '********' +# Specify the website and the type of data to download +website = 'http://www.example.com/' +selected_downloads = ['TOP_QUERIES'] + +# Instantiate the downloader object +downloader = Downloader() +# Authenticate with your Webmaster Tools sign-in info +downloader.LogIn(email, password) +# Initiate the download +downloader.DoDownload(website, selected_downloads) +``` + +Here is a more advanced example of how to automatically download a file, then upload it to Google Docs. +``` +#!/usr/bin/python + +# Import the downloader +from downloader import Downloader +# Import gdata objects for uploading files to Google Docs +import gdata.docs +import gdata.docs.service + +# Email address and password used to sign-in to Webmaster Tools and Google Docs +email = 'user@example.com' +password = '********' +# Specify the website and the type of data to download +website = 'http://www.example.com/' +selected_downloads = ['TOP_QUERIES'] + +# Instantiate the downloader object +downloader = Downloader() +# Authenticate with your Webmaster Tools sign-in info +downloader.LogIn(email, password) +# Initiate the download +downloader.DoDownload(website, selected_downloads) + +# Create a client class which will make HTTP requests with Google Docs server +gd_client = gdata.docs.service.DocsService() +# Authenticate using your Google Docs sign-in info +gd_client.ClientLogin(email, password) + +# Get the csv file name that was generated by the downloader script and use it to upload the file to Google Docs +for line in downloader.GetDownloadedFiles(): + ms = gdata.MediaSource(file_path=line.strip(), content_type=gdata.docs.service.SUPPORTED_FILETYPES['CSV']) + entry = gd_client.Upload(ms, line.strip()) + # Display the URL of the uploaded file to the command line's standard-output + print 'Spreadsheet now accessible online at:', entry.GetAlternateLink().href +``` \ No newline at end of file diff --git a/downloader.py b/downloader.py deleted file mode 100644 index 5028211..0000000 --- a/downloader.py +++ /dev/null @@ -1,161 +0,0 @@ -#!/usr/bin/python -# -# Copyright 2011 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -"""Module for downloading CSV files from Google Webmaster Tools. - -Module handles authentication with the Google servers by using the gdata -module provided by Google. This script requires the gdata package be -installed in order to run. - - Downloader: Handles download implementation. -""" - - -import json -import gdata.webmastertools.service - - -class Downloader(object): - """Handles client authentication and requests for Webmaster data. - - Contains the logic needed to authenticate with Google servers and - download CSV files from Webmaster Tools. - """ - HOST = 'www.google.com' - APP_NAME = 'Google-WMTdownloadscript-0.1' - LIST_PATH = '/webmasters/tools/downloads-list?hl=%s&siteUrl=%s' - FILE_NAME_HEADER = 'content-disposition' - TEXT_BEFORE_NAME = 'attachment; filename=' - - def __init__(self): - self._client = gdata.webmastertools.service.GWebmasterToolsService() - self._logged_in = False - self._language = 'en' - self._downloaded = [] - - def IsLoggedIn(self): - """Check if client has logged into their Google account yet.""" - return self._logged_in - - def LogIn(self, email, password, captcha_answer=None): - """Attempts to log into the specified Google account. - - Args: - email: User's Google email address. - password: Password for Google account. - captcha_answer: Optional answer to the captcha challenge. - - Raises: - CaptchaRequired: If the login service requires a captcha response. - BadAuthentication: If email/password is incorrect. - """ - if self._client.captcha_token and captcha_answer: - self._client.ClientLogin(email, password, source=self.APP_NAME, - captcha_token=self._client.captcha_token, - captcha_response=captcha_answer) - else: - self._client.ClientLogin(email, password, source=self.APP_NAME) - - self._logged_in = True - - def DoDownload(self, site, tables_to_download): - """Download CSV files and write them to disk. - - Downloader must be logged in before this method can be called, otherwise - it will raise a ValueError. - - Args: - site: URL for a website managed by the webmaster. - tables_to_download: List of features that should be downloaded. - - Raises: - ValueError: If the client has not logged in yet. - """ - if not self.IsLoggedIn(): - raise ValueError('Client not logged in.') - - available = self._GetDownloadList(site) - sites_json = json.loads(available) - - for key in tables_to_download: - url = sites_json.get(key) - if url: - self._DownloadFile(url) - - def GetDownloadedFiles(self): - """Get a list of downloads that have been written to disk. - - Returns: - A list containing the names of all the downloaded files that have been - written by this module. - """ - return self._downloaded - - def GetCaptchaUrl(self): - return self._client.captcha_url - - def SetLanguage(self, language_code): - self._language = language_code - - def _GetDownloadList(self, site): - """Query the server for list of download URLs. - - Args: - site: URL for a website managed by the webmaster. - - Returns: - A string of JSON data giving a mapping between Webmaster Tools features - and download URLs. For example: - - {'TOP_QUERIES':'webmaster/tools/top-search-queries-dl?...'} - """ - url = self._GetFullUrl(self.LIST_PATH % (self._language, site)) - res_stream = self._client.request('GET', url) - download_list = res_stream.read() - res_stream.close() - return download_list - - def _GetFullUrl(self, path): - """Construct an absolute URL using path segment. - - Args: - path: The path segment of a URL. - - Returns: - A URL giving the absolute path to a resource. - """ - return 'https://' + self.HOST + path - - def _DownloadFile(self, path): - """Download the file and write it to disk. - - Downloads the file based on the given URL. The file name of the download - is included in the header. Prints the file name to standard out. - - Args: - path: The path segment of the URL to download. - """ - url = self._GetFullUrl(path) - in_stream = self._client.request('GET', url) - file_name = in_stream.getheader(self.FILE_NAME_HEADER) - file_name = file_name.lstrip(self.TEXT_BEFORE_NAME) - self._downloaded.append(file_name) - out_file = open(file_name, 'w') - out_file.write(in_stream.read()) - - in_stream.close() - out_file.close() diff --git a/example-simple-download.py b/example-simple-download.py deleted file mode 100644 index bf9a6c2..0000000 --- a/example-simple-download.py +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/python -# -# Copyright 2011 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Example script for how to download a CSV file. - -Download a Top Search Queries CSV file and write it to disk. The email, -password, and website values used here must be replaced with real values. -See wiki document on its use. -""" - -# Import the downloader -from downloader import Downloader - -# Email address and password used to sign-in to Webmaster Tools -email = 'user@example.com' -password = '********' -# Specify the website and the type of data to download -website = 'http://www.example.com/' -selected_downloads = ['TOP_QUERIES'] - -# Instantiate the downloader object -downloader = Downloader() -# Authenticate with your Webmaster Tools sign-in info -downloader.LogIn(email, password) -# Initiate the download -downloader.DoDownload(website, selected_downloads)