# -*- Mode:Python; indent-tabs-mode:nil; tab-width:4 -*-
#
# Copyright 2011 Willem Jansen
#
# This file is part of duplicity.
#
# Duplicity is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# Duplicity is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with duplicity; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

# Current version
# 0.0.2 (beta)

# ToDos:
# - Upload of files > 2GB

import pdb
import base64
import httplib
import re
import urllib
import urllib2
import collections
import mimetypes
# NOTE: the star import below also exports a class named ``time``; the
# ``import time`` that follows must stay AFTER it so that ``time`` refers to
# the time module (time.time(), time.sleep()) in the code below.
from datetime import *
import time
import math

import duplicity.backend
from duplicity import globals
from duplicity import log
from duplicity.errors import *
# NOTE(review): the next line reached this file as the literal text
# "address@hidden" (it looks like something scrubbed it, e.g. a mail archive).
# Its original content is unknown, so it is preserved as a comment rather than
# guessed at -- confirm against the upstream file.
# address@hidden
from duplicity import urlparse_2_5 as urlparser


class RapidshareBackend(duplicity.backend.Backend):
    """Backend for accessing Rapidshare - contributed in 2011 by Willem Jansen

    Connects to RapidShare via its HTTP API.  Files are stored in numbered
    sub-folders ('000001', '000002', ...) below the backup directory because
    a single Rapidshare folder can only list a limited number of files.
    """

    def __init__(self, parsed_url):
        """Log in, determine the account type and set up the backup folder.

        parsed_url -- parsed backend URL; its scheme must be 'rs'.

        Raises BackendException for an unknown scheme or when the account
        details cannot be retrieved within globals.num_retries attempts.
        """
        duplicity.backend.Backend.__init__(self, parsed_url)

        # Set Rapidshare-inherent constants
        self.__currFolderNum = 1
        self.__Files_per_RSdir = 2999
        self.__Max_Dirs = 1000000 - 1
        self.__Max_UL_size = 2 * 1024 * 1024 * 1024
        self.FolderID_root = 0

        # check directory path on RS Side (collapse repeated slashes)
        self.parsed_url = parsed_url
        if parsed_url.path:
            self.directory = re.sub('/+', '/', parsed_url.path + '/')
        else:
            self.directory = '/'

        ########################################
        # Set username as work-around ('rs' has to be added in backend.py
        # line 176 ff) and modify self.directory accordingly
        ########################################
        if self.parsed_url.username is None:
            self.parsed_url.username = parsed_url.path.split('@')[0].split('//')[1]
            # Drop the two leading components and the trailing empty one;
            # slicing (unlike pop()) cannot fail on a short path.
            self.directory = '/'.join(self.directory.split('/')[2:-1])
        else:
            self.directory = self.directory.strip('/')

        log.Info("Using Rapidshare host %s" % ('api.rapidshare.com',))
        log.Info("Using Rapidshare port %s" % (443,))
        log.Info("Using Rapidshare directory %s" % (self.directory,))

        # Setup API connection for everything except up- and download
        if parsed_url.scheme == 'rs':
            self.api_conn = httplib.HTTPSConnection('api.rapidshare.com', 443)
        else:
            raise BackendException("Unknown URI scheme: %s" % (parsed_url.scheme))

        failure = None
        for n in range(1, globals.num_retries + 1):
            # Check login credentials & account status
            response, answer = self.__api_call(
                self.__with_credentials({'sub': 'getaccountdetails'}))

            # Check credentials (is password correct?)
            if answer[:5] == "ERROR" or response.status >= 400:
                log.Warn("Credentials for RS-Server are incorrect or Server is unreachable")
                failure = (response.status, response.reason)
                continue

            # The reply is a list of key=value lines; split only on the
            # first '=' so that values containing '=' do not break parsing.
            account_details = dict(line.split('=', 1)
                                   for line in answer.strip('\n').split('\n')
                                   if '=' in line)

            # Check account status
            # RS currently offers 2 membership models: free and premium
            if (int(account_details['rapids']) > 0
                    or int(account_details['billeduntil']) >= time.time()):
                self.account_status = "premium"
            else:
                # RS reserves the right to delete files in free accounts
                # ... not too good for Backups
                log.Warn('Free Rapidshare account is not save for backups!')
                self.account_status = "free"
            log.Info('Rapidshare account expires on %s.'
                     % date.fromtimestamp(float(account_details['billeduntil'])))

            # Identify FolderID of Root-Folder:
            # a) get directory-structure
            dir_Structure = self.__getDirStructure()
            # b) Identify ID of Root-Folder (create folder if required)
            self.FolderID_root = self.__setup_BackupDirStructure(dir_Structure)
            return

        log.Warn("Rapidshare backend giving up after %d attempts to get remote directory information"
                 % (globals.num_retries))
        raise BackendException(failure)

    def __with_credentials(self, params):
        """Return a copy of params extended with the account login/password."""
        full = dict(params)
        full['login'] = self.parsed_url.username
        full['password'] = self.get_password()
        return full

    def __api_call(self, params):
        """POST params to the API connection; return (response, body).

        The body is always read completely: httplib refuses to send another
        request on the same connection while a response is still unread.
        """
        self.api_conn.request("POST", "/cgi-bin/rsapi.cgi", urllib.urlencode(params))
        response = self.api_conn.getresponse()
        return response, response.read()

    def __create_folder(self, name, parent_id):
        """Create folder `name` below `parent_id` and return the new folder ID.

        Raises BackendException if every one of globals.num_retries attempts
        fails (HTTP status >= 400 or an "ERROR" reply).
        """
        failure = None
        for n in range(1, globals.num_retries + 1):
            response, answer = self.__api_call(self.__with_credentials(
                {'sub': 'addrealfolder', 'name': name, 'parent': parent_id}))
            if answer[:5] == 'ERROR' or response.status >= 400:
                log.Info("Attempt #%d to create directory '%s' failed: %s %s"
                         % (n, name, response.status, response.reason))
                failure = (response.status, response.reason)
                continue
            # The reply body is used as the ID of the new folder.
            return answer.strip()
        raise BackendException(failure)

    def __getDirStructure(self):
        """Get the directory structure of the Rapidshare account.

        Rapidshare allows to get the directory structure as a whole. The
        definition is backward-recursive, i.e., every folder knows its and
        its parent's ID. The Root folder has the ID 0.

        Returns a list of 'Folder' namedtuples (empty if there are none).
        Raises BackendException after globals.num_retries failed attempts.
        """
        rs_directory = collections.namedtuple(
            'Folder',
            'RealFolder_ID, Parent_RealFolder_ID, Name, BROWSE_ACL, UPLOAD_ACL, DOWNLOAD_ACL')
        failure = None
        for n in range(1, globals.num_retries + 1):
            response, answer = self.__api_call(
                self.__with_credentials({'sub': 'listrealfolders'}))
            if response.status >= 400 or answer[:5] == 'ERROR':
                failure = (response.status, response.reason)
                continue
            listing = answer.strip('\n')
            # NOTE(review): 'NONE' is the empty-result marker __listDir checks
            # for; presumably listrealfolders uses the same one -- confirm.
            if not listing or listing == 'NONE':
                return []
            return [rs_directory._make(line.split(','))
                    for line in listing.split('\n')]
        log.Warn('RS Server does not return list of real folders')
        raise BackendException(failure)

    def __setup_BackupDirStructure(self, rs_dir_list):
        """Return the folder ID of the backup directory, creating it if needed.

        Walks self.directory component by component, starting at the root
        folder ('0'), and creates every component that does not exist yet.

        rs_dir_list -- folder list as returned by __getDirStructure().

        Raises BackendException if a folder cannot be created.
        """
        FolderID_root = '0'
        for current_dir in self.directory.split("/"):
            if not current_dir:
                # An empty directory means "root": nothing to look up/create.
                continue
            matches = [folder for folder in rs_dir_list
                       if folder.Parent_RealFolder_ID == FolderID_root
                       and folder.Name == current_dir]
            if matches:
                FolderID_root = matches[0].RealFolder_ID
                continue
            # if the directory does not exist, create it.
            log.Info("Directory '%s' being created." % current_dir)
            try:
                FolderID_root = self.__create_folder(current_dir, FolderID_root)
            except BackendException:
                log.Warn('Unable to setup directory structure in RS servers')
                raise
        return FolderID_root

    def __getFolderID(self, FolderID_root=None, rs_dir_list=None):
        """Return the ID of a numbered sub-folder that still has room.

        RS can only list 3000 files per folder (self.__Files_per_RSdir),
        therefore we need subdirectories below the root folder to store the
        files. These have the form '000xxx'. The number of digits is
        log10(self.__Max_Dirs). Once a folder holds self.__Files_per_RSdir
        files or more, the next directory is used (and created if missing).

        FolderID_root -- parent folder ID; defaults to self.FolderID_root.
        rs_dir_list   -- folder list; defaults to a fresh __getDirStructure().

        Raises BackendException if a folder cannot be created or if all
        numbered folders are full.
        """
        # Check optional arguments
        if FolderID_root is None:
            FolderID_root = self.FolderID_root
        if rs_dir_list is None:
            rs_dir_list = self.__getDirStructure()

        digits = int(round(math.log10(self.__Max_Dirs)))

        # Go through numbered dirs and check number of files.
        # Start at self.__currFolderNum to avoid going through potentially
        # tons of already filled directories which might take ages.
        folder_num = self.__currFolderNum
        while folder_num < self.__Max_Dirs:
            folder_name = '%0*d' % (digits, folder_num)
            matches = [folder for folder in rs_dir_list
                       if folder.Parent_RealFolder_ID == FolderID_root
                       and folder.Name == folder_name]
            if not matches:
                # if the directory does not exist, create it.
                log.Info("Directory '%s' being created." % folder_name)
                try:
                    FolderID = self.__create_folder(folder_name, FolderID_root)
                except BackendException:
                    log.Warn("Unable to create directory: %s" % (folder_name))
                    raise
                break
            # if folder is there, is it already full?
            if len(self.__listDir(matches[0].RealFolder_ID)) < self.__Files_per_RSdir:
                FolderID = matches[0].RealFolder_ID
                break
            folder_num += 1
        else:
            # for the unlikely event that there are more than 3 billion files...
            message = ("RS Backend cannot handle more than %s files"
                       % str(self.__Files_per_RSdir * self.__Max_Dirs))
            log.Warn(message)
            raise BackendException(message)

        # Store current folder number to shorten search process for the next time
        self.__currFolderNum = folder_num
        return FolderID

    def close(self):
        """Close the API connection."""
        self.api_conn.close()

    def list(self):
        """Return filenames only"""
        log.Info("Listing directory %s on RS server" % (self.directory,))
        return [entry[1] for entry in self.__list()]

    def __list(self, FolderID=None):
        """List filenames AND file-IDs. IDs are required for download.

        Returns a list of [fileid, filename] entries collected from all
        direct sub-folders of FolderID (default: self.FolderID_root).
        """
        if FolderID is None:
            FolderID = self.FolderID_root
        result = []
        # List files in all subdirectories of the root-folder. Subfolders are
        # required as one RS-folder can only hold 3000 files
        for folder in self.__getDirStructure():
            if folder.Parent_RealFolder_ID == FolderID:
                result.extend(self.__listDir(folder.RealFolder_ID))
        return result

    def __listDir(self, FolderID=None):
        """List files in a given directory, return filenames and FileIDs.

        Returns a list of [fileid, filename] entries (empty for an empty
        folder).  Raises BackendException after globals.num_retries failed
        attempts.
        """
        if FolderID is None:
            FolderID = self.FolderID_root
        failure = None
        for n in range(1, globals.num_retries + 1):
            response, answer = self.__api_call(self.__with_credentials(
                {'sub': 'listfiles', 'realfolder': FolderID,
                 'fields': 'fileid,filename,killdeadline'}))
            if response.status >= 400 or answer[:5] == 'ERROR':
                failure = (response.status, response.reason)
                continue
            listing = answer.strip('\n')
            if not listing or listing == 'NONE':
                return []
            # Keep only the first two requested fields: fileid and filename.
            return [line.split(',')[:2] for line in listing.split('\n')]
        log.Warn("Rapidshare backend giving up after %d attempts to list directory"
                 % globals.num_retries)
        raise BackendException(failure)

    def get(self, remote_filename, local_path):
        """Get remote file, saving it to local_path.

        Raises BackendException if the file is not listed on the server or
        if it cannot be downloaded within globals.num_retries attempts.
        """
        remote_name = self.directory + '/' + remote_filename
        chunk_size = 64 * 1024
        failure = None
        for n in range(1, globals.num_retries + 1):
            log.Info("Retrieving %s from Rapidshare server" % (remote_name))

            # get filelist and identify file
            matches = [entry for entry in self.__list()
                       if entry[1] == remote_filename]
            if not matches:
                raise BackendException("File %s not found on Rapidshare server"
                                       % remote_name)
            FileID = matches[0]
            query = {'sub': 'download', 'fileid': FileID[0], 'filename': FileID[1]}

            # Obtain Download-information
            ticket_params = dict(query)
            ticket_params['try'] = '1'
            if self.account_status != "free":
                ticket_params = self.__with_credentials(ticket_params)
            response, answer = self.__api_call(ticket_params)
            if response.status >= 400 or answer[:5] == "ERROR":
                log.Info("RS GET attempt #%d failed: %s %s"
                         % (n, response.status, response.reason))
                failure = (response.status, response.reason)
                continue

            # Response has the format "DL:$hostname,$dlauth,$countdown"
            download_coordinates = answer[3:].split(',')

            if self.account_status == "free":
                # Free RS accounts require waitingtime before download
                waiting_time = float(download_coordinates[2])
                log.Info("Waiting time for Rapidshare-download: %s seconds"
                         % (waiting_time,))
                time.sleep(waiting_time)
                connection = httplib.HTTPConnection(download_coordinates[0], 80)
                download_params = dict(query)
                download_params['dlauth'] = download_coordinates[1]
            else:
                connection = httplib.HTTPSConnection(download_coordinates[0], 443)
                download_params = self.__with_credentials(query)

            try:
                connection.request("POST", "/cgi-bin/rsapi.cgi",
                                   urllib.urlencode(download_params))
                # Download file and store it locally
                response = connection.getresponse()
                if response.status == 200:
                    # Open the target only once there is data to store, and
                    # stream it in chunks instead of buffering the whole file.
                    target_file = local_path.open("wb")
                    close_status = None
                    try:
                        while True:
                            chunk = response.read(chunk_size)
                            if not chunk:
                                break
                            target_file.write(chunk)
                    finally:
                        # Explicit call: an `assert not f.close()` would be
                        # stripped (and the file left open) under python -O.
                        close_status = target_file.close()
                    if close_status:
                        raise BackendException("Error closing local file for %s"
                                               % remote_name)
                    local_path.setdata()
                    return
            finally:
                connection.close()

            log.Info("RS GET attempt #%d failed: %s %s"
                     % (n, response.status, response.reason))
            failure = (response.status, response.reason)

        log.Warn("RS backend giving up after %d attempts to GET %s"
                 % (globals.num_retries, remote_name))
        raise BackendException(failure)

    def __encode_multipart_formdata(self, fields, files):
        """Build a multipart/form-data request body.

        fields -- sequence of (name, value) elements for regular form fields.
        files  -- ONE (name, filename, value) tuple for the data to be
                  uploaded as a file.

        Returns (content_type, body) ready to be sent via httplib.
        """
        BOUNDARY = '----------632865735RS4EVER5675865'
        CRLF = '\r\n'
        parts = []
        for (key, value) in fields:
            parts.extend([
                '--' + BOUNDARY,
                'Content-Disposition: form-data; name="%s"' % key,
                '',
                value,
            ])
        parts.extend([
            '--' + BOUNDARY,
            'Content-Disposition: form-data; name="%s"; filename="%s"' % (files[0], files[1]),
            'Content-Type: %s' % self.__get_content_type(files[1]),
            '',
            files[2],
            '--' + BOUNDARY + '--',
            '',
        ])
        body = CRLF.join(parts)
        content_type = 'multipart/form-data; boundary=%s' % BOUNDARY
        return content_type, body

    def __get_content_type(self, filename):
        """Guess the MIME type of filename, defaulting to a binary stream."""
        return mimetypes.guess_type(filename)[0] or 'application/octet-stream'

    def put(self, source_path, remote_filename=None):
        """Transfer source_path to remote_filename, find correct subfolder first.

        remote_filename defaults to source_path.get_filename().

        Raises BackendException if the file is larger than the upload limit
        or if the upload fails globals.num_retries times.
        """
        if not remote_filename:
            remote_filename = source_path.get_filename()
        remote_name = self.directory + '/' + remote_filename

        # Check filesize, must be smaller 2GB. Current implementation does not
        # allow for upload for files > 2GB. getsize() seems not to work with
        # the signature file, thus the "try"-clause
        try:
            filesize = source_path.getsize()
        except Exception:
            filesize = 0
        if filesize > self.__Max_UL_size:
            log.Warn('Rapidshare backend does currently not support uploads with file sizes >2GB')
            raise BackendException('Rapidshare backend does currently not support uploads with file sizes >2GB')

        # Read the payload once, up front, so that retries neither re-open
        # the source nor leave file handles behind on failed attempts.
        source_file = source_path.open("rb")
        close_status = None
        try:
            payload = str(source_file.read())
        finally:
            close_status = source_file.close()
        if close_status:
            raise BackendException("Error closing source file for %s" % remote_name)

        failure = None
        for n in range(1, globals.num_retries + 1):
            log.Info("Saving %s on RS server" % (remote_name))

            # Identify next free uploadserver
            response, answer = self.__api_call({'sub': 'nextuploadserver'})
            if response.status >= 400 or answer[:5] == 'ERROR':
                log.Info("RS upload attempt #%d failed: %s %s"
                         % (n, response.status, response.reason))
                failure = (response.status, response.reason)
                continue
            rs_uploadserverID = answer.strip()

            fields = (('sub', 'upload'),
                      ('login', self.parsed_url.username),
                      ('password', self.get_password()),
                      ('folder', self.__getFolderID()))
            upload = ('filecontent', remote_filename, payload)
            content_type, body = self.__encode_multipart_formdata(fields, upload)

            # Upload file, free accounts can only use HTTP
            upload_host = 'rs' + rs_uploadserverID + '.rapidshare.com'
            if self.account_status == "free":
                connection = httplib.HTTPConnection(upload_host, 80)
            else:
                connection = httplib.HTTPSConnection(upload_host, 443)
            headers = {
                'User-Agent': 'Firefox',
                'Content-Type': content_type,
            }
            try:
                connection.request('POST', "/cgi-bin/rsapi.cgi", body, headers)
                response = connection.getresponse()
                answer = response.read()
            finally:
                connection.close()

            # NOTE(review): the other API calls in this class treat a body
            # starting with "ERROR" as failure; the upload reply is assumed
            # to follow the same convention -- confirm against the RS API.
            if response.status in (200, 201) and answer[:5] != 'ERROR':
                return

            log.Info("RS upload attempt #%d failed: %s %s"
                     % (n, response.status, response.reason))
            failure = (response.status, response.reason)

        log.Warn("RS backend giving up after %d attempts to upload %s"
                 % (globals.num_retries, remote_name))
        raise BackendException(failure)

# Register RapidshareBackend with duplicity under the "rs" scheme name
# (the same scheme RapidshareBackend.__init__ accepts).
duplicity.backend.register_backend("rs", RapidshareBackend)