Diff
Modified: branches/datasets/app/controllers/data_sets_controller.rb (2895 => 2896)
--- branches/datasets/app/controllers/data_sets_controller.rb 2012-01-09 16:35:09 UTC (rev 2895)
+++ branches/datasets/app/controllers/data_sets_controller.rb 2012-01-10 15:56:40 UTC (rev 2896)
@@ -52,8 +52,15 @@
end
def download
- send_data @data_set.create_zip(current_user).string, :disposition => "attachment",
- :filename => @data_set.archive_file_name
+ if @data_set.estimate_size < Conf.max_upload_size
+ send_data File.read(@data_set.create_zip(current_user).path), :disposition => "attachment",
+ :filename => @data_set.archive_file_name
+ else
+ flash[:error] = "This data set is too big to download as a zip file. You may, however, download each data item separately."
+ respond_to do |format|
+ format.html { redirect_to data_set_url(@data_set) }
+ end
+ end
end
def new
Modified: branches/datasets/app/models/data_item.rb (2895 => 2896)
--- branches/datasets/app/models/data_item.rb 2012-01-09 16:35:09 UTC (rev 2895)
+++ branches/datasets/app/models/data_item.rb 2012-01-10 15:56:40 UTC (rev 2896)
@@ -7,7 +7,7 @@
# Things that can be attached as data to a workflow port:
# NOTE: If adding a new data type, be sure to update app/helpers/data_sets_helper.rb with a
- # description of the new type.
+ # description of the new type. Also check that the estimate_size method of models/data_set.rb still makes sense.
# - These types of data are independent from the DataItem and should remain in the DB regardless of what happens to
# the DataItem connected to it:
Modified: branches/datasets/app/models/data_set.rb (2895 => 2896)
--- branches/datasets/app/models/data_set.rb 2012-01-09 16:35:09 UTC (rev 2895)
+++ branches/datasets/app/models/data_set.rb 2012-01-10 15:56:40 UTC (rev 2896)
@@ -5,8 +5,13 @@
class DataSet < ActiveRecord::Base
- SUPPORTED_TYPES = ["Taverna 1", "Taverna 2", "RapidMiner"] # Only supporting these for now, due to how input/output
- # ports are fetched
+ SUPPORTED_TYPES = ["Taverna 1", "Taverna 2", "RapidMiner"].freeze # Only supporting these for now, due to how input/output
+ # ports are fetched
+
+ TEMPFILE_LIFE = 2 * (60 * 60 * 24) # Files older than this will be deleted when the create_zip method is called.
+ # Set to 2 days to ensure that a file isn't deleted whilst someone is downloading it
+ # Could maybe go in settings.yml?
+
include ZipInMemory
include ActionController::UrlWriter #To generate URLs for the metadata file of the zip archive
default_url_options[:host] = URI.parse(Conf.base_uri).host
@@ -24,15 +29,38 @@
validates_inclusion_of :category, :in => ["example_data"] #Need some more categories!
+ # Get a rough estimate of the (uncompressed) data set size, for checking whether downloading is sensible
+ def estimate_size
+ size = 0
+ data_items.each do |data_item|
+ if data_item.data.respond_to?(:content_blob)
+ size += data_item.data.content_blob.data.size
+ else
+ size += data_item.data.data.size
+ end
+ end
+
+ size
+ end
+
# Zips the pack up and returns a File object containing its contents
def create_zip(user)
# Some simple stats to be included in the metadata file
stats = {"input" => {:text => 0, :files => 0, :hidden => 0},
"output" => {:text => 0, :files => 0, :hidden => 0}}
- #Create the zip file
- new_zip(StringIO.new) do |zipfile|
+ # Make temp folder if it doesn't already exist
+ FileUtils.mkdir_p(DataSet.archive_temp_folder) unless File.exist?(DataSet.archive_temp_folder)
+ # Delete old temp zip files
+ # TODO: FIXME: This needs to happen when server is first set up, too.
+ # If 1 million people create zip files on a certain day, and then no one else does ever again,
+ # 1 million files will stay on the system!
+ FileUtils.rm(Dir.glob("#{DataSet.archive_temp_folder}/*").select{|f| (Time.now - File.stat(f).mtime) > TEMPFILE_LIFE}, :force => true)
+
+ # Create the zip file
+ new_zip(File.new(DataSet.archive_temp_path, "w+")) do |zipfile|
+
#Add each data item to the zip. Inputs/outputs are separated into folders. Each input/output datum is named as
# the port it relates to, followed by a dash, followed by either the name of the file if it is a blob,
# or "text.txt" if it is just text data.
@@ -71,6 +99,14 @@
return filename
end
+ def self.archive_temp_folder
+ "tmp/data_sets"
+ end
+
+ def self.archive_temp_path
+ "#{archive_temp_folder}/#{Time.now.strftime("%Y%m%d_%H%M%S")}_#{rand(1000000)}.zip"
+ end
+
def metadata(stats)
"********** Snapshot of the data set: #{self.title} **********\r\n\r\n" +
Modified: branches/datasets/app/views/data_sets/_data_set.rhtml (2895 => 2896)
--- branches/datasets/app/views/data_sets/_data_set.rhtml 2012-01-09 16:35:09 UTC (rev 2895)
+++ branches/datasets/app/views/data_sets/_data_set.rhtml 2012-01-10 15:56:40 UTC (rev 2896)
@@ -31,8 +31,8 @@
<h3>
Input data
- <%= info_icon_with_tooltip("Input data can be supplied here along with the specific workflow port it can be fed in to.<br/>"+
- "Users can download and use this data to execute the workflow.") -%>
+ <%= info_icon_with_tooltip("Data is listed here along with the specific workflow input port it can be fed in to.<br/>"+
+ "This data can be downloaded or copied and used in the execution of the workflow.") -%>
</h3>
<table>
<tbody>
@@ -68,7 +68,7 @@
<h3>
Output data
- <%= info_icon_with_tooltip("Output data can be supplied here along with the specific workflow port it is produced from.<br/>"+
+ <%= info_icon_with_tooltip("Data is listed here along with the specific workflow output port it is produced from.<br/>"+
"This data can be used for the purpose of <b>comparison</b> against the actual data that the workflow produces") -%>
</h3>
<table>
Modified: branches/datasets/app/views/data_sets/show.rhtml (2895 => 2896)
--- branches/datasets/app/views/data_sets/show.rhtml 2012-01-09 16:35:09 UTC (rev 2895)
+++ branches/datasets/app/views/data_sets/show.rhtml 2012-01-10 15:56:40 UTC (rev 2896)
@@ -1,6 +1,7 @@
<div id="data_sets_container">
<ul class="sectionIcons">
- <% if Authorization.is_authorized?("download", nil, @workflow, current_user) %>
+ <% if @data_set.estimate_size < Conf.max_upload_size &&
+ Authorization.is_authorized?("download", nil, @workflow, current_user) %>
<li>
<%= icon('download', download_data_set_path(@data_set), nil, nil, 'Download as a zip file') %>
</li>
Modified: branches/datasets/app/views/workflows/show.rhtml (2895 => 2896)
--- branches/datasets/app/views/workflows/show.rhtml 2012-01-09 16:35:09 UTC (rev 2895)
+++ branches/datasets/app/views/workflows/show.rhtml 2012-01-10 15:56:40 UTC (rev 2896)
@@ -276,7 +276,7 @@
Data Sets
</h3>
<div class="box_infotext">
- <b>Data Sets</b> are collections of input and output data, consumed and produced by a workflow.<br/>
+ <b>Data Sets</b> are collections of data that is to be consumed, or can be produced by a workflow.<br/>
A <b>Data Set</b> can be used to:
<ul>
<li>document a complete workflow run, specifying what inputs were used and what outputs were produced.</li>
Modified: branches/datasets/test/unit/data_set_test.rb (2895 => 2896)
--- branches/datasets/test/unit/data_set_test.rb 2012-01-09 16:35:09 UTC (rev 2895)
+++ branches/datasets/test/unit/data_set_test.rb 2012-01-10 15:56:40 UTC (rev 2896)
@@ -7,12 +7,8 @@
test "can create a zip file" do
data_set = data_sets(:string_concat_v1_example)
- zip_data = data_set.create_zip(users(:john)).string
+ zip_file = data_set.create_zip(users(:john))
- zip_file = Tempfile.open("test.zip", "tmp")
- zip_file.write(zip_data)
- zip_file.close
-
Zip::ZipFile.open(zip_file.path) do |zipfile|
assert_equal 4, zipfile.entries.size
#Zip file entries seemed to be ordered according to their size
@@ -30,6 +26,6 @@
assert_equal 5, zipfile.entries[3].size
end
- zip_file.unlink
+ File.unlink(zip_file.path)
end
end