From: Bruno Félix Rezende Ribeiro
Subject: [Savannah-hackers-public] [PATCH 1/6] Add mime-type based processing of packages. Now it's easy to extend the code to recursively process arbitrary types of packages.
Date: Wed, 23 Sep 2015 04:02:39 -0300
Remove Tar archive single top-level directory restriction. Now
archives with multiple top-level directories or no top-level directory
at all are processed correctly.
Remove the URL regexp restriction. Now it processes any URL, as
expected by a local requester. Security concerns raised by remote
requests should be handled elsewhere.
Define wget as the standard fallback method for processing a URL when
no other method is suitable.
* gsv-eval-remote.sh (fetch_package): New function.
* gsv-eval-remote.sh (process_package): New function.
* gsv-eval-remote.sh (TARBALL_*): Replaced by DOWNLOAD_*.
---
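
Adding support for another package type only needs one more branch in
process_package's case statement; zip support, added later in this series,
follows the same pattern.  A minimal sketch of such a branch (the
application/zip mime-type string and the unzip invocation are assumptions
for illustration, not code taken from the later patch):

    *application/zip*)
      DIRECTORY=$(mktemp -d zip.XXXXXX) \
        || die "failed to create temporary directory"
      unzip -q "$1" -d "$DIRECTORY" \
        || die "failed to extract files from '$1'"
      cd "$DIRECTORY"
      pwd
      ;;
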
gsv-eval-remote.sh | 163 ++++++++++++++++++++++++++++-------------------------
1 file changed, 85 insertions(+), 78 deletions(-)
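
With the URL regexp gone, any wget-fetchable URL is accepted, including
CGI-style URLs that the old TARBALL_REGEX rejected.  A hypothetical
invocation (the example.org URL is made up for illustration): wget
downloads the file, file(1) identifies it, and process_package unpacks it
recursively.

    ./gsv-eval-remote.sh /tmp/out.html hello \
        'https://example.org/cgi-bin/download?file=hello-2.10.tar.gz'
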
diff --git a/gsv-eval-remote.sh b/gsv-eval-remote.sh
index 14f3534..3298dc6 100755
--- a/gsv-eval-remote.sh
+++ b/gsv-eval-remote.sh
@@ -1,6 +1,7 @@
#!/bin/sh
# Copyright (C) 2014 Assaf Gordon (address@hidden)
+# Copyright (C) 2015 Bruno Félix Rezende Ribeiro <address@hidden>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -21,18 +22,8 @@
## on a given directory, then generates an HTML report for it.
##
-# Size limit of downloadable tarballs (in bytes)
-TARBALL_SIZE_LIMIT=10000000
-
-# Ugly hack:
-# When given a tarball to download, limit the accepted URLs to this
-# (very partial) character set.
-# Since this script will be used from a website, and users can post
-# which even URLs they want, this regex will hopefully avoid some potential
-# problems (such as URLs doing 'GET' requests with CGI parameters).
-# The downside is that some legitimate URLs will not work (e.g.
-# some SourceForge URLs with extra CGI parameters).
-TARBALL_REGEX='^(https?|ftp)://[A-Za-z0-9\_\.\/-]*\.tar\.(gz|bz2|xz)$'
+# Size limit of downloadable files (in bytes)
+DOWNLOAD_SIZE_LIMIT=10000000
OUTPUT_FILE=
@@ -49,6 +40,7 @@ usage()
BASE=$(basename "$0")
echo "GNU-Savannah Evaluation - helper script
Copyright (C) 2014 A. Gordon (address@hidden)
+Copyright (C) 2015 Bruno Félix Rezende Ribeiro <address@hidden>
License: GPLv3-or-later
Usage: $BASE [OPTIONS] OUTPUT-HTML PROJECT-NAME SOURCE-URL
@@ -56,15 +48,6 @@ Usage: $BASE [OPTIONS] OUTPUT-HTML PROJECT-NAME SOURCE-URL
Will download SOURCE-URL, run the gnu-savannal evaluation perl script
on the download files, and produce an HTML file named OUTPUT-HTML.
-SOURCE-URL can be:
- http://
- https://
- ftp://
- git://
- tar.gz
- tar.bz2
- tar.xz
-
Options:
-h = show this help screen.
@@ -86,9 +69,80 @@ and generate '/tmp/out.html' report:
exit 0
}
+fetch_package() {
+
+ ## Find size before download
+ DOWNLOAD_HEAD=$(curl -f --silent -L --insecure --head "$1") \
+ || die "Failed to get size of '$1' (using HTTP HEAD)"
+ DOWNLOAD_SIZE=$(echo "$DOWNLOAD_HEAD" |
+ tr -d '\r' |
+ grep Content-Length |
+ tail -n 1 |
+ awk '{print $2}' ) \
+ || die "failed to get size (content-length) of '$1'"
+ test -z "$DOWNLOAD_SIZE" \
+ && die "failed to get size (content-length) of '$1'"
+ test "$DOWNLOAD_SIZE" -le "$DOWNLOAD_SIZE_LIMIT" \
+ || die "tarball '$1' size too big ($DOWNLOAD_SIZE)," \
+ "current limit is $DOWNLOAD_SIZE_LIMIT bytes."
+
+ ## a remote wget-fetchable source
+ TMP1=$(basename "$1") \
+ || die "failed to get basename of '$1'"
+ wget -q --no-check-certificate -O "$TMP1" "$1" \
+ || die "failed to download '$1'"
+
+ echo "$TMP1"
+}
+
+process_package() {
+ local DIRECTORY
+
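+  # Dispatch on file(1) output.  Compressed payloads are decompressed and
+  # then re-processed, so e.g. foo.tar.gz is handled as gzip and then as
+  # tar; tar archives are extracted into their own temporary directory,
+  # whose path is printed for the caller.  The case string combines the
+  # mime-type with file(1)'s textual description (used for lzop).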
+ case $(echo $(file -b --mime-type "$1") $(file -b "$1")) in
+ *application/gzip*)
+ gunzip "$1" || die "failed to decompress '$1'"
+ process_package "${1%.*}"
+ ;;
+ *application/x-bzip2*)
+ bunzip2 "$1" || die "failed to decompress '$1'"
+ process_package "${1%.*}"
+ ;;
+ *application/x-lzip*)
+ lzip -d "$1" || die "failed to decompress '$1'"
+ process_package "${1%.*}"
+ ;;
+ *application/x-lzma*)
+ unlzma "$1" || die "failed to decompress '$1'"
+ process_package "${1%.*}"
+ ;;
+ *'lzop compressed data'*)
+ lzop -d "$1" || die "failed to decompress '$1'"
+ process_package "${1%.*}"
+ ;;
+ *application/x-xz*)
+ unxz "$1" || die "failed to decompress '$1'"
+ process_package "${1%.*}"
+ ;;
+ *application/x-compress*)
+ compress -d "$1" || die "failed to decompress '$1'"
+ process_package "${1%.*}"
+ ;;
+ *application/x-tar*)
+ DIRECTORY=$(mktemp -d x-tar.XXXXXX) \
+ || die "failed to create temporary directory"
+ tar -xf "$1" -C "$DIRECTORY" || die "failed to extract files from '$1'"
+ cd "$DIRECTORY"
+ pwd
+ ;;
+ *)
+ die "there is no known method to process '$1'"
+ ;;
+ esac
+}
+
test "x$1" = "x-h" && usage
-OUTPUT_HTML=$1
+OUTPUT_HTML=$(realpath "$1")
PROJECT_NAME=$2
SOURCE=$3
@@ -101,6 +155,7 @@ test -z "$SOURCE" \
touch "$OUTPUT_HTML" \
|| die "failed to create output file '$OUTPUT_HTML'"
+
## From here on, we can at least log the errors into the output HTML file
OUTPUT_FILE="$OUTPUT_HTML"
@@ -116,19 +171,8 @@ CSS_FILE="$SCRIPTPATH/gsv-eval.css"
test -e "$CSS_FILE" \
|| die "CSS file ($CSS_FILE) not found"
-# Ugly Hack:
-# If given a URL, but one that doesn't match the stricter REGEX, exit
-# with a detailed explanation
-if echo "$SOURCE" | grep -E -q '^(https?|ftp)://' ; then
- if ! echo "$SOURCE" | grep -E -q "$TARBALL_REGEX" ; then
- die "the given URL ($SOURCE) does not match the stricter URL " \
- " limitations of this script (which are '$TARBALL_REGEX'). " \
- "Consider running this script locally."
- fi
-fi
-
##
-## Create temporary directroy to process the file
+## Create temporary directory to process the file
##
DIRECTORY=$(mktemp -d /tmp/gnu_eval.XXXXXX) \
|| die "failed to create temporary directory"
@@ -151,59 +195,22 @@ if echo "$SOURCE" | grep -E -q '^git://|\.git$' ; then
cd "$SOURCEDIR" \
|| die "failed to CD into source directory '$SOURCEDIR' " \
"(based on 'git clone $SOURCE')"
-
-elif echo "$SOURCE" | grep -E -q "$TARBALL_REGEX" ;
- then
- ##
- ## a Tarball source
- ##
-
- ## Find size before download
- TARBALL_HEAD=$(curl -f --silent -L --insecure --head "$SOURCE") \
- || die "Failed to get size of '$SOURCE' (using HTTP HEAD)"
- TARBALL_SIZE=$(echo "$TARBALL_HEAD" |
- tr -d '\r' |
- grep Content-Length |
- tail -n 1 |
- awk '{print $2}' ) \
- || die "failed to get size (content-length) of '$SOURCE'"
- test -z "$TARBALL_SIZE" \
- && die "failed to get size (content-length) of '$SOURCE'"
- test "$TARBALL_SIZE" -le "$TARBALL_SIZE_LIMIT" \
- || die "tarball '$SOURCE' size too big ($TARBALL_SIZE)," \
- "current limit is $TARBALL_SIZE_LIMIT bytes."
-
- ## a remote tarball source
- TMP1=$(basename "$SOURCE") \
- || die "failed to get basename of '$SOURCE'"
- wget -q --no-check-certificate -O "$TMP1" "$SOURCE" \
- || die "failed to download '$SOURCE'"
-
- ## GNU Tar should automatically detect and uncompress the tarball.
- tar -xf "$TMP1" \
- || die "failed to extract files from '$TMP1' (from '$SOURCE')"
-
+else
##
- ## Some tarballs contain directories that are named differently than
- ## the tarball. Annoying, but common enough.
- ## So search for one sub-directory.
+ ## a wget-fetchable package
##
- COUNT=$(find . -maxdepth 1 -type d | sed 1d | wc -l)
- test "$COUNT" -eq 1 \
- || die "tarball '$SOURCE' contains more than one sub-directory."
- SOURCEDIR=$(find . -maxdepth 1 -type d | sed 1d)
- cd "$SOURCEDIR" \
- || die "failed to CD into '$SOURCEDIR' (extracted from '$SOURCE')"
-else
- die "Unknown source type (SOURCE) - expecting GIT or TARBALL on HTTP/FTP"
+ PACKAGE_FILE=$(fetch_package "$SOURCE") \
+ || die "failed to fetch '$SOURCE'"
+ PACKAGE_DIRECTORY=$(process_package "$PACKAGE_FILE") \
+ || die "failed to process '$PACKAGE_FILE'"
fi
##
## Analize the project
##
"$EVAL_SCRIPT" --project "$PROJECT_NAME" \
- "$DIRECTORY/$SOURCEDIR" > "$DIRECTORY/eval.md" \
+ "$PACKAGE_DIRECTORY" > "$DIRECTORY/eval.md" \
|| die "evaluation script failed (on '$SOURCE')"
pandoc --from markdown \
--
2.1.4