Fixed file upload de-activation in Scrub

scottkleinman · scottkleinman · commit f27db76ca3da · 2015-10-14T11:17:03.000-07:00
diff --git a/lexos.py b/lexos.py
@@ -148,6 +148,14 @@ def select_old():
 
     return ''  # Return an empty string because you have to return something
 
+@app.route("/removeUploadLabels", methods=["GET", "POST"])  # Tells Flask to handle ajax request from '/scrub'
+def removeUploadLabels():
+    """
+    Removes Scrub upload files from the session when the labels are clicked.
+    """
+    option = request.headers["option"]
+    session['scrubbingoptions']['optuploadnames'][option] = ''
+    return "success"
 
 @app.route("/scrub", methods=["GET", "POST"])  # Tells Flask to load this function when someone is at '/scrub'
 def scrub():
@@ -946,6 +954,169 @@ def manage():
     managers.utility.saveFileManager(fileManager)
     return ''  # Return an empty string because you have to return something
 
+@app.route("/gutenberg", methods=["GET", "POST"])  # Tells Flask to load this function when someone is at '/module'
+def gutenberg():
+    """
+    Generic module for saving text stored as a variable to the file manager. It mostly just illustrates how 
+    to access the file manager.
+    """
+    fileManager = managers.utility.loadFileManager()
+
+    if request.method == "GET":
+        # "GET" request occurs when the page is first loaded.
+
+        # Get a dictionary of the currently active files' labels.
+        labels = fileManager.getActiveLabels()
+
+        message = "Submit to load file"
+
+        return render_template('gutenberg.html', message=message)
+
+    if request.method == "POST":
+        # "POST" request occur when html form is submitted
+        labels = fileManager.getActiveLabels()
+
+        # Get the request variable
+        s = request.form["urls"]
+        formLines = [l for l in s.split("\n") if l]
+
+        #import os, urllib # imported by lexos.py
+        import re, shutil, urllib
+
+        remove = ["Produced by","End of the Project Gutenberg","End of Project Gutenberg"]
+        savedFiles = "<ol>"
+
+        ''' Reads a raw Project Gutenberg etext, reformat paragraphs,
+        and removes fluff.  Determines the title of the book and uses it
+        as a filename to write the resulting output text. '''
+        for url in formLines:
+            f = urllib.urlopen(url)
+            data = f.readlines()
+            f.close()
+            lines = [line.strip() for line in data]
+            collect = False
+            lookforsubtitle = False
+            outlines = []
+            startseen = endseen = False
+            authorLastName = ""
+            title=""
+            one="<?xml version=\"1.0\" encoding=\"utf-8\"?><TEI xmlns=\"http://www.tei-c.org/ns/1.0\" version=\"5.0\"><teiHeader><fileDesc><titleStmt>"
+            two = "</titleStmt><publicationStmt><publisher></publisher><pubPlace></pubPlace><availability status=\"free\"><p>Project Gutenberg</p></availability></publicationStmt><seriesStmt><title>Project Gutenberg Full-Text Database</title></seriesStmt><sourceDesc default=\"false\"><biblFull default=\"false\"><titleStmt>"
+            three = "</titleStmt><extent></extent><publicationStmt><publisher></publisher><pubPlace></pubPlace><date></date></publicationStmt></biblFull></sourceDesc></fileDesc><encodingDesc><editorialDecl default=\"false\"><p>Preliminaries omitted.</p></editorialDecl></encodingDesc></teiHeader><text><body><div>"
+            for line in lines:
+                if line.startswith("Author: "):
+                    author = line[8:]
+                    authorLastName = author
+                    authorTemp = line[8:]
+                    continue
+                if line.startswith("Title: "):
+                    title = line[7:]
+                    titleTemp = line[7:]
+                    lookforsubtitle = True
+                    continue
+                if lookforsubtitle:
+                    if not line.strip():
+                        lookforsubtitle = False
+                    else:
+                        subtitle = line.strip()
+                        subtitle = subtitle.strip(".")
+                        title += ", " + subtitle
+                if ("*** START" in line) or ("***START" in line):
+                    collect = startseen = True
+                    paragraph = ""
+                    continue
+                if ("*** END" in line) or ("***END" in line):
+                    endseen = True
+                    break
+                if not collect:
+                    continue
+                if (titleTemp) and (authorTemp):
+                    outlines.append(one)
+                    outlines.append("<title>")
+                    outlines.append(titleTemp)
+                    outlines.append("</title>")
+                    outlines.append("<author>")
+                    outlines.append(authorTemp)
+                    outlines.append("</author>")
+                    outlines.append(two)
+                    outlines.append("<title>")
+                    outlines.append(titleTemp)
+                    outlines.append("</title>")
+                    outlines.append("<author>")
+                    outlines.append(authorTemp)
+                    outlines.append("</author>")
+                    outlines.append(three)
+                    authorTemp = False
+                    titleTemp = False
+                    continue
+                if not line:
+                    paragraph = paragraph.strip()
+                    for term in remove:
+                        if paragraph.startswith(term):
+                            paragraph = ""
+                    if paragraph:
+                        paragraph = paragraph.replace("&", "&")
+                        outlines.append(paragraph)
+                        outlines.append("</p>")
+                    paragraph = "<p>"
+                else:
+                    paragraph += " " + line
+            
+            # Get author lastname
+            authorLastName = authorLastName.split(" ")
+            authorLastName = authorLastName[-1].lower()
+    
+            # Get short title
+            shortTitle = title.replace(":", "_")
+            shortTitle = shortTitle.replace(",", "_")
+            shortTitle = shortTitle.replace(" ", "")
+            first_cap_re = re.compile('(.)([A-Z][a-z]+)')
+            all_cap_re = re.compile('([a-z0-9])([A-Z])')
+            shortTitle = first_cap_re.sub(r'\1_\2', shortTitle)
+            shortTitle = all_cap_re.sub(r'\1_\2', shortTitle).lower()
+            shortTitle = shortTitle.replace("__", "_")
+
+            # Compose a filename.  Replace some illegal file name characters with alternatives.
+            filename = url.split("/")
+            ofn = filename[-1]
+            ofn = authorLastName + "_" + shortTitle[:150] + ".xml"
+            ofn = ofn.replace("&", "")
+            ofn = ofn.replace("/", "")
+            ofn = ofn.replace("\"", "")
+            ofn = ofn.replace(":", "")
+            ofn = ofn.replace(",", "")
+            ofn = ofn.replace(" ", "")
+            ofn = ofn.replace("txt", "xml")
+        
+            outlines.append("</div></body></text></TEI>")
+            text = "\n".join(outlines)
+            text = re.sub("End of the Project Gutenberg .*", "", text, re.M)
+            text = re.sub("Produced by .*", "", text, re.M)
+            text = re.sub("<p>\s+<\/p>", "", text)
+            text = re.sub("\s+", " ", text)
+
+            # Save the file to the file manager
+            savedFiles += "<li>" + ofn + "</li>"
+            fileManager.addUploadFile(text, ofn)
+
+        # Read from a list of urls
+        #outputDir = "/Path/to/your/ProjectGutenberg/TEI/Output/files/"
+        #urls = ['http://www.gutenberg.org/cache/epub/42324/pg42324.txt']
+        #for url in urls:
+        #    ofn, text = beautify(url, outputDir, url)
+        #    print(ofn+":")
+        #    print(text[:10000])
+
+        # Save the file to the file manager
+        #fileManager.addUploadFile(doc, fileName)
+
+        message = savedFiles + "</ol>"
+
+        # Save the file manager
+        managers.utility.saveFileManager(fileManager)
+
+        return render_template('gutenberg.html', message=message)
+
 # ======= End of temporary development functions ======= #
 
 install_secret_key()
diff --git a/static/css/style.css b/static/css/style.css
@@ -1128,7 +1128,7 @@ textarea.manualinput {
 
 .bttnfilelabels {
   cursor: pointer;
-  text-align: center;
+  text-align: right;
   color: #00B226;
 }
 
@@ -1180,7 +1180,7 @@ textarea.manualinput {
 }
 
 #swfileselectbttnlabel, #lemfileselectbttnlabel, #consfileselectbttnlabel, #scfileselectbttnlabel {
-  margin-left: 10px;
+  margin-left: 25px;
 }
 
 .advanced-option {
diff --git a/static/js/scripts_scrub.js b/static/js/scripts_scrub.js
@@ -11,38 +11,60 @@ $(function() {
 */
 	$('#swfileselect').change(function(ev) {
 		filename = ev.target.files[0].name;
-		if (filename.length > 35) {filename = filename.substring(0, 34) + "...";}
+		if (filename.length > 25) {filename = filename.substring(0, 24) + "...";}
 		$("#swfileselectbttnlabel").html(filename);
 	});
 
 	$('#lemfileselect').change(function(ev) {
 		filename = ev.target.files[0].name;
-		if (filename.length > 35) {filename = filename.substring(0, 34) + "...";}
+		if (filename.length > 25) {filename = filename.substring(0, 24) + "...";}
 		$("#lemfileselectbttnlabel").html(filename);
 	});
 
 	$('#consfileselect').change(function(ev) {
 		filename = ev.target.files[0].name;
-		if (filename.length > 35) {filename = filename.substring(0, 34) + "...";}
+		if (filename.length > 25) {filename = filename.substring(0, 24) + "...";}
 		$("#consfileselectbttnlabel").html(filename);
 	});
 
 	$('#scfileselect').change(function(ev) {
 		filename = ev.target.files[0].name;
-		if (filename.length > 35) {filename = filename.substring(0, 34) + "...";}
+		if (filename.length > 25) {filename = filename.substring(0, 24) + "...";}
 		$("#scfileselectbttnlabel").html(filename);
 	});
 
 
 	$(".bttnfilelabels").click( function() {
+		// filetype is the id of the matching file input: swfileselect, lemfileselect, consfileselect, or scfileselect
 		var filetype = $(this).attr('id').replace('bttnlabel', '');
 		usingCache = $('#usecache'+filetype).attr('disabled') != 'disabled';
 
 		if ((usingCache) || ($(this).attr('id') != '')) {
-			$(this).siblings('.scrub-upload').attr('value', '');
+			//$(this).siblings('.scrub-upload').attr('value', '');
+			// Next two lines clear the file input (reset the value, then replace the element with a clone that keeps its handlers); it's hard to find a cross-browser solution
+			$("#"+filetype).val('');
+			$("#"+filetype).replaceWith($("#"+filetype).clone(true));
 			$("#usecache"+filetype).attr('disabled', 'disabled');
 			$(this).text('');
 		}
+
+		// Ask the server to forget this upload: '/removeUploadLabels' reads the session key from the 'option' header (e.g. "swfileselect[]")
+        $.ajax({
+            type: "POST",
+            url: "/removeUploadLabels",
+            data: $(this).text().toString(),
+            contentType: 'text/plain',
+            headers: { 'option': filetype+'[]' },
+            beforeSend: function(){
+                // Nothing to do; NOTE(review): the body is '' whenever the label text was cleared above
+            },
+            success: function(response) {
+                // Nothing to do; the server only replies "success"
+            },
+            error: function(jqXHR, textStatus, errorThrown){
+                console.log("Error: " + errorThrown);
+            }
+		});
 	});
 
 	$("#punctbox").click( function() {

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -1128,7 +1128,7 @@ textarea.manualinput {` |
| `1128` | `1128` | |
| `1129` | `1129` | `.bttnfilelabels {` |
| `1130` | `1130` | `cursor: pointer;` |
| `1131` | | `- text-align: center;` |
| | `1131` | `+ text-align: right;` |
| `1132` | `1132` | `color: #00B226;` |
| `1133` | `1133` | `}` |
| `1134` | `1134` | |
| | | `@@ -1180,7 +1180,7 @@ textarea.manualinput {` |
| `1180` | `1180` | `}` |
| `1181` | `1181` | |
| `1182` | `1182` | `#swfileselectbttnlabel, #lemfileselectbttnlabel, #consfileselectbttnlabel, #scfileselectbttnlabel {` |
| `1183` | | `- margin-left: 10px;` |
| | `1183` | `+ margin-left: 25px;` |
| `1184` | `1184` | `}` |
| `1185` | `1185` | |
| `1186` | `1186` | `.advanced-option {` |