import os
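# Install the required dependencies at startup (e.g. for a hosted demo environment)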
os.system("python -m pip install transformers")
os.system("python -m pip install spacy==3.5.4")
os.system("python -m pip install spacy-alignments==0.9.1")
os.system("python -m pip install spacy-legacy==3.0.12")
os.system("python -m pip install spacy-loggers==1.0.3")
os.system("python -m pip install torch")
os.system("python -m pip install seaborn==0.11.2")
os.system("python -m pip install gradio==3.16.1")
os.system("python -m pip install typer==0.4.1")
os.system("python -m pip install pydantic==1.9.2")
os.system("python -m pip install matplotlib==3.4.3")
os.system("python -m pip install Flask")
os.system("python -m pip install sty")
from flask import Flask, render_template, request, Response, send_file, jsonify
import hashlib
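# run_pipeline() and labels2attrs are expected to be provided by pipeline.py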
from pipeline import *
import csv
app = Flask(__name__, template_folder='templates', static_folder='templates')
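# Note: static assets are also served from the templates/ directory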
# Set the folder for saving uploaded files
UPLOAD_FOLDER = 'uploads/'
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
if not os.path.isdir(app.config['UPLOAD_FOLDER']):
    # Create the directory if it doesn't exist
    os.makedirs(app.config['UPLOAD_FOLDER'])
# Allowable file extensions for uploading
ALLOWED_EXTENSIONS = {'txt'}
# Check if a file has an allowable extension
def allowed_file(filename):
    return '.' in filename and \
        filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
# Run the pipeline on every line of the file and return a list of dictionaries
# pairing each original input line with its processed output
def process_file(file_path):
    with open(file_path, 'r') as f:
        lines = f.readlines()
    results = []
    for line in lines:
        result = run_pipeline(line)
        results.append(result)
    result = [{'input': line.strip(), 'output': result} for line, result in zip(lines, results)]
    return result
# Home page route that allows users to upload files
@app.route('/')
def index():
    # Render the index page
    return render_template('index.html')
# Handle file upload requests
@app.route('/', methods=['POST'])
def upload_file():
    # Get the uploaded file
    file = request.files['file']
    # If the user did not select any file, return an error message
    if not file:
        return 'No file selected'
    # If the file type is not allowed, return an error message
    if not allowed_file(file.filename):
        return 'File type not allowed'
    # Generate a unique hash code from the file contents to use as the file name
    hash_code = hashlib.md5(file.read()).hexdigest()
    filename = f"{hash_code}.txt"
    file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    # Save the uploaded file (rewind first, since hashing consumed the stream)
    file.seek(0)
    file.save(file_path)
    # Placeholder HTML informing the user that the file is being processed (currently unused)
    tmp_html = "<html><body><h1>Please wait while your file is being processed...</h1></body></html>"
    # Process the uploaded file and write the result to a CSV file
    result = process_file(file_path)
    result_file_path = os.path.join(app.config['UPLOAD_FOLDER'], 'result.csv')
    # Aggregate per-text statistics: raw label counts, the individual clause labels,
    # and the proportion of each label within its feature dimension
    counts = {}
    individual_labels = {}
    props = {}
    for id_, text in enumerate(result):
        individual_labels[id_] = []
        counts[id_] = {"generic": 0, "specific": 0, "stative": 0, "dynamic": 0, "static": 0, "episodic": 0,
                       "habitual": 0, "NA genericity": 0, "NA eventivity": 0, "NA boundedness": 0}
        for clause in text['output'][1]:
            individual_labels[id_].append(clause[1])
            label = labels2attrs[clause[1]]
            for id__, feature in enumerate(label):
                if "NA" not in feature:
                    counts[id_][feature] += 1
                elif id__ == 0:
                    counts[id_]["NA genericity"] += 1
                elif id__ == 1:
                    counts[id_]["NA eventivity"] += 1
                else:
                    counts[id_]["NA boundedness"] += 1
        # Per-dimension totals; the max(..., 1) guard avoids division by zero for texts with no clauses
        gen_total = max(counts[id_]['generic'] + counts[id_]['specific'] + counts[id_]['NA genericity'], 1)
        evt_total = max(counts[id_]['stative'] + counts[id_]['dynamic'] + counts[id_]['NA eventivity'], 1)
        bnd_total = max(counts[id_]['static'] + counts[id_]['episodic'] + counts[id_]['habitual'] + counts[id_]['NA boundedness'], 1)
        props[id_] = [counts[id_]['generic'] / gen_total, counts[id_]['specific'] / gen_total,
                      counts[id_]['stative'] / evt_total, counts[id_]['dynamic'] / evt_total,
                      counts[id_]['static'] / bnd_total, counts[id_]['episodic'] / bnd_total,
                      counts[id_]['habitual'] / bnd_total]
    with open(result_file_path, 'w', encoding='utf-8', errors='ignore', newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["input","clauses","individual labels","genericity: generic count","genericity: specific count","eventivity: stative count","eventivity: dynamic count","boundedness: static count","boundedness: episodic count","habitual count","genericity: proportion generic","genericity: proportion specific","eventivity: proportion stative","eventivity: proportion dynamic","boundedness: proportion static","boundedness: proportion episodic","proportion habitual"])
        for id_ in counts.keys():
            clauses = []
            for clause in result[id_]["output"][0]:
                clauses.append("{}: {}".format(clause[1], clause[0]))
            clauses = "\n".join(clauses)
            ind_labels = "\n".join(individual_labels[id_])
            extracted = [result[id_]["input"], clauses, ind_labels, counts[id_]['generic'], counts[id_]['specific'], counts[id_]['stative'], counts[id_]['dynamic'], counts[id_]['static'], counts[id_]['episodic'], counts[id_]['habitual']] + props[id_]
            writer.writerow(extracted)
    # Return the result file as a download once the processing is complete
    return send_file(result_file_path, as_attachment=True)
if __name__ == '__main__':
    app.run(host="0.0.0.0", port=7860)
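# Example usage (assumed): upload a .txt file to the running app and save the returned CSV, e.g.
#   curl -F "file=@input.txt" http://localhost:7860/ -o result.csv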