File size: 6,836 Bytes
20f48d6 ac8518e ca1cd14 ac8518e 301671d e5de71c ac8518e 461f0ea 95fc20d 461f0ea 95fc20d 461f0ea 95fc20d 461f0ea 95fc20d 461f0ea 95fc20d 461f0ea e8c8f24 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
import os
# Install the runtime dependencies at start-up, one pip invocation per
# requirement, in the order listed here.
_REQUIREMENTS = (
    "transformers==4.26.1",
    "spacy==3.5.4",
    "spacy-alignments==0.9.1",
    "spacy-legacy==3.0.12",
    "spacy-loggers==1.0.3",
    "torch",
    "seaborn==0.11.2",
    "gradio==3.16.1",
    "typer==0.4.1",
    "pydantic==1.9.2",
    "matplotlib==3.4.3",
    "Flask",
    "sty==1.0.4",
    "numpy==1.26.4",
)
for _requirement in _REQUIREMENTS:
    # The exit status of each install is ignored, so a failed install does
    # not stop the remaining ones.
    os.system("python -m pip install " + _requirement)
from flask import Flask, render_template, request, send_file, after_this_request
import hashlib
from pipeline import *
import csv
# The Flask application; templates and static files are both served from
# the 'templates' directory.
app = Flask(__name__, template_folder='templates', static_folder='templates')

# Set the folder for saving uploaded files
UPLOAD_FOLDER = 'uploads/'
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
# Create the directory if it doesn't exist. exist_ok=True replaces the
# separate isdir() check, so there is no window between the check and the
# creation in which another process could create the directory first.
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)

# Allowable file extensions for uploading
ALLOWED_EXTENSIONS = {'txt', 'csv'}
# Check if a file has an allowable extension
def allowed_file(filename):
    """Return True when *filename* has an extension listed in ALLOWED_EXTENSIONS.

    The extension is the text after the last '.', compared case-insensitively.
    A name without any '.' is never allowed.
    """
    _, dot, extension = filename.rpartition('.')
    if dot != '.':
        return False
    return extension.lower() in ALLOWED_EXTENSIONS
# Run the pipeline on each non-empty line of the file and return a list of dictionaries containing the original input and the processed output
def process_file(file_path):
    """Run the pipeline on every non-empty entry of an uploaded file.

    For a path ending in ".csv" the entries are the first column of each
    row; for any other path the entries are the lines of the file. Each
    entry is stripped of surrounding whitespace and blank entries are
    skipped, so every returned input is paired with its own output.

    The file is scheduled for deletion once the current request has been
    answered, so this must be called while a Flask request is being handled.

    Args:
        file_path: Path of the saved upload to read.

    Returns:
        A list of ``{'input': text, 'output': run_pipeline(text)}``
        dictionaries, in file order.
    """
    # Read all inputs first so the file is closed again before the
    # (potentially slow) pipeline runs and before it is removed.
    with open(file_path, 'r', encoding='utf-8-sig', errors='ignore') as f:
        if file_path.endswith(".csv"):
            # Guard against completely empty rows, which csv.reader
            # yields as an empty list.
            entries = [row[0].strip() for row in csv.reader(f) if row]
        else:
            entries = [line.strip() for line in f]
    inputs = [entry for entry in entries if entry != ""]

    result = [{'input': text, 'output': run_pipeline(text)} for text in inputs]

    @after_this_request
    def remove_file(response):
        # Best-effort cleanup: a failure to delete the upload is logged
        # but must not break the response.
        try:
            os.remove(file_path)
        except OSError as error:
            app.logger.error("Error removing uploaded file %s: %s", file_path, error)
        return response

    return result
# Home page route that allows users to upload files
@app.route('/')
def index():
    """Render the home page from the ``index.html`` template."""
    home_page = render_template('index.html')
    return home_page
# Handle file upload requests
# Counters that make up the denominator of each proportion group. The
# "NA ..." counters are tallied but never written to the CSV themselves.
_GENERICITY_KEYS = ("generic", "specific", "NA genericity")
_EVENTIVITY_KEYS = ("stative", "dynamic", "NA eventivity")
_BOUNDEDNESS_KEYS = ("static", "episodic", "habitual", "NA boundedness")
# Counter that receives an "NA" feature, selected by the feature's position
# in the label's attribute sequence (positions past the second share the last).
_NA_KEYS = ("NA genericity", "NA eventivity", "NA boundedness")


def _share(count, total):
    """Return count / total, or 0.0 when total is zero (no clauses to count)."""
    return count / total if total else 0.0


def _count_features(clause_labels):
    """Tally the features of every clause label of one input text."""
    counts = dict.fromkeys(_GENERICITY_KEYS + _EVENTIVITY_KEYS + _BOUNDEDNESS_KEYS, 0)
    for clause_label in clause_labels:
        # labels2attrs is not defined in this file; presumably it comes from
        # `pipeline` and maps a label to a sequence of feature names.
        for position, feature in enumerate(labels2attrs[clause_label]):
            if "NA" not in feature:
                counts[feature] += 1
            else:
                counts[_NA_KEYS[min(position, len(_NA_KEYS) - 1)]] += 1
    return counts


def _proportions(counts):
    """Return the seven per-group proportions in CSV column order."""
    genericity_total = sum(counts[key] for key in _GENERICITY_KEYS)
    eventivity_total = sum(counts[key] for key in _EVENTIVITY_KEYS)
    boundedness_total = sum(counts[key] for key in _BOUNDEDNESS_KEYS)
    return [
        _share(counts['generic'], genericity_total),
        _share(counts['specific'], genericity_total),
        _share(counts['stative'], eventivity_total),
        _share(counts['dynamic'], eventivity_total),
        _share(counts['static'], boundedness_total),
        _share(counts['episodic'], boundedness_total),
        _share(counts['habitual'], boundedness_total),
    ]


@app.route('/', methods=['POST'])
def upload_file():
    """Process an uploaded file and return the results as a CSV download.

    The upload is saved under a name derived from the MD5 of its content,
    passed to ``process_file``, and one CSV row is written per processed
    input: the input text, its clauses, the clause labels, the feature
    counts and the feature proportions. A text without any clause gets
    proportions of 0.0 instead of raising ZeroDivisionError.

    Returns:
        The plain-text message 'No file selected' or 'File type not allowed'
        when the upload is rejected, otherwise the result CSV as an
        attachment.
    """
    # .get() lets a request without a 'file' field reach the
    # "No file selected" branch instead of failing on the key lookup.
    upload = request.files.get('file')
    # If the user did not select any file, return an error message
    if not upload:
        return 'No file selected'
    # If the file type is not allowed, return an error message
    if not allowed_file(upload.filename):
        return 'File type not allowed'

    # Name the saved file after a hash of its content
    hash_code = hashlib.md5(upload.read()).hexdigest()
    # NOTE(review): every upload is stored with a .txt suffix, so CSV
    # uploads are also read line by line as plain text. Confirm this is intended.
    filename = f"{hash_code}.txt"
    file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    # Hashing consumed the stream; rewind before saving the uploaded file
    upload.seek(0)
    upload.save(file_path)

    result = process_file(file_path)
    # NOTE(review): all requests write to the same result.csv path.
    result_file_path = os.path.join(app.config['UPLOAD_FOLDER'], 'result.csv')

    # Build every row before opening the output file.
    # Assumes text['output'][0] holds the clauses and text['output'][1] the
    # labelled clauses, each indexable with [0]/[1]. TODO confirm against
    # run_pipeline.
    rows = []
    for text in result:
        clause_labels = [clause[1] for clause in text['output'][1]]
        counts = _count_features(clause_labels)
        clauses = "\n".join(
            "{}: {}".format(clause[1], clause[0]) for clause in text['output'][0]
        )
        rows.append(
            [
                text['input'],
                clauses,
                "\n".join(clause_labels),
                counts['generic'],
                counts['specific'],
                counts['stative'],
                counts['dynamic'],
                counts['static'],
                counts['episodic'],
                counts['habitual'],
            ]
            + _proportions(counts)
        )

    with open(result_file_path, 'w', encoding='utf-8', errors='ignore', newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["input","clauses","individual labels","genericity: generic count","genericity: specific count","eventivity: stative count","eventivity: dynamic count","boundedness: static count","boundedness: episodic count","habitual count","genericity: proportion generic","genericity: proportion specific","eventivity: proportion stative","eventivity: proportion dynamic","boundedness: proportion static","boundedness: proportion episodic","proportion habitual"])
        writer.writerows(rows)

    # Return the result file as a download once the processing is complete
    return send_file(result_file_path, as_attachment=True)
if __name__ == '__main__':
    # When run as a script, serve on all network interfaces on port 7860.
    app.run(host="0.0.0.0", port=7860)