|
import os |
|
|
|
import subprocess
import sys

# Packages installed when this module is loaded, before the third-party
# imports further down.  They are installed one at a time and in this exact
# order, so every pip invocation resolves on its own just as the individual
# commands did.
_REQUIREMENTS = (
    "transformers",
    "spacy==3.5.4",
    "spacy-alignments==0.9.1",
    "spacy-legacy==3.0.12",
    "spacy-loggers==1.0.3",
    "torch",
    "seaborn==0.11.2",
    "gradio==3.16.1",
    "typer==0.4.1",
    "pydantic==1.9.2",
    "matplotlib==3.4.3",
    "Flask",
    "sty",
)

for _requirement in _REQUIREMENTS:
    # Use the interpreter that is running this script rather than whatever
    # "python" happens to be first on PATH, so the packages land in the
    # environment that will import them.  The argument list avoids a shell.
    _status = subprocess.run(
        [sys.executable or "python", "-m", "pip", "install", _requirement],
        check=False,
    ).returncode
    if _status != 0:
        # Installation stays best-effort (start-up continues), but a failure
        # is now reported instead of being silently dropped.
        print(
            f"warning: pip install {_requirement} exited with status {_status}",
            file=sys.stderr,
        )
|
|
|
from flask import Flask, render_template, request, Response, send_file, jsonify |
|
import hashlib |
|
from pipeline import * |
|
import csv |
|
|
|
# The Flask application; the 'templates' directory is used both for templates
# and as the static folder.
app = Flask(__name__, static_folder='templates', template_folder='templates')

# Directory that receives uploaded files; created on start-up when missing.
UPLOAD_FOLDER = 'uploads/'
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)

# File extensions (lower-case, without the dot) accepted for upload.
ALLOWED_EXTENSIONS = {'txt'}
|
|
|
|
|
def allowed_file(filename):
    """Return True when *filename* ends in an extension from ALLOWED_EXTENSIONS.

    The extension is the text after the last dot and is compared
    case-insensitively.  A name that contains no dot is rejected.
    """
    _stem, dot, extension = filename.rpartition('.')
    if not dot:
        return False
    return extension.lower() in ALLOWED_EXTENSIONS
|
|
|
|
|
def process_file(file_path):
    """Run the pipeline on every line of a text file.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A list with one dict per line, in file order.  Each dict holds
        ``'input'`` (the line with surrounding whitespace stripped) and
        ``'output'`` (whatever ``run_pipeline`` returned for the raw line,
        trailing newline included).
    """
    # Decode explicitly instead of relying on the locale-dependent default.
    # 'utf-8-sig' is plain UTF-8 that additionally drops a leading byte-order
    # mark, which would otherwise end up inside the first line.
    with open(file_path, 'r', encoding='utf-8-sig') as f:
        return [
            {'input': line.strip(), 'output': run_pipeline(line)}
            for line in f
        ]
|
|
|
|
|
@app.route('/')
def index():
    """Serve the landing page rendered from the ``index.html`` template."""
    page = render_template('index.html')
    return page
|
|
|
|
|
# Counters kept for every input line.
_COUNT_KEYS = (
    "generic", "specific", "stative", "dynamic", "static", "episodic",
    "habitual", "NA genericity", "NA eventivity", "NA boundedness",
)

# Counter that absorbs a feature containing "NA", chosen by the feature's
# position within the label's attribute sequence.
_NA_KEYS = ("NA genericity", "NA eventivity", "NA boundedness")

_CSV_HEADER = [
    "input", "clauses", "individual labels",
    "genericity: generic count", "genericity: specific count",
    "eventivity: stative count", "eventivity: dynamic count",
    "boundedness: static count", "boundedness: episodic count",
    "habitual count",
    "genericity: proportion generic", "genericity: proportion specific",
    "eventivity: proportion stative", "eventivity: proportion dynamic",
    "boundedness: proportion static", "boundedness: proportion episodic",
    "proportion habitual",
]


def _tally_features(labels):
    """Count, per feature, the attributes ``labels2attrs`` gives for *labels*."""
    tally = dict.fromkeys(_COUNT_KEYS, 0)
    for label in labels:
        for position, feature in enumerate(labels2attrs[label]):
            if "NA" not in feature:
                tally[feature] += 1
            else:
                # Position 0 and 1 have their own NA counter; every later
                # position is counted as "NA boundedness".
                tally[_NA_KEYS[min(position, 2)]] += 1
    return tally


def _ratio(part, whole):
    """Return ``part / whole``, or 0.0 when *whole* is zero."""
    return part / whole if whole else 0.0


def _proportions(tally):
    """Return the seven per-dimension proportions in CSV column order."""
    genericity = tally["generic"] + tally["specific"] + tally["NA genericity"]
    eventivity = tally["stative"] + tally["dynamic"] + tally["NA eventivity"]
    boundedness = (tally["static"] + tally["episodic"] + tally["habitual"]
                   + tally["NA boundedness"])
    return [
        _ratio(tally["generic"], genericity),
        _ratio(tally["specific"], genericity),
        _ratio(tally["stative"], eventivity),
        _ratio(tally["dynamic"], eventivity),
        _ratio(tally["static"], boundedness),
        _ratio(tally["episodic"], boundedness),
        _ratio(tally["habitual"], boundedness),
    ]


def _csv_row(entry):
    """Build the CSV row for one ``process_file`` entry."""
    # NOTE(review): entry['output'][0] and [1] are both indexed as sequences
    # of pairs; [1] supplies the label in slot 1 -- confirm against
    # run_pipeline's return value.
    clauses = "\n".join(
        "{}: {}".format(clause[1], clause[0]) for clause in entry["output"][0]
    )
    labels = [clause[1] for clause in entry["output"][1]]
    tally = _tally_features(labels)
    return [
        entry["input"], clauses, "\n".join(labels),
        tally["generic"], tally["specific"],
        tally["stative"], tally["dynamic"],
        tally["static"], tally["episodic"], tally["habitual"],
    ] + _proportions(tally)


@app.route('/', methods=['POST'])
def upload_file():
    """Process an uploaded text file and return the analysis as a CSV download.

    The upload is stored in the upload folder under a name derived from the
    MD5 digest of its content, run through ``process_file``, and summarised
    as one CSV row per input line (clauses, labels, feature counts and
    proportions).

    Returns:
        The CSV as an attachment named ``result.csv``, or a plain-text
        message when no file was sent or its extension is not allowed.
    """
    # .get() so a request without the 'file' field gets the same message as
    # an empty selection instead of an exception.
    file = request.files.get('file')
    if not file:
        return 'No file selected'
    if not allowed_file(file.filename):
        return 'File type not allowed'

    # The digest is only used to derive a file name from the content.
    hash_code = hashlib.md5(file.read()).hexdigest()
    file_path = os.path.join(app.config['UPLOAD_FOLDER'], f"{hash_code}.txt")

    # Hashing consumed the stream; rewind before saving.
    file.seek(0)
    file.save(file_path)

    # Compute every row before the output file is opened.  A line without
    # labelled clauses now yields zero proportions instead of raising
    # ZeroDivisionError.
    rows = [_csv_row(entry) for entry in process_file(file_path)]

    # One result file per upload: a single shared 'result.csv' would be
    # overwritten by a concurrent request before it has been sent.
    result_file_path = os.path.join(
        app.config['UPLOAD_FOLDER'], f"{hash_code}.csv"
    )
    with open(result_file_path, 'w', encoding='utf-8', errors='ignore',
              newline="") as f:
        writer = csv.writer(f)
        writer.writerow(_CSV_HEADER)
        writer.writerows(rows)

    # Absolute path so the file that was just written is the one sent,
    # however send_file resolves relative paths.  The client still receives
    # it under the name 'result.csv'.
    return send_file(
        os.path.abspath(result_file_path),
        as_attachment=True,
        download_name='result.csv',
    )
|
|
|
if __name__ == '__main__':
    # Started as a script: serve on every network interface, port 7860.
    app.run(port=7860, host="0.0.0.0")