|
import os |
|
|
|
import subprocess
import sys

# Pinned runtime dependencies. They are installed one at a time, in this
# order, every time the module is loaded.
_REQUIREMENTS = (
    "transformers==4.26.1",
    "spacy==3.5.4",
    "spacy-alignments==0.9.1",
    "spacy-legacy==3.0.12",
    "spacy-loggers==1.0.3",
    "torch",
    "seaborn==0.11.2",
    "gradio==3.16.1",
    "typer==0.4.1",
    "pydantic==1.9.2",
    "matplotlib==3.4.3",
    "Flask",
    "sty==1.0.4",
    "numpy==1.26.4",
)


def _install_requirements(requirements=_REQUIREMENTS):
    """Install each requirement with pip, best effort.

    pip is invoked through the interpreter that is running this module
    (``sys.executable``) so the packages end up in the environment that
    will import them, rather than in whichever ``python`` is first on PATH.
    The command is passed as an argument list, so no shell is involved.

    Args:
        requirements: Iterable of pip requirement specifiers, installed in
            iteration order with one pip invocation per specifier.

    A failing installation does not abort start-up; it is only reported on
    stderr, and the remaining requirements are still attempted.
    """
    interpreter = sys.executable or "python"
    for requirement in requirements:
        completed = subprocess.run(
            [interpreter, "-m", "pip", "install", requirement],
            check=False,
        )
        if completed.returncode != 0:
            print(
                f"pip install {requirement} failed "
                f"with exit code {completed.returncode}",
                file=sys.stderr,
            )


_install_requirements()
|
|
|
from flask import Flask, render_template, request, send_file, after_this_request |
|
import hashlib |
|
from pipeline import * |
|
import csv |
|
|
|
# Flask application object. Templates and static assets are both looked up
# in the ``templates`` directory.
app = Flask(__name__, template_folder='templates', static_folder='templates')

# Directory that receives uploaded files (and the generated result file).
UPLOAD_FOLDER = 'uploads/'
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

# exist_ok=True replaces the separate isdir() check: creating the directory
# is then safe even if it already exists or another process creates it
# between a check and the call.
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
|
|
|
|
|
# Lower-case file extensions (without the dot) that may be uploaded.
ALLOWED_EXTENSIONS = {'txt', 'csv'}


def allowed_file(filename):
    """Tell whether *filename* ends in one of the ALLOWED_EXTENSIONS.

    The extension is the text after the last dot and is compared
    case-insensitively.

    Args:
        filename: Name of the uploaded file. ``None`` and other non-string
            values are accepted and treated as "not allowed".

    Returns:
        True if the name contains a dot and its extension is allowed,
        otherwise False.
    """
    # Guard against a missing filename instead of failing with a TypeError
    # on the ``in`` test.
    if not isinstance(filename, str) or '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1].lower()
    return extension in ALLOWED_EXTENSIONS
|
|
|
|
|
def process_file(file_path):
    """Run the pipeline on every non-empty entry of an uploaded file.

    A path ending in ``.txt`` is read line by line; a path ending in
    ``.csv`` contributes the first column of every row. Entries that are
    empty after stripping whitespace are skipped. Any other extension
    yields an empty result.

    The file is scheduled for deletion once the current request has been
    handled, so this function must be called while a Flask request is
    active.

    Args:
        file_path: Path of the uploaded file on disk.

    Returns:
        A list of ``{'input': text, 'output': run_pipeline(text)}`` dicts,
        one per processed entry, in file order.
    """
    texts = []
    # newline='' is what the csv module asks for; line iteration for the
    # .txt case still splits on every newline style and strip() removes
    # the terminators.
    with open(file_path, 'r', encoding='utf-8-sig', errors='ignore',
              newline='') as f:
        if file_path.endswith(".txt"):
            texts = [line.strip() for line in f]
        elif file_path.endswith(".csv"):
            # csv.reader yields an empty list for blank lines, so rows are
            # filtered before their first column is read.
            texts = [row[0].strip() for row in csv.reader(f) if row]

    # Each input is paired with its own output here, so skipped blank
    # entries can no longer shift inputs against results.
    entries = [
        {'input': text, 'output': run_pipeline(text)}
        for text in texts
        if text
    ]

    @after_this_request
    def remove_file(response):
        # Best-effort clean-up: a failure is logged, never raised.
        try:
            os.remove(file_path)
        except OSError as error:
            app.logger.error("Error removing uploaded file: %s", error)
        return response

    return entries
|
|
|
|
|
@app.route('/')
def index():
    """Render the ``index.html`` template for the root URL."""
    page = render_template('index.html')
    return page
|
|
|
|
|
# Counter that receives an "NA" attribute, chosen by the attribute's
# position in the label's attribute sequence (any position past the second
# counts as boundedness).
_NA_KEYS = ("NA genericity", "NA eventivity", "NA boundedness")

# Column titles of the generated result file.
_RESULT_HEADER = [
    "input",
    "clauses",
    "individual labels",
    "genericity: generic count",
    "genericity: specific count",
    "eventivity: stative count",
    "eventivity: dynamic count",
    "boundedness: static count",
    "boundedness: episodic count",
    "habitual count",
    "genericity: proportion generic",
    "genericity: proportion specific",
    "eventivity: proportion stative",
    "eventivity: proportion dynamic",
    "boundedness: proportion static",
    "boundedness: proportion episodic",
    "proportion habitual",
]


def _ratio(part, whole):
    """Return ``part / whole``, or 0.0 when *whole* is zero."""
    return part / whole if whole else 0.0


def _count_features(clause_labels):
    """Count how often each feature value occurs in *clause_labels*.

    Every label is expanded through ``labels2attrs`` (presumably provided
    by the ``pipeline`` module -- confirm) into a sequence of feature
    values; values containing "NA" are counted per position instead.
    """
    counts = {"generic": 0, "specific": 0, "stative": 0, "dynamic": 0,
              "static": 0, "episodic": 0, "habitual": 0,
              "NA genericity": 0, "NA eventivity": 0, "NA boundedness": 0}
    last_na = len(_NA_KEYS) - 1
    for clause_label in clause_labels:
        for position, feature in enumerate(labels2attrs[clause_label]):
            if "NA" not in feature:
                counts[feature] += 1
            else:
                counts[_NA_KEYS[min(position, last_na)]] += 1
    return counts


def _proportions(counts):
    """Turn feature counts into the seven proportion columns.

    Each proportion is taken over all values of its feature, including the
    "NA" ones. An input without any clause gets 0.0 everywhere instead of
    raising ZeroDivisionError.
    """
    genericity = (counts['generic'] + counts['specific']
                  + counts['NA genericity'])
    eventivity = (counts['stative'] + counts['dynamic']
                  + counts['NA eventivity'])
    boundedness = (counts['static'] + counts['episodic']
                   + counts['habitual'] + counts['NA boundedness'])
    return [
        _ratio(counts['generic'], genericity),
        _ratio(counts['specific'], genericity),
        _ratio(counts['stative'], eventivity),
        _ratio(counts['dynamic'], eventivity),
        _ratio(counts['static'], boundedness),
        _ratio(counts['episodic'], boundedness),
        _ratio(counts['habitual'], boundedness),
    ]


def _result_row(entry):
    """Build the result-file row for one processed input."""
    clause_items, label_items = entry['output'][0], entry['output'][1]
    labels = [item[1] for item in label_items]
    counts = _count_features(labels)
    clauses = "\n".join(
        "{}: {}".format(item[1], item[0]) for item in clause_items
    )
    return [
        entry['input'],
        clauses,
        "\n".join(labels),
        counts['generic'],
        counts['specific'],
        counts['stative'],
        counts['dynamic'],
        counts['static'],
        counts['episodic'],
        counts['habitual'],
    ] + _proportions(counts)


@app.route('/', methods=['POST'])
def upload_file():
    """Process an uploaded text file and return the analysis as a CSV.

    The upload is stored under a name derived from the MD5 digest of its
    content, run through ``process_file``, and summarised into
    ``result.csv`` inside the upload folder: one row per input with its
    clauses, labels, feature counts and feature proportions.

    Returns:
        The result file as an attachment, or a short plain-text message
        when no file was sent or its extension is not allowed.
    """
    # .get() lets a request without the form field take the same
    # "No file selected" path as an empty upload.
    file = request.files.get('file')
    if not file:
        return 'No file selected'
    if not allowed_file(file.filename):
        return 'File type not allowed'

    # NOTE(review): the stored name always ends in ".txt", so process_file
    # reads the upload line by line even when a .csv file was sent --
    # confirm whether that is intended.
    hash_code = hashlib.md5(file.read()).hexdigest()
    filename = f"{hash_code}.txt"
    file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)

    # read() above consumed the stream; rewind before writing it to disk.
    file.seek(0)
    file.save(file_path)

    result = process_file(file_path)

    # All rows are built before the file is opened, so a failure while
    # summarising does not leave a half-written result behind.
    rows = [_result_row(entry) for entry in result]

    # NOTE(review): every request writes the same result.csv, so
    # simultaneous uploads can overwrite each other's output.
    result_file_path = os.path.join(app.config['UPLOAD_FOLDER'], 'result.csv')
    with open(result_file_path, 'w', encoding='utf-8', errors='ignore',
              newline="") as f:
        writer = csv.writer(f)
        writer.writerow(_RESULT_HEADER)
        writer.writerows(rows)

    return send_file(result_file_path, as_attachment=True)
|
|
|
if __name__ == '__main__':
    # Started as a script: serve on every network interface, port 7860.
    app.run(port=7860, host="0.0.0.0")