# BabakScrapes's picture
# Update app.py
# ca1cd14 verified
import os
import subprocess
import sys

# Runtime dependencies, installed one at a time, in this order, every time the
# module is loaded.
_REQUIREMENTS = (
    "transformers==4.26.1",
    "spacy==3.5.4",
    "spacy-alignments==0.9.1",
    "spacy-legacy==3.0.12",
    "spacy-loggers==1.0.3",
    "torch",
    "seaborn==0.11.2",
    "gradio==3.16.1",
    "typer==0.4.1",
    "pydantic==1.9.2",
    "matplotlib==3.4.3",
    "Flask",
    "sty==1.0.4",
    "numpy==1.26.4",
)
for _requirement in _REQUIREMENTS:
    # Run pip through the interpreter executing this script (sys.executable)
    # instead of whatever "python" resolves to on PATH, and pass the arguments
    # as a list so no shell is involved. A failed install is not fatal here,
    # matching the previous best-effort behaviour.
    subprocess.run([sys.executable, "-m", "pip", "install", _requirement], check=False)
from flask import Flask, render_template, request, send_file, after_this_request
import hashlib
from pipeline import *
import csv
# Flask application; templates and static files are both looked up in 'templates'
app = Flask(__name__, static_folder='templates', template_folder='templates')
# Folder in which uploaded files are saved
UPLOAD_FOLDER = 'uploads/'
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
# Create the upload folder at import time unless it is already a directory
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
# File extensions (lower case, without the dot) accepted for upload
ALLOWED_EXTENSIONS = {'csv', 'txt'}


# Decide whether an uploaded file name carries an accepted extension
def allowed_file(filename):
    """Return True when ``filename`` ends in one of ``ALLOWED_EXTENSIONS``.

    The extension is the text after the last dot and is compared
    case-insensitively. A name without any dot is rejected.
    """
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[-1]
    return extension.lower() in ALLOWED_EXTENSIONS
# Run the pipeline on every non-empty entry of an uploaded file and return a
# list of dictionaries containing the original input and the processed output
def process_file(file_path):
    """Run ``run_pipeline`` on each non-empty entry of ``file_path``.

    A ``.txt`` file contributes one entry per line; a ``.csv`` file contributes
    the first column of each row. Entries are stripped of surrounding
    whitespace and skipped when empty.

    Removal of ``file_path`` is scheduled through Flask's
    ``after_this_request``, so this function is expected to be called while a
    request is being handled.

    Args:
        file_path: Path of the saved upload; its suffix selects the parser.

    Returns:
        A list of ``{'input': text, 'output': run_pipeline(text)}`` dicts, in
        file order.

    Raises:
        ValueError: If ``file_path`` ends in neither ``.txt`` nor ``.csv``.
    """
    # Each input is stored next to its own output as it is produced, so
    # skipped blank entries can never shift inputs and outputs out of step.
    processed = []
    with open(file_path, 'r', encoding='utf-8-sig', errors='ignore') as f:
        if file_path.endswith(".txt"):
            entries = f
        elif file_path.endswith(".csv"):
            # Only the first column is used; completely empty rows are skipped.
            entries = (row[0] for row in csv.reader(f) if row)
        else:
            raise ValueError("Unsupported file type: {}".format(file_path))
        for entry in entries:
            text = entry.strip()
            if text:
                processed.append({'input': text, 'output': run_pipeline(text)})

    @after_this_request
    def remove_file(response):
        # Best-effort cleanup: a failed removal is logged and the response is
        # still returned unchanged.
        try:
            os.remove(file_path)
        except OSError as error:
            app.logger.error("Error removing or closing downloaded file handle: %s", error)
        return response

    return processed
# Home page route: serves the page from which users upload files
@app.route('/')
def index():
    """Render the ``index.html`` template."""
    page = render_template('index.html')
    return page
# Handle file upload requests
def _proportion(part, total):
    """Return ``part / total``, or 0.0 when ``total`` is zero."""
    return part / total if total else 0.0


def _summarize_labels(clause_labels):
    """Tally feature counts and proportions for one text's clause labels.

    Each label is looked up in ``labels2attrs``. A feature whose name does not
    contain "NA" is counted under its own name; otherwise its position in the
    looked-up sequence selects the NA bucket (0 -> genericity, 1 -> eventivity,
    anything else -> boundedness).

    Returns:
        ``(counts, proportions)`` where ``counts`` is a dict keyed by feature
        name and ``proportions`` is the list
        [generic, specific, stative, dynamic, static, episodic, habitual].
        All proportions are 0.0 when the corresponding total is zero, e.g.
        for a text without any clause labels.
    """
    counts = {"generic": 0, "specific": 0, "stative": 0, "dynamic": 0,
              "static": 0, "episodic": 0, "habitual": 0,
              "NA genericity": 0, "NA eventivity": 0, "NA boundedness": 0}
    for clause_label in clause_labels:
        for position, feature in enumerate(labels2attrs[clause_label]):
            if "NA" not in feature:
                counts[feature] += 1
            elif position == 0:
                counts["NA genericity"] += 1
            elif position == 1:
                counts["NA eventivity"] += 1
            else:
                counts["NA boundedness"] += 1
    genericity_total = counts["generic"] + counts["specific"] + counts["NA genericity"]
    eventivity_total = counts["stative"] + counts["dynamic"] + counts["NA eventivity"]
    boundedness_total = (counts["static"] + counts["episodic"]
                         + counts["habitual"] + counts["NA boundedness"])
    proportions = [
        _proportion(counts["generic"], genericity_total),
        _proportion(counts["specific"], genericity_total),
        _proportion(counts["stative"], eventivity_total),
        _proportion(counts["dynamic"], eventivity_total),
        _proportion(counts["static"], boundedness_total),
        _proportion(counts["episodic"], boundedness_total),
        _proportion(counts["habitual"], boundedness_total),
    ]
    return counts, proportions


@app.route('/', methods=['POST'])
def upload_file():
    """Process an uploaded file and return the per-text results as a CSV download.

    Returns:
        A plain-text error message when no file was selected or the extension
        is not allowed; otherwise the ``send_file`` response for
        ``result.csv`` in the upload folder.
    """
    # Get the uploaded file
    file = request.files['file']
    # If the user did not select any file, return an error message
    if not file:
        return 'No file selected'
    # If the file type is not allowed, return an error message
    if not allowed_file(file.filename):
        return 'File type not allowed'
    # Name the stored copy after a hash of its content
    hash_code = hashlib.md5(file.read()).hexdigest()
    # NOTE: the stored copy always gets a ".txt" suffix, whatever the uploaded
    # extension was, so process_file() reads every upload line by line.
    filename = f"{hash_code}.txt"
    file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    # Hashing consumed the stream; rewind before saving the uploaded file
    file.seek(0)
    file.save(file_path)
    # Process the uploaded file; the result is written out as a CSV file
    result = process_file(file_path)
    result_file_path = os.path.join(app.config['UPLOAD_FOLDER'], 'result.csv')
    # Build every row before opening the result file, so a failure while
    # counting does not leave a truncated result.csv behind.
    rows = []
    for text in result:
        output = text['output']
        individual_labels = [clause[1] for clause in output[1]]
        counts, proportions = _summarize_labels(individual_labels)
        clauses = "\n".join("{}: {}".format(clause[1], clause[0]) for clause in output[0])
        ind_labels = "\n".join(individual_labels)
        rows.append([text["input"], clauses, ind_labels,
                     counts['generic'], counts['specific'],
                     counts['stative'], counts['dynamic'],
                     counts['static'], counts['episodic'],
                     counts['habitual']] + proportions)
    with open(result_file_path, 'w', encoding='utf-8', errors='ignore', newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["input","clauses","individual labels","genericity: generic count","genericity: specific count","eventivity: stative count","eventivity: dynamic count","boundedness: static count","boundedness: episodic count","habitual count","genericity: proportion generic","genericity: proportion specific","eventivity: proportion stative","eventivity: proportion dynamic","boundedness: proportion static","boundedness: proportion episodic","proportion habitual"])
        writer.writerows(rows)
    # Return the result file as a download once the processing is complete
    return send_file(result_file_path, as_attachment=True)
# Start Flask's built-in server only when this file is executed as a script
if __name__ == '__main__':
    # Listen on all interfaces, port 7860
    app.run(port=7860, host="0.0.0.0")