import os

# Bootstrap: install pinned runtime dependencies at startup (Hugging Face
# Spaces-style deployment).  NOTE(review): running pip via os.system at import
# time is fragile — a requirements.txt would be preferable, but this is kept
# for compatibility with the existing deployment.
_PINNED_PACKAGES = [
    "transformers==4.26.1",
    "spacy==3.5.4",
    "spacy-alignments==0.9.1",
    "spacy-legacy==3.0.12",
    "spacy-loggers==1.0.3",
    "torch",
    "seaborn==0.11.2",
    "gradio==3.16.1",
    "typer==0.4.1",
    "pydantic==1.9.2",
    "matplotlib==3.4.3",
    "Flask",
    "sty==1.0.4",
    "numpy==1.26.4",
]
for _pkg in _PINNED_PACKAGES:
    os.system(f"python -m pip install {_pkg}")

from flask import Flask, render_template, request, send_file, after_this_request
import hashlib
import csv

from pipeline import *

app = Flask(__name__, template_folder='templates', static_folder='templates')

# Folder for saving uploaded files; created at startup if missing.
UPLOAD_FOLDER = 'uploads/'
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
if not os.path.isdir(app.config['UPLOAD_FOLDER']):
    os.makedirs(app.config['UPLOAD_FOLDER'])

# Allowable file extensions for uploading
ALLOWED_EXTENSIONS = {'txt', 'csv'}


def allowed_file(filename):
    """Return True if *filename* has an extension listed in ALLOWED_EXTENSIONS."""
    return '.' in filename and \
        filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS


def process_file(file_path):
    """Run the pipeline over every non-empty input line of an uploaded file.

    Supports ``.txt`` (one input per line) and ``.csv`` (first column of each
    row).  Returns a list of ``{'input': text, 'output': pipeline_result}``
    dicts.  The uploaded file is deleted after the response has been sent.

    Fixes vs. the original: ``results`` was only initialized in the .txt
    branch (NameError for CSVs), ``lines`` was undefined for CSVs, empty CSV
    rows raised IndexError, and a second redundant file handle was leaked.
    """
    inputs = []
    results = []
    with open(file_path, 'r', encoding='utf-8-sig', errors='ignore') as f:
        if file_path.endswith(".txt"):
            for line in f:
                text = line.strip()
                if text:
                    inputs.append(text)
                    results.append(run_pipeline(text))
        elif file_path.endswith(".csv"):
            # Only the first column of each CSV row is processed; skip rows
            # that are empty or whose first cell is blank.
            for row in csv.reader(f):
                if row and row[0].strip():
                    text = row[0].strip()
                    inputs.append(text)
                    results.append(run_pipeline(text))

    result = [{'input': text, 'output': output}
              for text, output in zip(inputs, results)]

    @after_this_request
    def remove_file(response):
        # Clean up the uploaded file once the response has been generated;
        # best-effort, so failures are only logged.
        try:
            os.remove(file_path)
        except Exception as error:
            app.logger.error("Error removing downloaded file: %s", error)
        return response

    return result


# Home page route that allows users to upload files
@app.route('/')
def index():
    """Render the upload form."""
    return render_template('index.html')


# Handle file upload requests
@app.route('/', methods=['POST'])
def upload_file():
    """Accept an uploaded .txt/.csv file, run the pipeline on each input,
    and return a result.csv summarizing clause labels, counts, and
    per-category proportions as a download."""
    file = request.files['file']
    # If the user did not select any file, return an error message
    if not file:
        return 'No file selected'
    # If the file type is not allowed, return an error message
    if not allowed_file(file.filename):
        return 'File type not allowed'

    # Content-addressed filename avoids collisions between distinct uploads.
    hash_code = hashlib.md5(file.read()).hexdigest()
    # Preserve the real extension so process_file uses the right parser.
    # (The original always wrote ".txt", which mis-parsed CSV uploads.)
    extension = file.filename.rsplit('.', 1)[1].lower()
    filename = f"{hash_code}.{extension}"
    file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)

    # file.read() above consumed the stream; rewind before saving.
    file.seek(0)
    file.save(file_path)

    # Process the uploaded file and aggregate per-input label statistics.
    result = process_file(file_path)
    result_file_path = os.path.join(app.config['UPLOAD_FOLDER'], 'result.csv')

    counts = {}
    individual_labels = {}
    props = {}
    for id_, text in enumerate(result):
        individual_labels[id_] = []
        counts[id_] = {"generic": 0, "specific": 0, "stative": 0,
                       "dynamic": 0, "static": 0, "episodic": 0,
                       "habitual": 0, "NA genericity": 0,
                       "NA eventivity": 0, "NA boundedness": 0}
        # output[1] holds (clause, label) pairs; labels2attrs maps a label to
        # its (genericity, eventivity, boundedness) feature triple.
        for clause in text['output'][1]:
            individual_labels[id_].append(clause[1])
            label = labels2attrs[clause[1]]
            for id__, feature in enumerate(label):
                if "NA" not in feature:
                    counts[id_][feature] += 1
                elif id__ == 0:
                    counts[id_]["NA genericity"] += 1
                elif id__ == 1:
                    counts[id_]["NA eventivity"] += 1
                else:
                    counts[id_]["NA boundedness"] += 1

        c = counts[id_]
        gen_total = c['generic'] + c['specific'] + c['NA genericity']
        evt_total = c['stative'] + c['dynamic'] + c['NA eventivity']
        bnd_total = c['static'] + c['episodic'] + c['habitual'] + c['NA boundedness']
        # Guard against inputs that produced no clauses (all totals zero),
        # which raised ZeroDivisionError in the original.
        props[id_] = [
            c['generic'] / gen_total if gen_total else 0.0,
            c['specific'] / gen_total if gen_total else 0.0,
            c['stative'] / evt_total if evt_total else 0.0,
            c['dynamic'] / evt_total if evt_total else 0.0,
            c['static'] / bnd_total if bnd_total else 0.0,
            c['episodic'] / bnd_total if bnd_total else 0.0,
            c['habitual'] / bnd_total if bnd_total else 0.0,
        ]

    with open(result_file_path, 'w', encoding='utf-8', errors='ignore', newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["input", "clauses", "individual labels",
                         "genericity: generic count", "genericity: specific count",
                         "eventivity: stative count", "eventivity: dynamic count",
                         "boundedness: static count", "boundedness: episodic count",
                         "habitual count",
                         "genericity: proportion generic", "genericity: proportion specific",
                         "eventivity: proportion stative", "eventivity: proportion dynamic",
                         "boundedness: proportion static", "boundedness: proportion episodic",
                         "proportion habitual"])
        for id_ in counts.keys():
            # output[0] holds (clause_text, clause_index) pairs.
            clauses = "\n".join("{}: {}".format(clause[1], clause[0])
                                for clause in result[id_]["output"][0])
            ind_labels = "\n".join(individual_labels[id_])
            extracted = [result[id_]["input"], clauses, ind_labels,
                         counts[id_]['generic'], counts[id_]['specific'],
                         counts[id_]['stative'], counts[id_]['dynamic'],
                         counts[id_]['static'], counts[id_]['episodic'],
                         counts[id_]['habitual']] + props[id_]
            writer.writerow(extracted)

    # Return the result file as a download once the processing is complete
    return send_file(result_file_path, as_attachment=True)


if __name__ == '__main__':
    app.run(host="0.0.0.0", port=7860)