import os

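# Install pinned dependencies at startup (a sketch of the common pattern for hosted
# demo environments that do not preinstall these packages)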
os.system("python -m pip install transformers==4.26.1")
os.system("python -m pip install spacy==3.5.4")
os.system("python -m pip install spacy-alignments==0.9.1")
os.system("python -m pip install spacy-legacy==3.0.12")
os.system("python -m pip install spacy-loggers==1.0.3")
os.system("python -m pip install torch")
os.system("python -m pip install seaborn==0.11.2")
os.system("python -m pip install gradio==3.16.1")
os.system("python -m pip install typer==0.4.1")
os.system("python -m pip install pydantic==1.9.2")
os.system("python -m pip install matplotlib==3.4.3")
os.system("python -m pip install Flask")
os.system("python -m pip install sty==1.0.4")
os.system("python -m pip install numpy==1.26.4")

from flask import Flask, render_template, request, send_file, after_this_request
import hashlib
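# run_pipeline and labels2attrs are expected to be provided by the local pipeline module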
from pipeline import *
import csv

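# Serve static assets from the same 'templates' directory that holds the HTML templates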
app = Flask(__name__, template_folder='templates', static_folder='templates')

# Set the folder for saving uploaded files
UPLOAD_FOLDER = 'uploads/'
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
if not os.path.isdir(app.config['UPLOAD_FOLDER']):
    # Create the directory if it doesn't exist
    os.makedirs(app.config['UPLOAD_FOLDER'])

# Allowable file extensions for uploading
ALLOWED_EXTENSIONS = {'txt','csv'}

# Check if a file has an allowable extension
def allowed_file(filename):
    return '.' in filename and \
           filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS

# Run each non-empty line of the file through the pipeline and return a list of
# dictionaries pairing the original input with the processed output
def process_file(file_path):
    inputs = []
    results = []

    with open(file_path, 'r', encoding='utf-8-sig', errors='ignore') as f:
        if file_path.endswith(".txt"):
            for line in f:
                if line.strip() != "":
                    inputs.append(line.strip())
                    results.append(run_pipeline(line.strip()))
        elif file_path.endswith(".csv"):
            reader = csv.reader(f)
            for row in reader:
                if row and row[0].strip() != "":
                    inputs.append(row[0].strip())
                    results.append(run_pipeline(row[0].strip()))

    result = [{'input': text, 'output': output} for text, output in zip(inputs, results)]

    # Delete the uploaded file once the response has been sent
    @after_this_request
    def remove_file(response):
        try:
            os.remove(file_path)
        except Exception as error:
            app.logger.error("Error removing uploaded file: %s", error)
        return response

    return result

# Home page route that allows users to upload files
@app.route('/')
def index():
    # Render the index page
    return render_template('index.html')

# Handle file upload requests
@app.route('/', methods=['POST'])
def upload_file():
    # Get the uploaded file
    file = request.files['file']

    # If the user did not select any file, return an error message
    if not file:
        return 'No file selected'

    # If the file type is not allowed, return an error message
    if not allowed_file(file.filename):
        return 'File type not allowed'

    # Generate a unique hash code for the file name, keeping the original extension
    # so .csv uploads are parsed as CSV
    hash_code = hashlib.md5(file.read()).hexdigest()
    extension = file.filename.rsplit('.', 1)[1].lower()
    filename = f"{hash_code}.{extension}"
    file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)

    # Save the uploaded file
    file.seek(0)
    file.save(file_path)

    # Process the uploaded file and write the results to a CSV file
    result = process_file(file_path)
    result_file_path = os.path.join(app.config['UPLOAD_FOLDER'], 'result.csv')

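    # Aggregate clause-level predictions per input: count each label along the three
    # dimensions (genericity, eventivity, boundedness) via the labels2attrs mapping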
    counts = {}
    individual_labels = {}
    props = {}
    for id_, text in enumerate(result):
        individual_labels[id_] = []
        counts[id_] = {"generic": 0, "specific": 0, "stative": 0, "dynamic": 0, "static": 0,
                       "episodic": 0, "habitual": 0, "NA genericity": 0, "NA eventivity": 0,
                       "NA boundedness": 0}
        for clause in text['output'][1]:
            individual_labels[id_].append(clause[1])
            label = labels2attrs[clause[1]]
            for id__, feature in enumerate(label):
                if "NA" not in feature:
                    counts[id_][feature] += 1
                elif id__ == 0:
                    counts[id_]["NA genericity"] += 1
                elif id__ == 1:
                    counts[id_]["NA eventivity"] += 1
                else:
                    counts[id_]["NA boundedness"] += 1

        # Convert the raw counts into per-dimension proportions once per input
        # (max(..., 1) guards against division by zero when an input yields no clauses)
        genericity_total = max(counts[id_]['generic'] + counts[id_]['specific'] + counts[id_]['NA genericity'], 1)
        eventivity_total = max(counts[id_]['stative'] + counts[id_]['dynamic'] + counts[id_]['NA eventivity'], 1)
        boundedness_total = max(counts[id_]['static'] + counts[id_]['episodic']
                                + counts[id_]['habitual'] + counts[id_]['NA boundedness'], 1)
        props[id_] = [counts[id_]['generic'] / genericity_total,
                      counts[id_]['specific'] / genericity_total,
                      counts[id_]['stative'] / eventivity_total,
                      counts[id_]['dynamic'] / eventivity_total,
                      counts[id_]['static'] / boundedness_total,
                      counts[id_]['episodic'] / boundedness_total,
                      counts[id_]['habitual'] / boundedness_total]


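    # One row per input: original text, clause segmentation, clause-level labels,
    # then per-dimension counts and proportions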
    with open(result_file_path, 'w', encoding='utf-8', errors='ignore', newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["input","clauses","individual labels","genericity: generic count","genericity: specific count","eventivity: stative count","eventivity: dynamic count","boundedness: static count","boundedness: episodic count","habitual count","genericity:  proportion generic","genericity: proportion specific","eventivity: proportion stative","eventivity: proportion dynamic","boundedness: proportion static","boundedness: proportion episodic","proportion habitual"])
        for id_ in counts.keys():
            clauses = []
            for clause in result[id_]["output"][0]:
                clauses.append("{}: {}".format(clause[1],clause[0]))
            clauses = "\n".join(clauses)
            ind_labels = "\n".join(individual_labels[id_])
            extracted = [result[id_]["input"], clauses, ind_labels,
                         counts[id_]['generic'], counts[id_]['specific'],
                         counts[id_]['stative'], counts[id_]['dynamic'],
                         counts[id_]['static'], counts[id_]['episodic'],
                         counts[id_]['habitual']] + props[id_]
            writer.writerow(extracted)

    # Return the result file as a download once the processing is complete
    return send_file(result_file_path, as_attachment=True)

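# Run the development server on all interfaces; 7860 is the default port used by
# Gradio / Hugging Face Spaces style demos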
if __name__ == '__main__':
    app.run(host="0.0.0.0", port=7860)