generated from mwc/lab_server
	Initial commit
This commit is contained in:
		
							
								
								
									
										51
									
								
								poem_server/app/models.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										51
									
								
								poem_server/app/models.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,51 @@
 | 
			
		||||
from banjo.models import Model, StringField, IntegerField, ForeignKey
 | 
			
		||||
from banjo.http import BadRequest
 | 
			
		||||
import pronouncing
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
class Line(Model):
    """A single line of poetry, linked to its poem and, optionally, a rhyme."""

    # The text of the line.
    text = StringField()
    # Text used for word matching; `last_word` and `rhyming_lines` read it.
    # NOTE(review): presumably the lowercase, punctuation-free form of `text`.
    clean_text = StringField()
    # Position of the line; `Poem.__str__` orders lines by this value.
    line_number = IntegerField()
    # The poem this line belongs to (reverse accessor: `poem.lines`).
    poem = ForeignKey("Poem", related_name="lines")
    # The rhyme this line belongs to, if any (reverse accessor: `rhyme.lines`).
    rhyme = ForeignKey("Rhyme", related_name="lines", null=True)

    def __str__(self):
        """The string representation of a line is just its text."""
        return self.text

    def last_word(self):
        """Return the final whitespace-separated word of `clean_text`.

        Returns None when `clean_text` contains no words.
        """
        parts = self.clean_text.split()
        if parts:
            return parts[-1]
        return None

    def rhyming_lines(self):
        """Return the other lines that share this line's rhyme.

        Lines whose `clean_text` ends with this line's last word are left
        out, so a line is not offered as a rhyme for the same word.  Note
        that this is a plain suffix match on the text.

        Returns an empty list when this line has no rhyme.
        """
        if self.rhyme:
            return self.rhyme.lines.exclude(clean_text__endswith=self.last_word())
        else:
            return []
 | 
			
		||||
 | 
			
		||||
class Poem(Model):
    """A poem, identified by a unique `gutenberg_id`, made up of `Line` rows."""

    gutenberg_id = IntegerField(unique=True)

    def __str__(self):
        """Render the poem as its lines' text, one per row, in line order."""
        ordered_lines = self.lines.order_by('line_number')
        texts = (line.text for line in ordered_lines)
        return '\n'.join(texts)
 | 
			
		||||
 | 
			
		||||
class Rhyme(Model):
    """A group of lines whose last words share the same rhyming sounds."""

    # The rhyming part of a pronunciation, as produced by
    # `pronouncing.rhyming_part`; `get_rhyme_for_word` looks rhymes up by it.
    phones = StringField()

    @classmethod
    def get_rhyme_for_word(cls, word):
        """Return the stored rhyme matching the pronunciation of `word`.

        Only the first pronunciation reported by `pronouncing` is considered.

        Raises:
            BadRequest: if `word` has no known pronunciation, or if no rhyme
                with the matching rhyming part is stored.
        """
        phones = pronouncing.phones_for_word(word)
        if not phones:
            raise BadRequest(f"Couldn't figure out how to pronounce {word}")
        rhyming_phones = pronouncing.rhyming_part(phones[0])
        try:
            return cls.objects.get(phones=rhyming_phones)
        except cls.DoesNotExist:
            raise BadRequest(f"Sorry, no lines rhyme with {word}")
 | 
			
		||||
							
								
								
									
										8
									
								
								poem_server/app/views.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										8
									
								
								poem_server/app/views.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,8 @@
 | 
			
		||||
from banjo.urls import route_get, route_post
 | 
			
		||||
from banjo.http import BadRequest, NotFound
 | 
			
		||||
from app.models import Line, Poem, Rhyme
 | 
			
		||||
from random import choice, sample
 | 
			
		||||
 | 
			
		||||
@route_get('lines/random', args={})
def get_random_line(params):
    """Handle GET `lines/random`.

    Returns a dict whose `line` key holds the text of the line obtained from
    `Line.objects.random()`.  `params` is not used.
    """
    picked = Line.objects.random()
    response = {'line': picked.text}
    return response
 | 
			
		||||
							
								
								
									
										81
									
								
								poem_server/import_poems.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										81
									
								
								poem_server/import_poems.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,81 @@
 | 
			
		||||
import os
 | 
			
		||||
import json
 | 
			
		||||
import re
 | 
			
		||||
from tqdm import tqdm
 | 
			
		||||
from banjo.runner import setup_django
 | 
			
		||||
import requests
 | 
			
		||||
from pathlib import Path
 | 
			
		||||
from collections import defaultdict
 | 
			
		||||
from argparse import ArgumentParser
 | 
			
		||||
import pronouncing
 | 
			
		||||
import gzip
 | 
			
		||||
 | 
			
		||||
# Address of the gzipped, newline-delimited JSON corpus fetched by download_corpus().
corpus_url = "http://static.decontextualize.com/gutenberg-poetry-v001.ndjson.gz"
 | 
			
		||||
# Local path the corpus is downloaded to and later read from.
corpus_file = Path("corpus.gz")
 | 
			
		||||
# Progress-bar total used when no limit is given.
# NOTE(review): presumably the number of lines in the full corpus; confirm.
corpus_length = 3085117
 | 
			
		||||
 | 
			
		||||
def download_corpus():
    """Download the poetry corpus to `corpus_file` unless it already exists.

    The response body is streamed into a temporary sibling file which is
    renamed onto `corpus_file` only once the whole body has been written.
    An interrupted or failed download therefore never leaves a partial file
    that a later run would mistake for the complete corpus.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.RequestException: on connection problems or timeouts.
    """
    if corpus_file.exists():
        return
    print("Downloading a file with three million lines of poetry...")
    partial_file = corpus_file.with_name(corpus_file.name + ".part")
    # The timeout bounds connecting and each wait for data, not the whole
    # transfer; the context manager releases the connection on any exit path.
    with requests.get(corpus_url, stream=True, timeout=60) as download:
        # Without this check an HTTP error page would be saved as the corpus.
        download.raise_for_status()
        with partial_file.open("wb") as f:
            for chunk in download.iter_content(chunk_size=64 * 1024):
                if chunk:
                    f.write(chunk)
    partial_file.replace(corpus_file)
 | 
			
		||||
    
 | 
			
		||||
def read_corpus(limit=None):
    """Yield one decoded JSON record per line of the gzipped corpus file.

    Args:
        limit: Maximum number of records to yield.  A falsy value (`None`
            or `0`) means the whole file is read.

    The file is opened inside a `with` block so the handle is closed when
    the generator finishes, stops at `limit`, or is closed early.
    """
    with gzip.open(corpus_file) as f:
        for i, line in enumerate(f):
            if limit and i == limit:
                break
            yield json.loads(line.strip())
 | 
			
		||||
 | 
			
		||||
def clean(line):
    """Lowercase `line`, drop everything except a-z and spaces, and trim it."""
    lowered = line.lower()
    letters_and_spaces = re.sub('[^a-z ]', '', lowered)
    return letters_and_spaces.strip()
 | 
			
		||||
 | 
			
		||||
def get_last_word(line):
    """Return the final word of `line` after cleaning it.

    The line is passed through `clean` and split on whitespace; the last
    piece is returned, or None when nothing is left after cleaning.
    """
    words = clean(line).split()
    return words[-1] if words else None
 | 
			
		||||
 | 
			
		||||
def import_poems(limit=None):
    """Rebuild the app's database from the corpus, one `Line` per corpus row.

    Any existing `database.sqlite` is deleted first, so every run starts from
    an empty database.  Rows whose text has no last word after cleaning are
    skipped.  Lines whose last word has no known pronunciation are stored
    with `rhyme=None`.

    Args:
        limit: Passed to `read_corpus` to cap how many corpus rows are read;
            a falsy value means the whole corpus.
    """
    database_file = Path("database.sqlite")
    if database_file.exists():
        database_file.unlink()
    setup_django()
    # Imported here, after setup_django() has run, rather than at module level.
    from app.models import Line, Poem, Rhyme
    print("Importing lines into the app's database...")
    line_counts = defaultdict(int)
    # Poems and rhymes repeat across many rows; remembering the objects
    # already fetched avoids a database lookup for each of them on every row.
    poems = {}
    rhymes = {}
    for row in tqdm(read_corpus(limit=limit), total=limit or corpus_length):
        text = row['s']
        last_word = get_last_word(text)
        if not last_word:
            continue
        rhyme = None
        last_word_phones = pronouncing.phones_for_word(last_word)
        if last_word_phones:
            # Only the first listed pronunciation is used.
            rhyming_phones = pronouncing.rhyming_part(last_word_phones[0])
            rhyme = rhymes.get(rhyming_phones)
            if rhyme is None:
                rhyme, _ = Rhyme.objects.get_or_create(phones=rhyming_phones)
                rhymes[rhyming_phones] = rhyme
        gid = row['gid']
        poem = poems.get(gid)
        if poem is None:
            poem, _ = Poem.objects.get_or_create(gutenberg_id=gid)
            poems[gid] = poem
        Line.objects.create(
            text=text,
            clean_text=clean(text),
            line_number=line_counts[gid],
            poem=poem,
            rhyme=rhyme,
        )
        # Count only stored lines so line numbers stay contiguous per poem.
        line_counts[gid] += 1
 | 
			
		||||
 | 
			
		||||
# Run the download and import only when executed as a script, so importing
# this module does not parse argv, hit the network, or delete the database.
if __name__ == "__main__":
    parser = ArgumentParser()
    # Optional cap on how many corpus rows are imported.
    parser.add_argument('-l', '--limit', type=int)
    args = parser.parse_args()
    download_corpus()
    import_poems(args.limit)
 | 
			
		||||
		Reference in New Issue
	
	Block a user