From: xangelo Date: Fri, 26 Jan 2024 19:32:25 +0000 (-0500) Subject: fix content snippet extraction X-Git-Url: https://git.xangelo.ca/?a=commitdiff_plain;h=fce0e0cd5ddd181dbd070992ea891b4d6bad0bad;p=river.git fix content snippet extraction --- diff --git a/src/server.ts b/src/server.ts index 9f22797..5b32531 100644 --- a/src/server.ts +++ b/src/server.ts @@ -15,49 +15,72 @@ const db = knex({ const parser = new Parser(); const app = express(); +function contentExtractor(feed: FeedSchema, item): string { + let str = ''; + switch(feed.title) { + case 'TechCrunch': + str = item.contentSnippet; + break; + case 'Ars Technica Features': + str = item.contentSnippet; + break; + } + + return str; +} + async function queryFeeds() { const completeFeedList = await db.select('*').from('feeds').orderBy('created_at'); const feedsToQuery = completeFeedList.filter(feed => { // only update every 5 minutes - return Date.now() > (feed.updated_at + 1000 * 60 * 5); + //return Date.now() > (feed.updated_at + 1000 * 60 * 5); + return true; }); console.log(`Querying ${feedsToQuery.length} feeds`); for(let feed of feedsToQuery) { console.log(`Querying ${feed.title}(${feed.url})`); - const data = await parser.parseURL(feed.url); - - const items: FeedEntrySchema[] = data.items.map(item => { - return { - id: item.guid, - title: item.title, - link: item.link, - feed_id: feed.id, - pub_date: Math.floor(new Date(item.pubDate).getTime()/1000), - author: item.creator, - created_at: Math.floor(Date.now()/1000), - updated_at: Math.floor(Date.now()/1000), - meta: { - comment_link: item.comments + try { + const data = await parser.parseURL(feed.url); + + const items: FeedEntrySchema[] = data.items.map(item => { + return { + id: item.guid, + title: item.title, + link: item.link, + feed_id: feed.id, + pub_date: Math.floor(new Date(item.pubDate).getTime()/1000), + author: item.creator, + created_at: Math.floor(Date.now()/1000), + updated_at: Math.floor(Date.now()/1000), + meta: { + comment_link: item.comments, + snippet: contentExtractor(feed, item) + } } + }); + + for(let item of items) { + // inserting one at a time so that we can ignore duplicates for now + // eventually we'll need to do some kind of merge + await db('feed_entry').insert(item).onConflict().ignore(); } - }); - for(let item of items) { - // inserting one at a time so that we can ignore duplicates for now - // eventually we'll need to do some kind of merge - await db('feed_entry').insert(item).onConflict().ignore(); - } + await db('feeds').update({ + updated_at: new Date() + }).where({ + id: feed.id + }); - await db('feeds').update({ - updated_at: new Date() - }).where({ - id: feed.id - }); + } + catch(e) { + console.log(e); + console.log('Continuing..'); + } } setTimeout(queryFeeds, (Number(process.env.FEED_REFRESH_RATE) + 1) * 1000 * 60); @@ -122,11 +145,11 @@ app.get('/river', async (req, res) => { }); res.send(groups.map(group => { - return `
- ${renderFeedItemHeader(group[0])} - ${group.map(renderFeedItem).join("\n")} -
`; - }).join("\n")); + return `
+${renderFeedItemHeader(group[0])} +${group.map(renderFeedItem).join("\n")} +
`; + }).join("\n")); }); app.post('/feeds', async (req, res) => { diff --git a/src/time.ts b/src/time.ts index f5c33fb..8bc4fc6 100644 --- a/src/time.ts +++ b/src/time.ts @@ -26,7 +26,7 @@ export function fuzzyTime(timestamp: number): string { hour = minute * 60, day = hour * 24; - let fuzzy: string = ''; + let fuzzy: string = formatDate(timestamp); if (delta < 30) { fuzzy = 'just then.'; @@ -40,8 +40,6 @@ export function fuzzyTime(timestamp: number): string { fuzzy = '1 hour ago.' } else if (delta < day) { fuzzy = Math.floor(delta / hour) + ' hours ago.'; - } else if (delta < day * 2) { - fuzzy = formatDate(timestamp); } return fuzzy; diff --git a/src/views.ts b/src/views.ts index 9e69542..8ca67d4 100644 --- a/src/views.ts +++ b/src/views.ts @@ -14,10 +14,13 @@ return ` } export function renderFeedItem(entry: FeedWithEntrySchema): string { + const meta = JSON.parse(entry.meta); + const date = new Date(entry.pub_date * 1000); return `
- ${fuzzyTime(new Date(entry.pub_date * 1000).getTime())} + ${fuzzyTime(date.getTime())} ${entry.title} + ${meta.snippet ? `

${meta.snippet}

` : ''}
` }