def search_similar_content(self, query: str, top_k: int = 3) -> List[Dict]:
    """Rank the extracted pages by TF-IDF cosine similarity to ``query``.

    Every entry of ``self.pages_text`` is treated as one document. The query
    is appended as the final document so it is vectorised with the same
    vocabulary as the pages.

    Args:
        query: Free-text search string.
        top_k: Maximum number of pages to return.

    Returns:
        Up to ``top_k`` dicts with the keys ``page_number``,
        ``similarity_score`` and ``excerpt`` (first 500 characters of the
        page), best match first. Pages with a similarity of zero are left
        out. An empty list is returned when there are no pages, the query is
        blank, ``top_k`` is not positive, or nothing indexable remains after
        stop-word removal.
    """
    # ``[-0:]`` would select *every* page, so a non-positive top_k has to be
    # rejected explicitly; the other two guards avoid a zero-row matrix.
    if top_k <= 0 or not self.pages_text or not query.strip():
        return []

    # Prepare documents (each page as a document); the query goes last.
    documents = [page['text'] or "" for page in self.pages_text]
    documents.append(query)

    vectorizer = TfidfVectorizer(stop_words='english')
    try:
        tfidf_matrix = vectorizer.fit_transform(documents)
    except ValueError:
        # scikit-learn raises ValueError for an empty vocabulary (e.g. pages
        # without a text layer plus a stop-word-only query): no matches.
        return []

    # Similarity of the query (last row) against every page row.
    scores = cosine_similarity(tfidf_matrix[-1:], tfidf_matrix[:-1])[0]
    ranked_indices = scores.argsort()[-top_k:][::-1]

    return [
        {
            'page_number': self.pages_text[idx]['page_num'],
            'similarity_score': float(scores[idx]),
            'excerpt': (self.pages_text[idx]['text'] or "")[:500],
        }
        for idx in ranked_indices
        if scores[idx] > 0
    ]
def _show_case_studies(self): print("\n📋 CASE STUDIES:") for i, case in enumerate(self.analyzer.case_studies[:5], 1): print(f"\ni. case['title']") print(f" case['description'][:200]...")
def _show_concepts(self): print("\n🔑 KEY CONCEPTS:") for i, concept in enumerate(self.analyzer.key_concepts[:15], 1): print(f"\ni. concept['term'].upper() (appears concept['frequency']x)") if concept['context']: print(f" Context: concept['context'][0][:150]...") urban planning lecture notes pdf
def extract_text_from_pdf(self) -> str:
    """Extract the text of every page of the PDF at ``self.pdf_path``.

    Side effects:
        ``self.pages_text`` is replaced with one dict per page, holding the
        1-based ``page_num`` and the page ``text``.
        ``self.full_text`` is set to all page texts, each followed by a
        newline.

    The page list is rebuilt from scratch on every call so that calling the
    method twice does not leave duplicate pages next to a freshly
    overwritten ``full_text``.

    Returns:
        The full extracted text (same value as ``self.full_text``).
    """
    pages = []
    with open(self.pdf_path, 'rb') as file:
        pdf_reader = PyPDF2.PdfReader(file)
        for page_num, page in enumerate(pdf_reader.pages, start=1):
            # NOTE(review): defensive fallback in case a page yields no text
            # object at all; keeps the concatenation below from failing.
            page_text = page.extract_text() or ""
            pages.append({
                'page_num': page_num,
                'text': page_text,
            })

    self.pages_text = pages
    self.full_text = "".join(f"{entry['text']}\n" for entry in pages)
    return self.full_text
def interactive_session(self):
    """Run the interactive study session until the user quits.

    Prints the command overview, then reads commands from standard input in
    a loop and dispatches them to the matching ``_show_*`` / ``_search`` /
    ``_take_quiz`` method, or to ``self.analyzer.export_to_json``. Input is
    stripped and lower-cased before it is interpreted. The loop ends on
    ``quit``, on end-of-input, or on Ctrl-C.
    """
    print("\n" + "="*60)
    print("📚 URBAN PLANNING STUDY ASSISTANT")
    print("="*60)
    print("\nCommands:")
    print(" 'concepts' - Show key concepts")
    print(" 'questions' - Generate study questions")
    print(" 'cases' - Show case studies")
    print(" 'summary' - Show lecture summary")
    print(" 'search [term]' - Search for specific topics")
    print(" 'quiz' - Take a quick quiz")
    print(" 'export' - Export analysis to JSON")
    print(" 'quit' - Exit")

    while True:
        try:
            command = input("\n📝 Enter command: ").strip().lower()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C: leave the session like 'quit' would
            # instead of surfacing a traceback.
            print("\nHappy studying! 📖")
            break

        # Split into keyword and argument so that only the exact word
        # 'search' triggers a search ('searching' is an unknown command).
        parts = command.split(maxsplit=1)
        keyword = parts[0] if parts else ''
        argument = parts[1] if len(parts) > 1 else ''

        if command == 'quit':
            print("Happy studying! 📖")
            break
        elif command == 'concepts':
            self._show_concepts()
        elif command == 'questions':
            self._show_questions()
        elif command == 'cases':
            self._show_case_studies()
        elif command == 'summary':
            self._show_summary()
        elif keyword == 'search':
            term = argument.strip()
            if term:
                self._search(term)
            else:
                print("Please provide a search term (e.g., 'search zoning')")
        elif command == 'quiz':
            self._take_quiz()
        elif command == 'export':
            self.analyzer.export_to_json('urban_planning_analysis.json')
        else:
            print("Unknown command. Try 'concepts', 'questions', 'cases', 'summary', 'search [term]', 'quiz', 'export', or 'quit'")
def _show_questions(self): questions = self.analyzer.generate_study_questions() print("\n❓ STUDY QUESTIONS:") for i, q in enumerate(questions, 1): print(f"\ni. q['question']") print(f" 💡 Hint: q['hint']")
def extract_key_concepts(self) -> List[Dict]:
    """Extract and rank key urban planning concepts.

    Counts case-insensitive substring occurrences of a fixed list of
    planning terms in ``self.full_text``, keeps the 20 most frequent, and
    attaches up to two sentences that mention each term as context.

    Returns:
        A list of dicts with the keys ``term``, ``frequency`` and
        ``context``, most frequent first. The same list is stored on
        ``self.key_concepts``.
    """
    # Urban planning specific terminology
    planning_terms = [
        'zoning', 'land use', 'transportation', 'infrastructure',
        'sustainability', 'urban design', 'smart growth', 'new urbanism',
        'gentrification', 'affordable housing', 'public space',
        'transit-oriented development', 'mixed-use', 'walkability',
        'green infrastructure', 'climate resilience', 'urban renewal',
        'community engagement', 'comprehensive plan', 'subdivision',
        'environmental impact', 'historic preservation', 'urban sprawl',
        'density', 'parking', 'complete streets', 'placemaking',
    ]

    # Lower-case the text once instead of once per term.
    lowered_text = self.full_text.lower()

    # Count frequencies of planning terms
    concept_counts = Counter()
    for term in planning_terms:
        count = lowered_text.count(term)
        if count > 0:
            concept_counts[term] = count

    concepts = []
    if concept_counts:
        # Sentence-split (and lower-case) once; the result does not depend
        # on the concept being looked up.
        sentences = [
            (sentence, sentence.lower())
            for sentence in sent_tokenize(self.full_text)
        ]
        for concept, count in concept_counts.most_common(20):
            context = [
                sentence for sentence, lowered in sentences
                if concept in lowered
            ][:2]
            concepts.append({
                'term': concept,
                'frequency': count,
                'context': context,
            })

    self.key_concepts = concepts
    return concepts
def export_to_json(self, output_path: str):
    """Export all analysis results to a JSON file.

    The document contains a ``metadata`` object (source file, page count,
    whitespace-separated word count), the summary from
    ``self.create_summary()``, the stored sections, key concepts and case
    studies, freshly generated study questions, and the first 5000
    characters of the full text.

    Args:
        output_path: Path of the JSON file to write (UTF-8, overwritten if
            it already exists).
    """
    output = {
        'metadata': {
            'source_file': self.pdf_path,
            'total_pages': len(self.pages_text),
            'total_words': len(self.full_text.split()),
        },
        'summary': self.create_summary(),
        'sections': self.sections,
        'key_concepts': self.key_concepts,
        'case_studies': self.case_studies,
        'study_questions': self.generate_study_questions(),
        'full_text_excerpt': self.full_text[:5000],  # First 5000 chars
    }

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(output, f, indent=2, ensure_ascii=False)

    print(f"Analysis exported to {output_path}")


class UrbanPlanningStudyAssistant:
    """Wraps an ``UrbanPlanningNotesAnalyzer`` instance."""

    def __init__(self, analyzer: "UrbanPlanningNotesAnalyzer"):
        self.analyzer = analyzer